/****************************************************************************
 *
 * DFT++:  density functional package developed by
 *         the research group of Prof. Tomas Arias, MIT.
 *
 * Principal author: Sohrab Ismail-Beigi
 *
 * Modifications for MPI version: Kenneth P Esler,
 *                                Sohrab Ismail-Beigi, and
 *                                Tairan Wang.
 *
 * Modifications for LSD version: Jason A Cline
 *
 * Modifications for lattice/Pulay forces: Gabor Csanyi and
 *                                         Sohrab Ismail-Beigi
 *
 * Copyright (C) 1996-1998 The Massachusetts Institute of Technology (MIT).
 *
 ****************************************************************************/

/*
 * matrix.C
 *
 */

/* $Id: matrix.c,v 1.1.1.1 1999/11/10 01:30:17 tairan Exp $ */

#include "stdio.h"
#include "math.h"

#include "header.h"

//
// The init() function is the main initialization routine.
// Sets up sizes and allocates memory for matrix class.
// All constructors call init()
//
void
matrix::init(int nrows,int ncols)
{
  nr = nrows;
  nc = ncols;
  hermetian = 0;
  // If null constructor, set data to point to nothing
  if (nr*nc == 0)
    c = NULL;
  // Otherwise, get some memory
  else
    c = (scalar *)mymalloc(sizeof(scalar)*nrows*ncols,
			   "c","matrix.init()");
}

// Free up memory for matrix
void
matrix::freemem(void)
{
  myfree(c);
  c = NULL;
}

// Null constructor
matrix::matrix()
{
  init(0,0);
}

// constructor with sizes
matrix::matrix(int nrows,int ncols)
{
  init(nrows,ncols);
}

// Copy constructor
matrix::matrix(const matrix &m1)
{
  // Setup copied object to have correct size and get memory for it
  init(m1.nr,m1.nc);
  hermetian = m1.hermetian;
  // Copy over m1 contents
  int i;
  for (i=0; i < nr*nc; i++)
    c[i] = m1.c[i];
}

// destructor
matrix::~matrix()
{
  freemem();
}

/* Assignment:  nonstandard in that it returns void.  To make it standard,
 * replace void -> matrix and uncomment the return *this; */
void matrix::operator=(const matrix &m1)
{
  int i;

  /* The sizes must agree */
  if ( (nr != m1.nr) || (nc != m1.nc) )
    die("In matrix::operator=, sizes don't agree.\n\n");
  hermetian = m1.hermetian;
  for (i=0; i < nr*nc; i++)
    c[i] = m1.c[i];
  /* return *this; */
}

/* Adding matrices, as a friend */
matrix operator+(const matrix &m1,const matrix &m2)
{
  int i;

  if ( (m1.nr != m2.nr) || (m1.nc != m2.nc) )
    die("In matrix::operator+, sizes don't agree.\n\n");

  matrix tm(m1);

  for (i=0; i < m1.nr*m1.nc; i++)
    tm.c[i] += m2.c[i];

  return tm;
}

/* Subtracting matrices, as a friend */
matrix operator-(const matrix &m1,const matrix &m2)
{
  int i;

  if ( (m1.nr != m2.nr) || (m1.nc != m2.nc) )
    die("In matrix::operator-, sizes don't agree.\n\n");

  matrix tm(m1);

  for (i=0; i < m1.nr*m1.nc; i++)
    tm.c[i] -= m2.c[i];

  return tm;
}

/* Multiplying matrices, as a friend */
matrix operator*(const matrix &m1, const matrix &m2)
{
  // check sizes
  if (m1.nc != m2.nr)
    die("In matrix::operator*, m1.nc != m2.nr\n\n");

  // allocate the result and zero it out
  matrix mprod(m1.nr,m2.nc);
  mprod.zero_out();

  // do the multiply and return
  matrix_matrix_block_matrix_mult(m1,m2,mprod);

  return mprod;
}

/* Scale a matrix */
matrix operator*(scalar s,const matrix &m)
{
  int i;
  matrix sm(m);

  for (i=0; i < m.nr*m.nc; i++)
    sm.c[i] *= s;
  return sm;
}

/* matrix * scalar */
matrix operator*(const matrix &m, scalar s)
{
  int i;
  matrix sm(m);

  for (i=0; i < m.nr*m.nc; i++)
    sm.c[i] *= s;
  return sm;
}

/* diag_matrix*matrix */
matrix operator*(const diag_matrix &d, const matrix &m)
{
  int i,j;

  if (d.n != m.nr)
    die("In matrix operator*:  d.n != m.nr\n");

   matrix dm(d.n,m.nc);

   for (i=0; i < d.n; i++)
     for (j=0; j < m.nc; j++)
       dm(i,j) = d.c[i]*m(i,j);
  return dm;
}

/* matrix*diag_matrix */
matrix operator*(const matrix &m,const diag_matrix &d)
{
  int i,j;

  /*if (m.nc != d.n)
    die("In matrix operator*:  m.nr != d.n\n"); */
  if (m.nc != d.n)
    die("m = %dx%d, d.n = %d\n%sIn matrix operator*:  m.nr != d.n\n",
	m.nr,m.nc,d.n);

   matrix md(m.nr,d.n);

   for (i=0; i < m.nr; i++)
     for (j=0; j < d.n; j++)
       md(i,j) = m(i,j)*d.c[j];
  return md;
}

/* Scale a matrix in place */
void matrix::operator*=(scalar s)
{
  int i;

  for (i=0; i < nc*nr; i++)
    c[i] *= s;
}

/* Accumulate to a matrix in place */
void matrix::operator+=(const matrix &m)
{
  if (nr!=m.nr || nc!=m.nc)
    die("void matrix::operator+(const matrix &m) has size mismatch\n");

  int i;

  for (i=0; i < nc*nr; i++)
    c[i] += m.c[i];
}

/* Diagonalization routine: uses the FORTRAN77 routines diagrs77_
 * and diagch77; however I DO NOT declare them before using them
 * since then the compiler can't find the routines in the FORTRAN object
 * files since C++ naming conventions differ from the FORTRAN complier
 * naming options in the object files (using objdump -t <file>.o to
 * see the naming of routines */

/*
 * C interfaces to the Lapack diagonalization.
 */
extern "C" int diagch77_C(real *eigs,complex *evecs,complex *A,int *n);
extern "C" int diagrs77_C(real *eigs,real *evecs,real *A,int *n);

void
diagonalize_herm(real *eigs,matrix &evecs,
		 matrix &a,int n)
{
#ifdef DFT_PROFILING
  timerOn(27);   // Turn on diagonalize_herm timer
#endif // DFT_PROFILING

  int i,j;
  scalar z;

  if (evecs.nr != evecs.nc)
    die("evecs is not square in diagonalize_herm()\n\n");
  if (a.nr != a.nc)
    die("a is not square in diagonalize_herm()\n\n");
  if (a.nr != evecs.nr)
    die("Size mismatche of a and evecs in diagonalize_herm()\n\n");
  if (a.nr != n)
    die("n != a.nr in diagonalize_herm()\n\n");
  if (!a.hermetian)
    die("a is not hermetian in diagonalize_herm()\n");
  /* We have to flip the a as FORTRAN access arrays
   * in reverse order from C */
  for (i=0; i < n; i++)
    for (j=i+1; j < n; j++)
      {
	z = a(i,j);
	a(i,j) = a(j,i);
	a(j,i) = z;
      }

  int status = 0;
#if defined SCALAR_IS_COMPLEX
  status = diagch77_C(eigs,evecs.c,a.c,&n);
#elif defined SCALAR_IS_REAL
  status = diagrs77_C(eigs,evecs.c,a.c,&n);
#else
#error scalar is neither real nor complex!
#endif
  if (status != 0)
    die("Status!=0 from calling diag??77_C(): no memory for AP.\n");

  /* We have to flip the eigenvector matrices as FORTRAN access arrays
   * in reverse order from C; we also flip a back to its original form. */
  for (i=0; i < n; i++)
    for (j=i+1; j < n; j++)
      {
	z = evecs(i,j);
	evecs(i,j) = evecs(j,i);
	evecs(j,i) = z;
	z = a(i,j);
	a(i,j) = a(j,i);
	a(j,i) = z;
      }

#ifdef DFT_PROFILING
  timerOff(27);   // Turn off diagonalize_herm timer
#endif // DFT_PROFILING
}

/* Return hermetian adjoint of a matrix */
matrix
herm_adjoint(matrix &a)
{
  int i,j;

  matrix adag(a.nc,a.nr);
  
  for (i=0; i < a.nc; i++)
    for (j=0; j < a.nr; j++)
#if defined SCALAR_IS_COMPLEX
      adag(i,j) = conjugate(a(j,i));
#elif defined SCALAR_IS_REAL
      adag(i,j) = a(j,i);
#else
#error scalar is neither real nor complex!
#endif
  return adag;
}

/* returns ^(-1/2) power of a hermetian matrix U */
matrix
Uminusonehalf(matrix &U,matrix &W,real *u)
{
  int i;

  if (!U.hermetian)
    die("In Uminusonehalf:  U is not hermetian!\n");
  if (U.nr != U.nc)
    die("In Uminusonehalf:  U is not square\n");
  if (W.nr != W.nc)
    die("In Uminusonehalf:  W is not square\n");
  if (U.nr != W.nr)
    die("In Uminusonehalf:  U.n != W.n\n");

  /* Diagonalize U:  U = W*u*Wdag where W is unitary and u is diagonal */
  diagonalize_herm(u,W,U,U.nr);

  /* do u -> u^(-1/2) and then multiply out to get U^(-1/2) */
  diag_matrix umat(U.nr);
  matrix Umhalf(U.nr,U.nc);

  for (i=0; i < U.nr; i++)
    {
      if (u[i] <= 0.0)
	die("In Uminusonehalf, eigenvalue %d is %e <= 0.0\n",i,u[i]);
      umat.c[i] = (real)1.0/sqrt((double)u[i]);
    }
  Umhalf = W*(umat*herm_adjoint(W));
  Umhalf.hermetian = 1;
  return Umhalf;
}

/* write matrix in binary format to file fname */
void 
matrix::write(char *fname)
{
  FILE *fp;

  /* dft_fopen() never returns NULL. that branch dies within the call, 
   * and does the appropriate thing according to whether we are in a serial
   * or MPI context
   */
  fp = dft_fopen(fname,"w"); 
  dft_fwrite(c,sizeof(scalar),nr*nc,fp);
  dft_fclose(fp);
}


void
matrix::write(FILE *fp)
{
  /* Make sure that if this is called from a non-io node on an MPI 
   * system, then fp is NULL (ie. it was opened with dft_fopen() ) */
  dft_fwrite(c,sizeof(scalar),nr*nc,fp);
}

/* read matrix in binary format from file fname */
void matrix::read(char *fname)
{
  FILE *fp;

  fp = dft_fopen(fname,"r");
  dft_fread(c,sizeof(scalar),nr*nc,fp);
  dft_fclose(fp);
}


void
matrix::read(FILE *fp)
{
  /* Make sure that if this is called from a non-io node on an MPI 
   * system, then fp is NULL (ie. it was opened with dft_fopen() ) */
  dft_fread(c,sizeof(scalar),nr*nc,fp);
}


/* zero out the matrix */
void
matrix::zero_out(void)
{
  int i;

  for (i=0; i < nr*nc; i++)
    c[i] = (scalar)0.0;
}

/* Allocate/free an array of matrices */
matrix *
alloc_matrix_array(int nmats,int nrows,int ncols)
{
  int i;
  matrix *M;

  M = (matrix *)mymalloc(sizeof(matrix)*nmats,"alloc_matrix_array","M");
  for (i=0; i < nmats; i++)
    M[i].init(nrows,ncols);

  return M;
}

void
free_matrix_array(int nmats,matrix *M)
{
  int i;

  for (i=0; i < nmats; i++)
    M[i].freemem();
  myfree(M);
}

/* Read/write an array of matrices */
void
read_matrix_array(char *fname,int nmatrices,matrix *M)
{
  FILE *filep;
  int i;

  filep = dft_fopen(fname,"r");
  for (i=0; i < nmatrices; i++) 
    dft_fread(M[i].c,sizeof(scalar),M[i].nr*M[i].nc,filep);

  dft_fclose(filep);
}



void
write_matrix_array(char *fname,int nmatrices,matrix *M)
{
  FILE *filep;
  int i;

  filep = dft_fopen(fname,"w");
  for (i=0; i < nmatrices; i++)
    dft_fwrite(M[i].c,sizeof(scalar),M[i].nr*M[i].nc,filep);
  dft_fclose(filep);
}


/*
 * The matrix calculates the linear operator Q(G) with a matrix of
 * eigenvectors W and eigenvalues mu (these come from the overlap
 * operators where U = Wdag*mu*W).  This is needed for the case of
 * variable fillings.
 * The definition of Q(G) is via:
 *
 * (Wdag*Q(G)*W)_{ij} = (Wdag*G*W)_{ij}/(sqrt(mu_i)+sqrt(mu_j))
 *
 */
matrix
Q(const matrix &G,matrix &W,real *mu)
{
  int i,j;

  if (G.nr != G.nc)
    die("Q(G) called with G not square\n");
  if (W.nr != W.nc)
    die("Q(G) called with W not square\n");
  if (G.nr != W.nr)
    die("Q(G) called with G.nr != W.nr\n");

  /* Take G into the diagonal basis: i.e. W^*G*W, where ^ is herm. adjoint */
  matrix QG(G.nr,G.nc),Wdag(W.nr,W.nc);
  Wdag = herm_adjoint(W);
  QG = Wdag*G*W;

  /* Divide (i,j) entry by sqrt(mu_i)+sqrt(mu_j) */
  for (i=0; i < QG.nr; i++)
    for (j=0; j < QG.nc; j++)
      {
	if (mu[i] < 0.0 || mu[j] < 0.0)
	  die("Q(G) called with mu[%d]=%g mu[%d]=%g\n",i,mu[i],j,mu[j]);
	QG(i,j) = QG(i,j)/(sqrt(mu[i])+sqrt(mu[j]));
      }

  /* Multiply by W and W^ to bring back to standard basis */
  QG = W*QG*Wdag;

  /* Done! */
  QG.hermetian = G.hermetian;
  return QG;
}

/* Returns trace of matrix */
scalar
trace(const matrix &m)
{
  scalar tr;
  int i;

  if (m.nr != m.nc)
    die("trace(matrix) called with non-square matrix\n");

  tr = 0.0;
  for (i=0; i < m.nr; i++)
    tr += m(i,i);
  return tr;
}

/* Returns diagonal of matrix as a vector */
vector
diag(const matrix &m)
{
  
  if (m.nr != m.nc)
    die("diag(matrix) called with non-square matrix\n");

  vector d(m.nr);
  int i;

  for (i=0; i < m.nr; i++)
    d.c[i] = m(i,i);
  return d;
}

/*
 * The matrix calculates the linear operator R(A) with a matrix of
 * eigenvectors Z and eigenvalues beta (these come from the matrix
 * B where B=Z*beta*Zdag and V=exp(iB) is the subspace rotation matrix.
 * This is needed for the case of variable fillings.
 * The definition of R(A) is via:
 *
 * (Zdag*R(A)*Z)_{nm} = (Zdag*A*Z)_{nm}*
 *         ( (exp(i*beta_m-i*beta_n)-1)/(beta_m-beta_n) )
 *
 * The n=m case just gives i for the ratio.
 */
matrix
R(const matrix &A,matrix &Z,real *beta)
{
  int i,j;

  if (A.nr != A.nc)
    die("R(A) called with G not square\n");
  if (Z.nr != Z.nc)
    die("R(A) called with Z not square\n");
  if (A.nr != Z.nr)
    die("R(A) called with A.nr != Z.nr\n");

  /* Take A into the diagonal basis: i.e. Z^*A*Z, where ^ is herm. adjoint */
  matrix RA(A.nr,A.nc),Zdag(Z.nr,Z.nc);
  Zdag = herm_adjoint(Z);
  RA = Zdag*A*Z;

  /* Multiply (n,m) entry by (exp(i*beta_n-i*beta_m)-1)/(beta_n-beta_m) */
  for (i=0; i < RA.nr; i++)
    for (j=0; j < RA.nc; j++)
      {
	register complex ratio;
	register real deltaji = beta[j]-beta[i];

	/* Compute ratio=(exp(i*beta[n]-i*beta[m])-1)/(beta[n]-beta[m] */
	if ( fabs(deltaji) < 1.0e-13*(fabs(beta[i])+fabs(beta[j])) )
	  {
	    ratio.x = 0.0;
	    ratio.y = 1.0;
	  }
	else
	  {
	    register complex z;

	    z.x = 0.0;
	    z.y = deltaji;
	    ratio = (exp(z)-1.0)/deltaji;
	  }
	/* Multiply RA by ratio */
	RA(i,j) = RA(i,j)*ratio;
      }

  /* Multiply by Z and Z^ to bring back to standard basis */
  RA = Z*RA*Zdag;

  /* Done! */
  RA.hermetian = 0;

  return RA;
}

/* Does mout[i] += s*min[i] */
void
scale_accumulate(int nmat,scalar s,matrix *min,matrix *mout)
{
  int i,j;

  for (i=0; i < nmat; i++)
    {
      if (min[i].nr != mout[i].nr || min[i].nc != mout[i].nc)
	die ("Incompatible sizes in scale_accumulate(nmat,s,min,mout)\n");
      for (j=0; j < min[i].nc*min[i].nr; j++)
	mout[i].c[j] += s*min[i].c[j];
    }
}

/* returns absolute square magnitude of sum of all matrix elements */
real
abs2(const matrix &m)
{
  register real r;
  register scalar z;
  register int i;

  r = 0.0;
  for (i=0; i < m.nr*m.nc; i++)
    {
      z = m.c[i];
      r += z.x*z.x + z.y*z.y;
    }
  return r;
}    

/* Same for an array of matrices */
real
abs2(int nmat,matrix *m)
{
  int i;
  real r;

  r = 0.0;
  for (i=0; i < nmat; i++)
    r += abs2(m[i]);
  return r;
}

/*
 * returns the "dot" product of two matrices...i.e.
 * the sum over all the products of their respective entries:
 * dot(A,B) = sum_{ij} { A(i,j)*B(i,j) }
 */
scalar
dot(matrix &A,matrix &B)
{
  if (A.nr!=B.nr || A.nc!=B.nc)
    die("In dot(matrix,matrix) the sizes don't match\n");
  
  scalar d;
  int i;

  d = 0.0;
  for (i=0; i < A.nr*A.nc; i++)
#if defined SCALAR_IS_COMPLEX
    d += conjugate(A.c[i])*B.c[i];
#elif defined SCALAR_IS_REAL
#else
    d += A.c[i]*B.c[i];
#error scalar is neither real nor complex!
#endif
  return d;
}

/* Same for an array of matrices */
scalar
dot(int nmat,matrix *A,matrix *B)
{
  scalar d;
  int i;

  d = 0.0;
  for (i=0; i < nmat; i++)
    d += dot(A[i],B[i]);
  return d;
}
