/****************************************************************************
 *
 * DFT++:  density functional package developed by
 *         the research group of Prof. Tomas Arias, MIT.
 *
 * Principal author: Sohrab Ismail-Beigi
 *
 * Modifications for MPI version: Kenneth P Esler,
 *                                Sohrab Ismail-Beigi, and
 *                                Tairan Wang.
 *
 * Modifications for LSD version: Jason A Cline
 *
 * Modifications for lattice/Pulay forces: Gabor Csanyi and
 *                                         Sohrab Ismail-Beigi
 *
 * Copyright (C) 1996-1998 The Massachusetts Institute of Technology (MIT).
 *
 ****************************************************************************/

/* 
 *   Sohrab Ismail-Beigi             May 1997
 *
 * A set of routines to calculate the bands via CG minimization of an
 * objective function (sum of eigenvalues) with constraints of
 * orthonormality of the states.  The Hamiltonian is assumed fixed.
 * 
 *
 */

/* $Id: calcallbands.c,v 1.1.1.1 1999/11/10 01:30:17 tairan Exp $ */

#include <stdio.h>
#include <math.h>
#include <time.h>
#include <string.h>

#include "header.h"

// #include "parallel.h"

/* Global flags for signal processing.  They are only declared here (extern);
 * of the three, the code in this file consults and clears resetcg_signal
 * alone, in minimize_all_bands_cg(). */
extern int resetcg_signal;
extern int subspace_diag_signal;
extern int copy_C_to_Y_signal;

/*
 * Objective function for the band minimization (sum of eigenvalues):
 *
 *    E = sum_k REAL( trace(C[k]^Hsp*C[k]) )
 *
 * Only evars->C is read from evars; it must already be up to date (the
 * caller in this file runs calc_UVC() immediately before).
 *
 * Returns the accumulated energy over all einfo->nkpts k-points.
 */
static real
all_bands_energy(Elecinfo *einfo,
                 Elecvars *evars,
                 Ioninfo *ioninfo)
{
  column_bundle *bands = evars->C;
  const int nk = einfo->nkpts;

  /* A single scratch bundle is reused for every k-point, so it is
   * allocated with the longest column length that occurs. */
  int longest = 0;
  for (int ik = 0; ik < nk; ik++)
    {
      if (bands[ik].col_length > longest)
        longest = bands[ik].col_length;
    }

  column_bundle scratch(bands[0].tot_ncols, longest);

  real total = 0.0;
  for (int ik = 0; ik < nk; ik++)
    {
      /* Set the logical column length by hand for this k-point, then let
       * copy_innards_column_bundle() transfer whatever else it copies
       * from C[k] (presumably the remaining bookkeeping -- see its
       * definition). */
      scratch.col_length = bands[ik].col_length;
      copy_innards_column_bundle(&bands[ik], &scratch);

      /* scratch receives the result of apply_Hsp() on C[k] */
      apply_Hsp(bands[ik], ioninfo, scratch);
      total += REAL( trace(bands[ik]^scratch) );
    }

  return total;
}

/*
 * Energy, gradient and subspace Hamiltonian for all bands.
 *
 * For every k-point k this routine
 *   - stores C[k]^(result of apply_Hsp on C[k]) in evars->Hsub[k], flags
 *     it as hermetian and diagonalizes it into evars->Hsub_eigs[k] and
 *     evars->Hsub_evecs[k];
 *   - adds REAL(trace(Hsub[k])) to the returned energy, i.e.
 *        E = sum_k { trace(C[k]^Hsp*C[k]) };
 *   - fills grad[k] with the output of apply_Pbar() multiplied by
 *     evars->Umhalf[k]; this is the derivative of E versus Y[k].
 *
 * Returns E.
 */
static real
all_bands_energy_grad_Hsub(Elecinfo *einfo,
                           Elecvars *evars,
                           Ioninfo *ioninfo,
                           column_bundle *grad)
{
  column_bundle *bands = evars->C;
  const int nk = einfo->nkpts;

  /* The scratch bundle is shared by all k-points: size it for the
   * longest column length present. */
  int longest = 0;
  for (int ik = 0; ik < nk; ik++)
    {
      if (bands[ik].col_length > longest)
        longest = bands[ik].col_length;
    }

  column_bundle scratch(bands[0].tot_ncols, longest);

  real total = 0.0;
  for (int ik = 0; ik < nk; ik++)
    {
      matrix &hsub = evars->Hsub[ik];

      /* Adjust the column length by hand, then copy the innards of C[k] */
      scratch.col_length = bands[ik].col_length;
      copy_innards_column_bundle(&bands[ik], &scratch);

      /* scratch receives the result of apply_Hsp() on C[k] */
      apply_Hsp(bands[ik], ioninfo, scratch);

      /* Subspace Hamiltonian, its spectrum, and its contribution to E */
      hsub = bands[ik]^scratch;
      hsub.hermetian = 1;
      diagonalize_herm(evars->Hsub_eigs[ik], evars->Hsub_evecs[ik],
                       hsub, hsub.nr);
      total += REAL( trace(hsub) );

      /* Gradient: grad[k] is used as the intermediate, scratch takes the
       * product with Umhalf[k], and the result is copied back to grad[k]. */
      apply_Pbar(bands[ik], scratch, grad[ik]);
      do_column_bundle_matrix_mult(grad[ik], evars->Umhalf[ik], scratch, 0);
      grad[ik] = scratch;
    }

  return total;
}

/*
 * Does a linmin along dir for all the bands.
 *
 * The configuration we start with in evars has energy E0 and gradient
 * grad.  A trial step of length stepsize is taken along dir, the energy
 * there is evaluated, and a parabola is fitted to E0, the trial energy
 * and the directional derivative.  The trial stepsize is enlarged or
 * reduced until the predicted minimum (gamma) is within a factor of ten
 * of it; Y is then moved by gamma*dir.
 *
 *   dir       search direction, one column_bundle per k-point
 *   basis     not used by this routine
 *   E0, grad  energy and gradient at the starting Y
 *   stepsize  initial trial stepsize
 *   cntrl     supplies stepsize_min, below which the trial stepsize is
 *             no longer reduced
 *
 * Returns the final trial stepsize (not gamma), suitable as the starting
 * stepsize of the next call.
 *
 * If the parabolic fit degenerates to NaN, Y is left at its starting
 * value instead of being shifted by NaN.
 */
static real
do_linmin_all_bands(column_bundle *dir,
		    Basis *basis,
		    Ioninfo *ioninfo,
		    Elecinfo *einfo,
		    Elecvars *evars,
		    real E0,
		    column_bundle *grad,
		    real stepsize,
		    Control &cntrl)
{
  real gamma,dderiv,curvature,E;
  column_bundle *Y = evars->Y;
  int nkpts = einfo->nkpts;

  /* Directional derivative */
  dderiv = 2.0*REAL(dot(nkpts,grad,dir));
  for (;;)
    {
      /* Shift Y by stepsize */
      scale_accumulate(nkpts,stepsize,dir,Y);

      /* Calculate energy of shifted position */
      calc_UVC(einfo,evars);
      E = all_bands_energy(einfo,evars,ioninfo);

      /* Shift back */
      scale_accumulate(nkpts,-stepsize,dir,Y);

      /* Do a parabolic fit to the E0, E, and dderiv
       * to get curvature for quadratic fit and proposed minimum (gamma) */
      curvature = 2.0*(E-E0-stepsize*dderiv)/(stepsize*stepsize);
      gamma = -dderiv/curvature;
      dft_log("dderiv = %8.1le  curvature = %8.1le\n",
	      dderiv,curvature);
      dft_log("stepsize = %8.1le    gamma = %8.1le\n",
	      stepsize,gamma);
      dft_log_flush();

      /* If curvature is wrong way, take a bigger step */
      if (curvature < 0.0)
	{ stepsize *= 4.0; continue; }
      /* If the proposed minimum is much larger than the stepsize,
       * increase stepsize */
      else if (fabs(gamma/stepsize) > 10.0)
	{ stepsize *= 10.0; continue; }
      /* If much smaller, decrease */
      // Tairan (10/15/1998): but the stepsize should not be too small.
      //  otherwise, numerical error will overwhelm the calculation
      //   Y += stepsize * dir
      else if ( (fabs(gamma/stepsize) <  0.1)
		&& (fabs(stepsize) > cntrl.stepsize_min) )
	{ stepsize *= 0.1; continue; }
      /* Otherwise, it was a good linmin so stop! */
      else
	break;
    }

  /* Every comparison above is false for NaN, so a degenerate fit
   * (e.g. dderiv == 0 with E == E0, a zero stepsize, or a NaN energy)
   * also leaves the loop through the final branch.  Shifting Y by NaN
   * would overwrite every coefficient, so keep Y where it is.
   * (x != x is true only for NaN.) */
  if (gamma != gamma)
    {
      dft_log("linmin: parabolic fit gave gamma = NaN; Y left unchanged.\n");
      dft_log_flush();
      return stepsize;
    }

  /* Move to the bottom of the parabola, given by gamma */
  scale_accumulate(nkpts,gamma,dir,Y);

  /* Return the stepsize */
  return stepsize;
}


/*
 * Do up to niter CG steps on all the bands.
 *
 * Each iteration recomputes C from Y (calc_UVC), evaluates the energy,
 * gradient and subspace Hamiltonian, logs them, and -- unless this is the
 * last iteration or cntrl reports energy convergence -- builds a search
 * direction and performs a line minimization along it.
 *
 *   niter     maximum number of iterations
 *   stepsize  initial trial stepsize for the first line minimization
 *   basis     passed to the column_bundle allocator and to the linmin
 *   cntrl     convergence test / report and linmin control
 *
 * Returns the trial stepsize left over from the last line minimization
 * (the input stepsize if no line minimization was performed).
 */
static real
minimize_all_bands_cg(int niter,
		      real stepsize,
		      Basis *basis,
		      Ioninfo *ioninfo,
		      Elecinfo *einfo,
		      Elecvars *evars,
		      Control &cntrl)
{
  int k,iter;
  time_t timenow;
  real linminfact=0.0,cosgg=0.0,alpha,abs2_grad_now,abs2_grad_old=0.0,E;
  int nkpts = einfo->nkpts;
  int nbands = einfo->nbands;

  /* gradients and search direction */
  column_bundle *grad_now,*grad_old,*dir;

  /* Allocate space for the gradients and search directions */
  grad_now = alloc_column_bundle_array(nkpts,nbands,basis);
  grad_old = alloc_column_bundle_array(nkpts,nbands,basis);
  dir = alloc_column_bundle_array(nkpts,nbands,basis);

  dft_log("\n----- minimize_all_bands_cg() -----\n");
  dft_log("Starting %d iterations of conjugate gradients with\n",
	    niter);
  dft_log("initial stepsize = %lg.\n\n",stepsize);
  dft_log_flush();
  
  for (iter=0; iter < niter; iter++)
    {

#ifdef DFT_TRACE_MEM
      mem_trace_report();
#endif // DFT_TRACE_MEM

      /* Calculate energy and gradient of current config */
      calc_UVC(einfo,evars);
      E = all_bands_energy_grad_Hsub(einfo,evars,ioninfo,grad_now);

      /* Square length of the current gradient */
      abs2_grad_now = abs2(nkpts,grad_now);

      /* Some linmin statistics from last iteration */
      if (iter > 0)
	{
	  /* grad_old is first written at the end of iteration 0, so its
	   * square length is only evaluated from the second iteration on
	   * (it is not needed before that either). */
	  abs2_grad_old = abs2(nkpts,grad_old);

	  linminfact = 
	    REAL(dot(nkpts,grad_now,dir))/REAL(dot(nkpts,grad_old,dir));
	  cosgg = 4.0*REAL(dot(nkpts,grad_old,grad_now))/
	    sqrt( 16.0*abs2_grad_now*abs2_grad_old );
	  dft_log("\nlinmin = %8.1le   cosgg = %8.1le\n\n",
		    linminfact,cosgg);
	  dft_log_flush();
	}

      /* Time stamp and print out current energies and subspace Hamiltonian */
      timenow = time(0);
      dft_log("------------------------------------------------------\n");
      dft_log("Iteration %d   %s\n",iter,ctime(&timenow));
      dft_log("Etot = %23.13le\n",E);
      dft_log("\n");
      dft_log("nkpts=%d:  Hsub eigenvalues are:\n",nkpts);
      /* One row per band, one column per k-point */
      for (int band=0; band < nbands; band++)
	{
	  for (k=0; k < nkpts; k++)
	    dft_log("%le  ",evars->Hsub_eigs[k][band]);
	  dft_log("\n");
	}
      dft_log("\n");
      dft_log_flush();

      /* If this is the last iteration, don't linmin, but
       * quit the iteration loop and exit the routine */
      /* NOTE(review): here cntrl.print() is called only when the energy
       * is NOT converged, whereas below it is called when it IS
       * converged -- confirm this asymmetry is intended. */
      if (iter==niter-1) {
	if (! cntrl.if_e_converged(E)) // check Etot convergence.
	  cntrl.print(dft_global_log,iter);
	break;
      }

      // If the energy is converged, break the loop.
      if (cntrl.if_e_converged(E)) {
	cntrl.print(dft_global_log,iter);
	break;
      }

      /* Calculate search direction */
      alpha = 0.0;
      /* If we're given the reset CG signal, keep alpha at zero and
       * reset the signal flag. */
      if (resetcg_signal == 1)
	resetcg_signal = 0;
      /* If we've done a "good" linmin, use CG: */
      /* i.e. calculate alpha as the ratio of the squared gradient
       * lengths (the Fletcher-Reeves form); otherwise alpha stays zero
       * and this step is plain steepest descent. */
      else if (iter>0 && fabs(linminfact) < 0.05 && fabs(cosgg) < 0.1)
	alpha = abs2_grad_now/abs2_grad_old;
      dft_log("|grad| = %le\n",2.0*sqrt(abs2_grad_now));
      dft_log("alpha = %8.1le\n",alpha);
      dft_log_flush();

      /* Calculate current search direction:
       * d_now = 2.0*grad_now + alpha*d_old */
      for (k=0; k < nkpts; k++)
	{
	  /* zero out dir[k] on first iteration */
	  if (iter == 0)
	    dir[k].zero_out();
	  dir[k] *= alpha;
	  dir[k] += grad_now[k];
	  dir[k] += grad_now[k];
	}

      /* Do a linmin along dir */
      stepsize = do_linmin_all_bands(dir,basis,
				     ioninfo,einfo,
				     evars,
				     E,grad_now,stepsize,
				     cntrl);

      /* copy old gradient */
      for (k=0; k < nkpts; k++)
	grad_old[k] = grad_now[k];
    }

  /* Free up memory used by all the gradients and directions */
  free_column_bundle_array(nkpts,grad_now);
  free_column_bundle_array(nkpts,grad_old);
  free_column_bundle_array(nkpts,dir);

  /* return stepsize used */
  return stepsize;

}
		    

/*
 * This function either reads C_filename or uses random numbers
 * for initial band C variables.  It allocates space and variables
 * for the bands, and then does niter_cg CG cycles on the
 * bands.  It then frees up the memory used, writes the final C
 * values to the file 'C_bands.final', returns.
 */
void
calc_all_bands(int niter_cg,real stepsize,
	       Basis *basis,
	       Elecinfo *einfo,
	       Elecvars *evars,
	       Ioninfo *ioninfo,
	       char *init_C_action,char *C_filename,
	       Control &cntrl)
{
  int k,nkpts;

  nkpts = einfo->nkpts;
  dft_log("\n----- calc_all_bands() -----\n");
  dft_log_flush();

  /* Either read the initial states from disk... */
  if (strcmp(init_C_action,"read") == 0)
    {
      dft_log("\n-----> Reading C from '%s'\n\n",C_filename);
      dft_log_flush();
      read_column_bundle_array(C_filename,nkpts,evars->Y);
    }
  /* or randomize the empty states. */
  else
    {
      dft_log("\n---->  Randomizing bands.\n\n");
      dft_log_flush();
      System::seed_with_time();
      randomize_column_bundle_array(nkpts,evars->Y);
      /* Orthonormalize the Y */
      calc_UVC(einfo,evars);
      for (k=0; k < nkpts; k++)
	evars->Y[k] = evars->C[k];
    }

  /* Do CG on the bands */
  minimize_all_bands_cg(niter_cg,stepsize,basis,ioninfo,
			einfo,evars,cntrl);

  /* Change C_empty to be the digonal basis of Hsub */
  dft_log("\nSwitching to diagonal basis of Hsub:  C <- HsubC\n\n");
  dft_log_flush();

  for (k=0; k < nkpts; k++)
    {
      do_column_bundle_matrix_mult(evars->C[k],
				   evars->Hsub_evecs[k],
				   evars->Y[k],0);
      evars->C[k] = evars->Y[k];
    }
  
  /* Write out empty state variables */
  dft_log("\n\nWriting out C_bands.final\n\n");
  dft_log_flush();
  write_column_bundle_array("C_bands.final",nkpts,evars->C);
}

