/****************************************************************************
 *
 * DFT++:  density functional package developed by
 *         the research group of Prof. Tomas Arias, MIT.
 *
 * Principal author: Sohrab Ismail-Beigi
 *
 * Modifications for MPI version: Kenneth P Esler,
 *                                Sohrab Ismail-Beigi, and
 *                                Tairan Wang.
 *
 * Modifications for LSD version: Jason A Cline
 *
 * Modifications for lattice/Pulay forces: Gabor Csanyi and
 *                                         Sohrab Ismail-Beigi
 *
 * Copyright (C) 1996-1998 The Massachusetts Institute of Technology (MIT).
 *
 ****************************************************************************/

/*
 * atomic_forces.c:   Sohrab Ismail-Beigi     May 6, 1997
 *                        
 * A set of routines to calculate the forces on the atoms
 *
 */

/* $Id: calcionicforces.c,v 1.1.1.1 1999/11/10 01:30:17 tairan Exp $ */

#include <stdio.h>
#include <math.h>
#include "header.h"
#include "parallel.h"

/*
 * calc_ionic_forces_Vloc:
 *
 * Adds to ioninfo->species[].forces[] the contribution that comes from the
 * derivative of the local pseudopotential with respect to the atomic
 * coordinates:
 *
 *   f_sp_nat = - (  dV_loc/dR_sp_nat ^ (Jn_v) )
 *
 * elecvars : supplies the density elecvars->n, which is fed to apply_J().
 * ioninfo  : species/atom data; forces[] is read and then overwritten with
 *            the updated values.
 * basis    : the Basis used for every work vector here (length basis->NxNyNz).
 *
 * For each species the atoms are cut into contiguous slices, one per
 * process.  Each process fills only its own slice of a scratch buffer with
 * (old force - derivative term); the buffers are then combined into
 * forces[] (sum-reduction under DFT_MPI, plain copy otherwise).
 */
void
calc_ionic_forces_Vloc(Elecvars *elecvars,
                       Ioninfo *ioninfo,
                       Basis *basis)
{
  const int nprocs = System::Get_N_Procs();
  const int my_rank = System::Get_procID();

  dft_log("\n--- calc_atomic_forces_Vloc() ---\n");

  /* J applied to the density: computed once, reused for every atom. */
  vector Jn(basis->NxNyNz, basis);
  apply_J(elecvars->n,Jn);

  for (int sp = 0; sp < ioninfo->nspecies; sp++)
    {
      const int natoms = ioninfo->species[sp].natoms;

      dft_log("species = %d  natoms = %d\n", sp, natoms);
      dft_log_flush();

      /* Split natoms over the processes: the first `leftover` ranks
       * take share+1 atoms, all remaining ranks take `share`. */
      const int leftover = natoms % nprocs;
      const int share = (natoms - leftover)/nprocs;

      /* mymalloc also zeroes the buffer, so the entries that belong to
       * other processes contribute nothing to the sum below. */
      vector3 *partial = (vector3 *)mymalloc(sizeof(vector3)*natoms,
                                             "force_buf[]",
                                             "Vloc_force");

      int first, count;
      if (my_rank < leftover)
        {
          first = my_rank*(share+1);
          count = share+1;
        }
      else
        {
          first = my_rank*share+leftover;
          count = share;
        }

      /* Only build the derivative work vectors if this process actually
       * owns some atoms of the species. */
      if (count > 0)
        {
          int ngrid = basis->NxNyNz;
          vector dVx(ngrid,basis), dVy(ngrid,basis), dVz(ngrid,basis);
          vector *dV[3] = { &dVx, &dVy, &dVz };

          for (int at = first; at < first+count; at++)
            {
              dVloc_pseudo_datom_pos(basis,ioninfo,sp,at,
                                     dVx.c,dVy.c,dVz.c);
              for (int c = 0; c < 3; c++)
                partial[at].v[c] =
                  ioninfo->species[sp].forces[at].v[c] - REAL((*dV[c])^Jn);
            }
        }

#ifdef DFT_MPI
      /* Share the result with everybody.  A set of broadcasts would also
       * do; an Allreduce is simple but may not be the best choice.
       * NOTE(review): the count 3*natoms with MPI_DOUBLE presumes vector3
       * is three contiguous doubles -- confirm. */
#ifdef DFT_PROFILING
      timerOn(13);  // Turn on other MPI_Allreduce timer
#endif // DFT_PROFILING
      MPI_Allreduce ( partial, &(ioninfo->species[sp].forces[0]),
                      3*natoms, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD );
#ifdef DFT_PROFILING
      timerOff(13);  // Turn off other MPI_Allreduce timer
#endif // DFT_PROFILING

#else // DFT_MPI
      for (int at = 0; at < natoms; at++)
        ioninfo->species[sp].forces[at] = partial[at];
#endif // DFT_MPI

      myfree(partial);
    } /* species (sp) loop */

  dft_log("\n");
  dft_log_flush();

}


/*
 * calc_ionic_forces_Vnl:
 *
 * Accumulates into ioninfo->species[].forces[] the contribution from the
 * derivative of the non-local pseudopotential with respect to the atomic
 * coordinates.
 *
 * For every species sp, every lm channel and every k-point k, with
 *
 *   VdagC  = Vnl ^ C[k]      and      dVdagC = dV{x,y,z} ^ C[k],
 *
 * the amount added to a force component is
 *
 *   ngamma[lm] > 1 (multiple projectors, one atom at a time):
 *     -2 * REAL( w[k] * trace( M[lm] * dVdagC * F[k] * herm_adjoint(VdagC) ) )
 *
 *   otherwise (Kleinman-Bylander, all atoms of the species at once):
 *     REAL of entry i of
 *     -2 * w[k] * M[lm](0,0) * diag( dVdagC * F[k] * herm_adjoint(VdagC) )
 *
 * elecinfo : nkpts, nbands, kvec[], weights w[] and fillings F[].
 * elecvars : wavefunction column_bundles C[].
 * ioninfo  : species data (natoms, nlm, ngamma[], M[]); forces[] is updated.
 * basis    : array of Basis indexed by k-point (basis[k]).
 */
void
calc_ionic_forces_Vnl(Elecinfo *elecinfo,
                      Elecvars *elecvars,
                      Ioninfo *ioninfo,
                      Basis *basis)
{
  column_bundle *C = elecvars->C;
  diag_matrix *F = elecinfo->F;
  real *w = elecinfo->w;

  dft_log("\n--- calc_atomic_forces_Vnl() ---\n");

  for (int sp = 0; sp < ioninfo->nspecies; sp++)
    for (int lm = 0; lm < ioninfo->species[sp].nlm; lm++)
      {
        const int natoms = ioninfo->species[sp].natoms;

        dft_log("species = %d   nlm = %d   lm = %d   natoms = %d\n",
                sp,ioninfo->species[sp].nlm,lm,
                natoms);
        dft_log_flush();

        if (ioninfo->species[sp].ngamma[lm] > 1)
          {
            /* Several projectors for this lm: handle the atoms one by one. */
            dft_log(DFT_SILENCE,
                    "\nMultiple-projectors: running slow nl forces!\n");

            matrix VdagC(ioninfo->species[sp].ngamma[lm],elecinfo->nbands);
            matrix dVdagC(ioninfo->species[sp].ngamma[lm],elecinfo->nbands);
            matrix &Mnl = ioninfo->species[sp].M[lm]; /* reference */

            for (int k = 0; k < elecinfo->nkpts; k++)
              {
                column_bundle Vnl(ioninfo->species[sp].ngamma[lm],
                                  basis[k].nbasis,"local");
                column_bundle dVx(ioninfo->species[sp].ngamma[lm],
                                  basis[k].nbasis,"local");
                column_bundle dVy(ioninfo->species[sp].ngamma[lm],
                                  basis[k].nbasis,"local");
                column_bundle dVz(ioninfo->species[sp].ngamma[lm],
                                  basis[k].nbasis,"local");
                column_bundle *dV[3] = { &dVx, &dVy, &dVz };

                /* Only Vnl receives the innards of C[k]. */
                copy_innards_column_bundle(&(C[k]),&Vnl);

                for (int at = 0; at < natoms; at++)
                  {
                    dVnl_pseudo_datom_pos(sp,at,lm,
                                          elecinfo->kvec[k],&basis[k],ioninfo,
                                          Vnl,dVx,dVy,dVz);
                    VdagC = Vnl^C[k];

                    /* x, y and z components in turn */
                    for (int c = 0; c < 3; c++)
                      {
                        dVdagC = (*dV[c])^C[k];
                        ioninfo->species[sp].forces[at].v[c] +=
                          -2.0*REAL(w[k]*trace(Mnl*dVdagC*F[k]*herm_adjoint(VdagC)));
                      }
                  }
              }
          }
        else
          {
            /* Kleinman-Bylander: gather the projectors of all atoms of this
             * species into one big column_bundle and work on that instead
             * (should be faster because ^ and * are block multiplies). */
            matrix VdagC(natoms,elecinfo->nbands);
            matrix dVdagC(natoms,elecinfo->nbands);
            scalar Mnl = ioninfo->species[sp].M[lm](0,0);
            vector fvector(natoms);

            /* Avoid repeated allocation/deallocation of column_bundles:
             * size them once for the largest basis and reuse them. */
            int max_nbasis = 0;
            for (int k = 0; k < elecinfo->nkpts; k++)
              if (max_nbasis < basis[k].nbasis)
                max_nbasis = basis[k].nbasis;

            /* Single-column scratch space for one atom at a time. */
            column_bundle Vnloneatom(1,max_nbasis,"local");
            column_bundle dVxoneatom(1,max_nbasis,"local");
            column_bundle dVyoneatom(1,max_nbasis,"local");
            column_bundle dVzoneatom(1,max_nbasis,"local");

            // Vnl, dVx, dVy, dVz are created as distributed column_bundles;
            // the distributed dimension is the number of atoms.
            column_bundle Vnl(natoms,max_nbasis);
            column_bundle dVx(natoms,max_nbasis);
            column_bundle dVy(natoms,max_nbasis);
            column_bundle dVz(natoms,max_nbasis);
            column_bundle *dV[3] = { &dVx, &dVy, &dVz };

            for (int k = 0; k < elecinfo->nkpts; k++)
              {
                const int nbasis_k = basis[k].nbasis;

                // Manually shrink the column length to this k-point's basis.
                Vnloneatom.col_length = nbasis_k;
                dVxoneatom.col_length = nbasis_k;
                dVyoneatom.col_length = nbasis_k;
                dVzoneatom.col_length = nbasis_k;

                Vnl.col_length = nbasis_k;
                dVx.col_length = nbasis_k;
                dVy.col_length = nbasis_k;
                dVz.col_length = nbasis_k;
                copy_innards_column_bundle(&(C[k]),&Vnl);

                // Each process fills only the columns (atoms) it owns.
                for (int col = 0; col < Vnl.my_ncols; col++)
                  {
                    dVnl_pseudo_datom_pos(sp,col+Vnl.start_ncol,lm,
                                          elecinfo->kvec[k],
                                          &basis[k],ioninfo,
                                          Vnloneatom,
                                          dVxoneatom,dVyoneatom,dVzoneatom);

                    /* Copy the one-atom results into column `col`. */
                    for (int j = 0; j < nbasis_k; j++)
                      {
                        Vnl.col[col].c[j] = Vnloneatom.col[0].c[j];
                        dVx.col[col].c[j] = dVxoneatom.col[0].c[j];
                        dVy.col[col].c[j] = dVyoneatom.col[0].c[j];
                        dVz.col[col].c[j] = dVzoneatom.col[0].c[j];
                      }
                  }

                /* Now use Vnl and dVx/y/z to calculate the forces. */
                VdagC = Vnl^C[k];

                /* x, y and z components in turn */
                for (int c = 0; c < 3; c++)
                  {
                    dVdagC = (*dV[c])^C[k];
                    fvector = -2.0*w[k]*Mnl*diag(dVdagC*F[k]*herm_adjoint(VdagC));
                    for (int at = 0; at < natoms; at++)
                      ioninfo->species[sp].forces[at].v[c] += REAL(fvector.c[at]);
                  }
              }
          }

      }

  dft_log("\n");
  dft_log_flush();

}


/*
 * calc_ionic_forces_Ewald:
 *
 * Adds to ioninfo->species[].forces[] the contribution from the derivative
 * of the Ewald energy with respect to the atomic positions: for each atom
 * the value returned by dEwald_datom_pos() is subtracted from the force.
 *
 * ioninfo : species/atom data; forces[] is read and then overwritten with
 *           the updated values.
 * basis   : only basis->latvec is used here.
 *
 * For each species the atoms are cut into contiguous slices, one per
 * process.  Each process fills only its own slice of a scratch buffer; the
 * buffers are then combined into forces[] (sum-reduction under DFT_MPI,
 * plain copy otherwise).
 */
void
calc_ionic_forces_Ewald(Ioninfo *ioninfo,
                        Basis *basis)
{
  int sp, i, r, m, howmany, atom_start, natoms;
  int NPROCS = System::Get_N_Procs();
  int MY_ID = System::Get_procID();
  vector3 *force_buf;

  dft_log("\n------ calc_ionic_forces_ewald() ------\n");

  /* loop over species */
  for (sp=0; sp < ioninfo->nspecies; sp++)
    {
      dft_log("species = %d   natoms = %d\n",
              sp,ioninfo->species[sp].natoms);
      dft_log_flush();

      // Parallelize with MPI: the first r processes take m+1 atoms,
      // the remaining ones take m.
      natoms = ioninfo->species[sp].natoms;
      r = natoms % NPROCS;
      m = (natoms - r)/NPROCS;

      /* Entries outside this process's slice must be zero for the
       * sum-reduction below to give the right answer.
       * NOTE(review): this relies on mymalloc returning zeroed memory --
       * confirm.
       * The context tag names this routine (it used to read "Vloc_force",
       * copied from the local-pseudopotential routine). */
      force_buf = (vector3 *)mymalloc(sizeof(vector3)*natoms,"force_buf[]",
                                      "Ewald_force");

      if (MY_ID < r) { atom_start = MY_ID*(m+1); howmany = m+1; }
      else { atom_start = MY_ID*m+r; howmany = m; }

      if (howmany > 0)
        for (i=atom_start; i<atom_start+howmany; i++)
          force_buf[i] = ioninfo->species[sp].forces[i]
                         - dEwald_datom_pos(ioninfo,basis->latvec,sp,i);

#ifdef  DFT_MPI
      // Share the result with everybody.  A set of broadcasts would also
      // do; an Allreduce is simple but may not be the best choice.
      // NOTE(review): the count 3*natoms with MPI_DOUBLE presumes vector3
      // is three contiguous doubles -- confirm.
#ifdef DFT_PROFILING
      timerOn(13);  // Turn on other MPI_Allreduce timer
#endif // DFT_PROFILING
      MPI_Allreduce ( force_buf, &(ioninfo->species[sp].forces[0]),
                      3*natoms, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD );
#ifdef DFT_PROFILING
      timerOff(13);  // Turn off other MPI_Allreduce timer
#endif // DFT_PROFILING

#else // DFT_MPI
      for (i = 0; i < natoms; i++)
        ioninfo->species[sp].forces[i] = force_buf[i];
#endif // DFT_MPI

      myfree(force_buf);

    }

  dft_log("\n");
  dft_log_flush();
}


/*
 * calc_ionic_forces_Vxc:
 *
 * Force correction due to the non-linear core correction to the
 * exchange-correlation energy.
 *
 *   Exc = (Jn)^(O(J(exc(n)))),  where n = n_core + n_valence
 *
 * and n_core is a functional of the ion positions {R_i}, so
 *
 *   f_sp_nat = - ((dn_core(G)/dR_i)^(O(J(exc(n)))+Idag pointwise_mult(Jdag(O(J(n))),excprime(n))))
 *
 * with  dn_core(G)/dR_i = -i G n_core(G) exp(-iG*R_i)
 *
 * elecvars : n (valence density), ncore (core density) and ex_opt.
 * ioninfo  : species data; forces[] of species with nl_core_flag set get
 *            the result of ncore_force() added to them.
 * basis    : the Basis used for the work vectors (length basis->NxNyNz).
 *
 * Returns immediately, touching nothing, when elecvars->ncore.n <= 0.
 * Otherwise elecvars->n temporarily holds the total density while the
 * routine runs and has the core density subtracted again before returning.
 * NOTE(review): the add-then-subtract restore is subject to floating-point
 * roundoff, so n may not come back bit-for-bit -- confirm this is acceptable.
 */
void
calc_ionic_forces_Vxc(Elecvars *elecvars,
                      Ioninfo *ioninfo,
                      Basis *basis)
{
  /* No core density: no non-linear core correction is needed. */
  if (elecvars->ncore.n <= 0)
    return;

  int ngrid = basis->NxNyNz;

  /* Work directly on elecvars->n: from here on it is the total density. */
  vector &density = elecvars->n;
  density += elecvars->ncore;

  dft_log("\n--- calc_atomic_forces_Vxc() ---\n");

  vector T(ngrid, basis);
  vector ncore(ngrid, basis);

  if (elecvars->ex_opt == DFT_EXCORR_LDA)
    {
      /* LDA */
      T = O(J(exc(density))) + Idag(pointwise_mult(Jdag_O_J(density),excprime(density)));
    }
  else
    {
      /* GGA: extra exGCprime term inside J */
      T = O(J(exc(density)+exGCprime(density))) + Idag(pointwise_mult(Jdag_O_J(density),excprime(density)));
    }

  /* Per-atom forces, only for species that carry a core correction. */
  for (int sp = 0; sp < ioninfo->nspecies; sp++)
    {
      if (!ioninfo->species[sp].nl_core_flag)
        continue;

      /* Core charge of species sp goes into the local work vector. */
      ncore_sp(basis,ioninfo->species[sp],ncore.c);

      for (int at = 0; at < ioninfo->species[sp].natoms; at++)
        ioninfo->species[sp].forces[at] +=
          ncore_force(basis,ioninfo->species[sp],at,T,ncore);
    }

  /* Take the core density back out so elecvars->n is the valence density. */
  density -= elecvars->ncore;

}


/*
 * calc_ionic_forces:
 *
 * Calculates the forces on the ions (in lattice coordinates).
 * forces[] is first zeroed for every atom of every species; then the
 * derivatives of the following energy terms are accumulated in this order:
 *   1. local pseudopotential energy,
 *   2. non-local pseudopotential energy,
 *   3. Ewald energy,
 *   4. non-linear core correction to the exchange-correlation energy (if any).
 * Finally ioninfo->constrain_forces_to_lines() is applied.
 *
 * basis is an array: entries 0..nkpts-1 are handed to the non-local routine,
 * and the entry at index elecinfo->nkpts is handed to the other three.
 * NOTE(review): that last entry is presumably the basis for the full
 * real-space/FFT grid -- confirm against the code that builds the array.
 */
void
calc_ionic_forces(Elecinfo *elecinfo,
                  Elecvars *elecvars,
                  Ioninfo *ioninfo,
                  Basis *basis)
{
  /* Start from zero forces on every atom. */
  for (int sp = 0; sp < ioninfo->nspecies; sp++)
    {
      const int natoms = ioninfo->species[sp].natoms;
      for (int at = 0; at < natoms; at++)
        ioninfo->species[sp].forces[at] = 0.0;
    }

  /* The Basis that follows the per-k-point entries. */
  Basis *basis_after_kpts = &basis[elecinfo->nkpts];

  /* local pseudopot contribution */
  calc_ionic_forces_Vloc(elecvars,ioninfo,basis_after_kpts);

  /* non-local pseudopot contribution */
  calc_ionic_forces_Vnl(elecinfo,elecvars,ioninfo,basis);

  /* Ewald contribution */
  calc_ionic_forces_Ewald(ioninfo,basis_after_kpts);

  /* corrections due to the nlcore correction to XC, if any */
  calc_ionic_forces_Vxc(elecvars,ioninfo,basis_after_kpts);

  // Maxim added on 11/1/99 to implement line constraints on ion dynamics
  ioninfo->constrain_forces_to_lines();

}
