/****************************************************************************
 *
 * DFT++:  density functional package developed by
 *         the research group of Prof. Tomas Arias, MIT.
 *
 * Principal author: Sohrab Ismail-Beigi
 *
 * Modifications for MPI version: Kenneth P Esler,
 *                                Sohrab Ismail-Beigi, and
 *                                Tairan Wang.
 *
 * Modifications for LSD version: Jason A Cline
 *
 * Modifications for lattice/Pulay forces: Gabor Csanyi and
 *                                         Sohrab Ismail-Beigi
 *
 * Copyright (C) 1996-1998 The Massachusetts Institute of Technology (MIT).
 *
 ****************************************************************************/

/*
 * Tairan Wang,               Apr 29, 1999
 *
 *
 * This file houses the code that implements nonlinear core correction.
 *
 * Ref:
 *  1. S. G. Louie, S. Froyen, and M. L. Cohen, Phys. Rev. B 26, 1738 (1982).
 *
 */

/* $Id: nlcore.c,v 1.2 1999/11/11 02:40:31 tairan Exp $ */

#include <stdio.h>
#include "header.h"


/*
 * The non-linear core correction to the charge density
 * while calculating exchange-correlation
 */
void nlcore_setup(const Ioninfo &ioninfo, Elecvars &elecvars)
{
  vector &ncore = elecvars.ncore;
  Basis *basis = elecvars.n.basis;
  Speciesinfo *sp_info;
  int Nx = basis->Nx, Ny = basis->Ny, Nz = basis->Nz, NyNz = Ny*Nz;
  real GGTxx = basis->GGT.m[0][0], 
    GGTyy = basis->GGT.m[1][1], 
    GGTzz = basis->GGT.m[2][2], 
    GGTxy = basis->GGT.m[0][1], 
    GGTxz = basis->GGT.m[0][2], 
    GGTyz = basis->GGT.m[1][2];
  int Nx2 = Nx/2, Ny2 = Ny/2, Nz2 = Nz/2;
  int sp, ir, m[3], ngrid, index;
  real *fun, *fun1, clog, *r, *core, G, arg;
  const real fourpi = (real)4.0*M_PI;


  // Allocate temporary work space.
  ngrid = 0;
  for (sp=0; sp<ioninfo.nspecies; sp++) {
    sp_info = &(ioninfo.species[sp]);
    if ( (sp_info->nl_core_flag) &&
	 (sp_info->n_core_grid > ngrid) ) {
      ngrid = sp_info->n_core_grid;
    }
  }
  if (ngrid > 0) {

    fun = (real*)mymalloc(sizeof(real)*ngrid,"fun","nlcore_setup");
    fun1 = (real*)mymalloc(sizeof(real)*ngrid,"fun1","nlcore_setup");

    // Calculate the G-space ncore, summed over species:
    // ncore_total(G) = sum(ncore_total(G,sp));
    ncore.zero_out();
    for (sp = 0; sp < ioninfo.nspecies; sp++) {
      sp_info = &(ioninfo.species[sp]);

      if ( sp_info->nl_core_flag ) {

	ngrid = sp_info->n_core_grid;
	clog  = sp_info->clog_core_grid;
	r     = sp_info->core_r;
	core  = sp_info->n_core;

	// ncore(sp,r) -> ncore(sp,G) ;
	for (ir=0; ir < ngrid; ir++) {
	  fun[ir] = (r[ir]*r[ir]*r[ir])*core[ir]*fourpi;
	}
	for (m[0]=-Nx2+1; m[0] < Nx2; m[0]++) 
	  for (m[1]=-Ny2+1; m[1] < Ny2; m[1]++)
	    for (m[2]=-Nz2+1; m[2] < Nz2; m[2]++) {
	      if (m[0]==0 && m[1]==0 && m[2]==0) {	    /* G = 0  case */
		// formf_at(0,sp) = simps(spinfo->fhi_ngrid[0], fun, spinfo->fhi_clog[0]);
		ncore.c[0] += simps(ngrid, fun, clog) * sp_info->natoms;
		
	      } else {
		index = 0;
		if (m[2] < 0) index += m[2]+Nz;        else index += m[2];
		if (m[1] < 0) index += Nz*(m[1]+Ny);   else index += Nz*m[1];
		if (m[0] < 0) index += NyNz*(m[0]+Nx); else index += NyNz*m[0];
		G = sqrt(m[0]*m[0]*GGTxx + m[1]*m[1]*GGTyy + m[2]*m[2]*GGTzz +
			 2*(m[0]*m[1]*GGTxy + m[0]*m[2]*GGTxz + m[1]*m[2]*GGTyz));
		for (ir=0; ir < ngrid; ir++) {
		  arg = r[ir] * G;
		  fun1[ir] = fun[ir] * sin(arg)/arg;
		}
		// formf_at(index,sp) = simps(spinfo->fhi_ngrid[0],fun1,spinfo->fhi_clog[0]);
		ncore.c[index] += simps(ngrid, fun1, clog) 
		  * structfact(sp_info->natoms, sp_info->atpos, m );
	      }
	    }
      }
    }

    // report total core charge as in  ncore[G=0] 
    dft_log(">>nlcore_setup: ncore[G=0] = %g\n", REAL(ncore.c[0]));

    // Convert to real space and save:
    // ncore_total(r) = FFTW(ncore_total(G));
    ncore = I(ncore);
    ncore *= 1.0/sqrt(ncore.basis->unit_cell_volume);

    // Free space
    myfree(fun);
    myfree(fun1);

    // report total core charge from real space integral:
    real sum = REAL(sum_vector(ncore));
    sum = sum * ncore.basis->unit_cell_volume / ncore.n;
    dft_log(">>nlcore_setup: total core electrons = %g\n",
	    sum);

  }

  return;
}


/*
 * ncore_sp: calculates  n_core_sp(G) = -i n_core(G)  for one species.
 *
 *   basis     - supplies the FFT box dimensions (Nx, Ny, Nz) and the
 *               metric GGT used to obtain |G|.
 *   spinfo    - species whose radial core data (core_r, n_core,
 *               n_core_grid, clog_core_grid) is transformed.
 *   n_core_sp - output array indexed as iz + Nz*(iy + Ny*ix).
 *
 * Only the imaginary part (.y) of each visited entry is written; the
 * real part (.x) is never touched.  The loops visit the components with
 * -N/2 < n < N/2 (integer division) in each direction, so any other
 * entries of n_core_sp are left as the caller supplied them.
 */
void ncore_sp(Basis *basis, Speciesinfo &spinfo, complex *n_core_sp)
{
  int Nx, Ny, Nz, Nx2, Ny2, Nz2;
  real *r, *core, *fun, *fun1, clog, G2, G, arg;
  int ngrid, ir, n[3], beta[3], index, i, j;
  const real fourpi = (real)4.0*M_PI;

  /* Copy Nx, Ny, Nz from basis */
  Nx = basis->Nx;  Nx2 = Nx/2;
  Ny = basis->Ny;  Ny2 = Ny/2;
  Nz = basis->Nz;  Nz2 = Nz/2;

  ngrid = spinfo.n_core_grid;
  clog  = spinfo.clog_core_grid;
  r     = spinfo.core_r;
  core  = spinfo.n_core;

  /* Scratch buffers; the labels name the variable and this routine so
   * that allocation diagnostics point at the right place. */
  fun  = (real*)mymalloc(sizeof(real)*ngrid,"fun","ncore_sp");
  fun1 = (real*)mymalloc(sizeof(real)*ngrid,"fun1","ncore_sp");

  /* G-independent part of the radial integrand: 4 pi r^3 n_core(r) */
  for (ir = 0; ir < ngrid; ir++)
    fun[ir] = (r[ir]*r[ir]*r[ir])*core[ir]*fourpi;

  /* Loop over FFT box */
  for (n[0] = -Nx2+1; n[0] < Nx2; n[0]++)
    for (n[1] = -Ny2+1; n[1] < Ny2; n[1]++)
      for (n[2] = -Nz2+1; n[2] < Nz2; n[2]++) {

        if (n[0] != 0 || n[1] != 0 || n[2] != 0) {
          /* G != 0 */

          /* Negative frequencies wrap to the upper half of the box. */
          beta[0] = (n[0] < 0) ? n[0]+Nx : n[0];
          beta[1] = (n[1] < 0) ? n[1]+Ny : n[1];
          beta[2] = (n[2] < 0) ? n[2]+Nz : n[2];

          index = beta[2]+Nz*(beta[1]+Ny*beta[0]);

          /* |G|^2 = n . GGT . n */
          G2 = (real)0.0;
          for (i=0; i < 3; i++)
            for (j=0; j < 3; j++)
              G2 += (real)(n[i]*n[j])*basis->GGT.m[i][j];
          G = sqrt(G2);

          /* Form factor from raw data.
           * NOTE(review): the division assumes r[ir] != 0 on the radial
           * grid - confirm the grid never contains the origin. */
          for (ir=0; ir < ngrid; ir++) {
            arg = r[ir] * G;
            fun1[ir] = fun[ir] * sin(arg)/arg;
          }
          n_core_sp[index].y = -simps(ngrid, fun1, clog);

        } else {
          /* G = 0 component: no sin(x)/x factor. */
          n_core_sp[0].y = -simps(ngrid, fun, clog);
        }
      } /* loop on FFT box */

  /* Free temporary space. */
  myfree(fun);
  myfree(fun1);

  return;
}


/*
 * ncore_force: calculates the core-correction force on one atom.
 *
 *   T = O(J(exc(n))) + Idag(pointwise_mult(Jdag_O_J(n),excprime(n)))
 *   ncore_sp = -i n_core_sp(G)
 *
 *   force = - REAL( sum( T(G) ncore_sp(G) exp(-iG tau) G ) )
 *
 *   basis    - supplies the FFT box dimensions (Nx, Ny, Nz).
 *   spinfo   - species of the atom; only atpos[nat] is read.
 *   nat      - index of the atom within the species.
 *   T        - G-space vector, read through T.c[index].
 *   ncore_sp - G-space vector, read through ncore_sp.c[index].
 *
 * Returns the accumulated vector3; component j receives
 *   - 2*pi * n[j] * REAL( T[G] * ncore_sp[G] * Stau )
 * for every visited G != 0, with -N/2 < n < N/2 (integer division) in
 * each direction.  The G = 0 term is skipped since it would be
 * multiplied by n = 0.
 */
vector3 ncore_force(Basis *basis, Speciesinfo &spinfo, int nat,
                    vector &T, vector &ncore_sp)
{
  vector3 f(0.0,0.0,0.0);
  int n[3], beta[3], index, Nx, Ny, Nz, Nx2, Ny2, Nz2;
  real taux, tauy, tauz, phase, tmp;
  complex Stau;
  const real twopi = (real)2.0*M_PI;

  /* Copy Nx, Ny, Nz from basis */
  Nx = basis->Nx;  Nx2 = Nx/2;
  Ny = basis->Ny;  Ny2 = Ny/2;
  Nz = basis->Nz;  Nz2 = Nz/2;

  /* The atom position does not depend on G: read it once. */
  taux = spinfo.atpos[nat].v[0];
  tauy = spinfo.atpos[nat].v[1];
  tauz = spinfo.atpos[nat].v[2];

  /* Loop over FFT box */
  for (n[0] = -Nx2+1; n[0] < Nx2; n[0]++)
    for (n[1] = -Ny2+1; n[1] < Ny2; n[1]++)
      for (n[2] = -Nz2+1; n[2] < Nz2; n[2]++) {

        /* G = 0 contributes nothing */
        if (n[0] == 0 && n[1] == 0 && n[2] == 0)
          continue;

        /* Negative frequencies wrap to the upper half of the box. */
        beta[0] = (n[0] < 0) ? n[0]+Nx : n[0];
        beta[1] = (n[1] < 0) ? n[1]+Ny : n[1];
        beta[2] = (n[2] < 0) ? n[2]+Nz : n[2];

        index = beta[2]+Nz*(beta[1]+Ny*beta[0]);

        /* Stau = exp(-2*pi*i*dot(G,tau)) */
        phase = -twopi*(n[0]*taux + n[1]*tauy + n[2]*tauz);
        Stau.x = cos(phase);
        Stau.y = sin(phase);

        /* f[j] += - REAL( T[G] * ncore_sp[G] * Stau ) * (2pi * n[j]) */
        tmp = REAL( T.c[index] * ncore_sp.c[index] * Stau );
        tmp *= twopi;

        f.v[0] -= tmp * n[0];
        f.v[1] -= tmp * n[1];
        f.v[2] -= tmp * n[2];
      }

  return f;
}


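/*
 * Calculates   dn_core(G)/dR_sp_nat = -i G n_core(G) exp(-iG*R_sp_nat)
 *
 * NOTE: the routine that follows (dncore_datom_pos) is entirely
 * commented out and is therefore not compiled.
 */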
/*
void dncore_datom_pos(Basis *basis, Ioninfo *ioninfo, int sp, int nat,
		      complex *dncorex, complex *dncorey, complex *dncorez)
{
  Speciesinfo *spinfo = &ioninfo->species[sp];
  int Nx,Ny,Nz,NxNyNz, Nx2,Ny2,Nz2;
  real svol, *r, *core, *fun, *fun1, clog, G2, G, arg, ncore_G;
  real taux, tauy, tauz, phase;
  complex Stau;
  int ngrid, ir, n[3], beta[3], index, i, j;
  const real twopi = (real)2.0*M_PI;
  const real fourpi = (real)4.0*M_PI;
  real fourpi_omega = 2*twopi/basis->unit_cell_volume;


  // Copy Nx, Ny, Nz from basis
  Nx = basis->Nx;  Nx2 = Nx/2;
  Ny = basis->Ny;  Ny2 = Ny/2;
  Nz = basis->Nz;  Nz2 = Nz/2;
  NxNyNz = Nx * Ny * Nz;

  // Calculate 1/sqrt(unitcellvol)
  svol = (real)1.0/sqrt(basis->unit_cell_volume);

  ngrid = spinfo->fhi_ngrid[0];
  clog  = spinfo->fhi_clog[0];
  r     = spinfo->fhi_core_r;
  core  = spinfo->fhi_n_core;
  fun = (real*)mymalloc(sizeof(real)*ngrid,"fun","dncore_datom_pos");
  fun1 = (real*)mymalloc(sizeof(real)*ngrid,"fun","dncore_datom_pos");
  for (ir = 0; ir < ngrid; ir++)
    fun[ir] = (r[ir]*r[ir]*r[ir])*core[ir]*fourpi;

  // Loop over FFT box
  for (n[0] = -Nx2+1; n[0] < Nx2; n[0]++)
    for (n[1] = -Ny2+1; n[1] < Ny2; n[1]++)
      for (n[2] = -Nz2+1; n[2] < Nz2; n[2]++) {

	// G != 0
	if (n[0] != 0 || n[1] != 0 || n[2] != 0)
	  {

	    if (n[0] < 0) beta[0] = n[0]+Nx;
	    else          beta[0] = n[0];
	    if (n[1] < 0) beta[1] = n[1]+Ny;
	    else          beta[1] = n[1];
	    if (n[2] < 0) beta[2] = n[2]+Nz;
	    else          beta[2] = n[2];
	  
	    index = beta[2]+Nz*(beta[1]+Ny*beta[0]);
	      
	    G2 = (real)0.0;
	    for (i=0; i < 3; i++)
	      for (j=0; j < 3; j++)
		G2 += (real)(n[i]*n[j])*basis->GGT.m[i][j];
	    G = sqrt(G2);

	    // calculate the form factor from raw data.
	    for (ir=0; ir < ngrid; ir++) {
	      arg = r[ir] * G;
	      fun1[ir] = fun[ir] * sin(arg)/arg;
	    }
	    ncore_G = simps(ngrid, fun1, clog);
	      
	    // Stau = exp(-2*pi*i*dot(G,tau))
	    taux = spinfo->atpos[nat].v[0];
	    tauy = spinfo->atpos[nat].v[1];
	    tauz = spinfo->atpos[nat].v[2];
	    phase = -twopi*(n[0]*taux + n[1]*tauy + n[2]*tauz);
	    Stau.x = cos(phase);
	    Stau.y = sin(phase);

	    // dn_core_j = -i * (2pi * n[j]) n_core(G) * Stau 
	    dncorex[index].x =  n[0]*twopi*ncore_G*Stau.y;
	    dncorex[index].y = -n[0]*twopi*ncore_G*Stau.x;

	    dncorey[index].x =  n[1]*twopi*ncore_G*Stau.y;
	    dncorey[index].y = -n[1]*twopi*ncore_G*Stau.x;

	    dncorez[index].x =  n[2]*twopi*ncore_G*Stau.y;
	    dncorez[index].y = -n[2]*twopi*ncore_G*Stau.x;
	  }
	else 
	  { // G=0 components are 0.0.
	    dncorex[0].x = dncorex[0].y = 0.0;
	    dncorey[0].x = dncorey[0].y = 0.0;
	    dncorez[0].x = dncorez[0].y = 0.0;
	  }
      } // loop on FFT box 

  return;
}

*/
