/****************************************************************************
 *
 * DFT++:  density functional package developed by
 *         the research group of Prof. Tomas Arias, MIT.
 *
 * Principal author: Sohrab Ismail-Beigi
 *
 * Modifications for MPI version: Kenneth P Esler,
 *                                Sohrab Ismail-Beigi, and
 *                                Tairan Wang.
 *
 * Modifications for LSD version: Jason A Cline
 *
 * Modifications for lattice/Pulay forces: Gabor Csanyi and
 *                                         Sohrab Ismail-Beigi
 *
 * Copyright (C) 1996-1998 The Massachusetts Institute of Technology (MIT).
 *
 ****************************************************************************/

/*
 * ewald.c:   Sohrab Ismail-Beigi     Jan 31, 1997,  May 12 1997
 *
 * Calculates the Ewald energy for a set of ions, etc.
 *
 */

/* $Id: ewald.c,v 1.3 2000/02/03 06:20:32 tairan Exp $ */

#include <stdio.h>
#include <math.h>
#include "header.h"

/*
 * Global variables that control how far
 * out in R- and G-space the lattice sums go for the ewald energy.
 * Sums are done from _start to _end in x,y, and z directions.
 */
int Ewald_setup_was_done = 0;
int Nlat_start_real, Nlat_end_real,Nlat_start_recip, Nlat_end_recip;

/*
 * Reads in the file ewald_filename and gets the parameters (listed above)
 * from the file.
 *
 * The format of the ewald file is simple:
 * -> Lines starting with '#' are comments and are ignored.
 * Two lines with 2 integers on each are read to specify the R- and G-space
 * lattice sum sizes:
 *
 *      Nlat_start_real   Nlat_end_real
 *      Nlat_start_recip  Nlat_end_recip
 *
 * The real and recip. space sums go from -Nlat to Nlat in each of x,y, and z
 * directions, with Nlat going from Nlat_start to Nlat_end.
 *
 */
void
setup_Ewald(char *ewald_filename)
{
  dft_text_FILE *ewald_file;
  char line[200];

  dft_log("\n------ setup_ewald() ------\n");
  dft_log("Reading file '%s'\n",ewald_filename);

  if ( (ewald_file = dft_text_fopen(ewald_filename,"r")) ==
       (dft_text_FILE *)NULL)
    die("Can't read ewald file %s!!!\n",ewald_filename);

  /* Read the sum-size parameters */
  skip_comment(ewald_file,line);
  sscanf(line,"%d %d",&Nlat_start_real,&Nlat_end_real);
  skip_comment(ewald_file,line);
  sscanf(line,"%d %d",&Nlat_start_recip,&Nlat_end_recip);
  dft_text_fclose(ewald_file);

  dft_log("Nlat_start_real  = %d      Nlat_end_real  = %d\n",
	  Nlat_start_real,Nlat_end_real);
  dft_log("Nlat_start_recip = %d      Nlat_end_recip = %d\n",
	  Nlat_start_recip,Nlat_end_recip);
  dft_log("\n");
  dft_log_flush();

  Ewald_setup_was_done = 1;
}

void
setup_Ewald(int nlat_s_real, int nlat_e_real,
	    int nlat_s_recip, int nlat_e_recip)
{
  Nlat_start_real  = nlat_s_real;
  Nlat_end_real    = nlat_e_real;
  Nlat_start_recip = nlat_s_recip;
  Nlat_end_recip   = nlat_e_recip;
  
  dft_log("\n------ setup_ewald() ------\n");
  dft_log("Nlat_start_real  = %d      Nlat_end_real  = %d\n",
	  Nlat_start_real,Nlat_end_real);
  dft_log("Nlat_start_recip = %d      Nlat_end_recip = %d\n",
	  Nlat_start_recip,Nlat_end_recip);
  dft_log("\n");
  dft_log_flush();

  Ewald_setup_was_done = 1;
}


/*
 * Retrieve the ewald parameters.
 */
int get_ewald(int &nlat_s_real, int &nlat_e_real,
	       int &nlat_s_recip, int &nlat_e_recip)
{
  if (! Ewald_setup_was_done) return 0;

  nlat_s_real = Nlat_start_real;
  nlat_e_real = Nlat_end_real;
  nlat_s_recip = Nlat_start_recip;
  nlat_e_recip = Nlat_end_recip;
  return 1;
}


/*
 * Calculates the Ewald energy per unit cell 
 * for natoms atoms of charges Z[0..natoms-1]
 * emersed in a uniform compensating charg density sum_i(Z[i])/Vol
 * where Vol = det(R) = unit cell volume.
 * atpos[0..natoms-1][3] is in lattice coordinates.
 * R[][] contains the lattice vectors in the columns.
 *
 * With tau denoting an atom in the basis,
 * the energy is Ewald = 0.5*sum_{tau} { Z[tau]*phi[tau] }
 * where phi[tau] is the electrostatic potential caused by ALL OTHER ions
 * everywhere and the uniform compensating background.
 *
 * The energy comes in two parts:  real-space sum of screened point-charges
 * (screend by gaussians), and a G-space sum of the potential of 
 * Guassians in a uniform background; there are also some constants from
 * "renormalization" effects (cutoffs going to infinity in a controlled way).
 *
 * We calculate the above sums in R- and G-space by summing the values
 * in a box of size [-Nlat,Nlat]^3...Nlat is run through a set of values
 * to check for convergence.
 *
 */
real
Ewald(Ioninfo *ioninfo,matrix3 R)
{
  /* Constants */
  real const pi = M_PI,
             twopi = 2.0*pi,
             fourpi = 4.0*pi;

  /* Local vars */
  real *Z;
  vector3 *atpos;
  int natoms;
  matrix3 RTR,G,GGT;
  vector3 x;
  real vol,G2,r,temp,sigma,eta;
  real angle,SG[2];
  real Ereal,Erecip,Etot;
  int sp,i,j,k,l,tau,taup,cell[3],Nlat;

  if (!Ewald_setup_was_done)
    die("Ewald() was not setup!!!\n");

  /* Take out all the ions from the Ioninfo structure and put them into
   * a long list of Z[] and atpos[] values */
  natoms = 0;
  for (sp=0; sp < ioninfo->nspecies; sp++)
    natoms += ioninfo->species[sp].natoms;
  Z = (real *)mymalloc(sizeof(real)*natoms,"Z","Ewald()");
  atpos = (vector3 *)mymalloc(sizeof(vector3)*natoms,"atpos","Ewald()");
  k = 0;
  for (sp=0; sp < ioninfo->nspecies; sp++)
    for (j=0; j < ioninfo->species[sp].natoms; j++)
      {
	Z[k] = ioninfo->species[sp].Z;
	atpos[k] = ioninfo->species[sp].atpos[j];
	k++;
      }

  /* Unit cell volume */
  vol = fabs(det3(R));

  /* Calcuate matrix of dot-products of lattice vectors */
  RTR = (~R)*R;

  /* Calculate G=2*pi*inv(R): recip. lattice vectors in rows of G */
  G = twopi*inv3(R);

  /* Calculate dot products of recip. lattice vectors */
  GGT = G*(~G);

  dft_log("\n------ Ewald() -----\n");
  dft_log("latvec = \n");
  R.print(dft_global_log,"%lg ");
  dft_log("natoms = %d\n",natoms);
  dft_log(DFT_ANAL_LOG,"Z = [ ");
  for (i=0; i < natoms; i++)
    dft_log(DFT_ANAL_LOG,"%lg ",Z[i]);
  dft_log(DFT_ANAL_LOG,"]\natpos=\n");
  if (dft_global_log->get_level() >= DFT_ANAL_LOG)
    for (i=0; i < natoms; i++)
      atpos[i].print(dft_global_log,"%lg ");

  /* set width of gaussian to 0.4 the nearest-neighbor distance */
  /* Here I'll loop over the cells close to the origin and find
   * the minimal distance. */
  sigma = sqrt(RTR.m[0][0]);
  for (i=-2; i<=2; i++)
    for (j=-2; j<=2; j++)
      for (k=-2; k<=2; k++)
	if ( i!=0 || j!=0 || k!=0 )
	  {
	    x.v[0] = i; x.v[1] = j; x.v[2] = k;
	    r = sqrt(x*(RTR*x));
	    if (r < sigma)
	      sigma = r;
	  }
  sigma *= 0.4;

  /* set scale of width of gaussian to roughly the interatomic distance */
/*   sigma = 0.4*pow(vol/(real)natoms,1.0/3.0); */


  eta = 1.0/(sqrt(2.0)*sigma);
  dft_log("Using sigma = %lg   eta = %lg for gaussian\n\n", sigma,eta);

  /* Real-space part of energy:  (1) constant parts */
  Ereal = (real)0.0;
  for (tau=0; tau < natoms; tau++)
    for (taup=0; taup < natoms; taup++)
      {
	dft_log(DFT_NERD_LOG,
		"\nReal space potential for tau=%d taup=%d\n",
		tau,taup);
	/* The constant part of the energy from "renomalization" */
	temp = -0.5*Z[tau]*Z[taup]*pi/(vol*eta*eta);
	/* If tau==tau', then add the "negative" potential of gaussian at
	 * tau */
	if (tau == taup)
	  temp += -0.5*Z[tau]*Z[taup]*2.0*eta/sqrt(pi);
	Ereal += temp;
	dft_log(DFT_NERD_LOG,"Constant part = %le\n",temp);
      }
  /* Real-space part of energy:  (2) lattice sums over screened ion pairs */
  /* loop over size of lattice sums */
  for (Nlat = 0; Nlat <= Nlat_end_real; Nlat++)
    {
      for (tau=0; tau < natoms; tau++)
	for (taup=0; taup < natoms; taup++)
	  {
	    /* loop over cells */
	    for (cell[0]=-Nlat; cell[0]<=Nlat; cell[0]++)
	      for (cell[1]=-Nlat; cell[1]<=Nlat; cell[1]++)
		for (cell[2]=-Nlat; cell[2]<=Nlat; cell[2]++)
		  /* For each value of Nlat, we only sum over the cells
		   * which have one coordinate == +/-Nlat, i.e. the surfaces
		   * of the cube of points running [-Nlat,Nlat] in each
		   * direction. */
		  if ( abs(cell[0])==Nlat ||
		       abs(cell[1])==Nlat ||
		       abs(cell[2])==Nlat    )
		    /* Only exclude the cell==0 and tau==tau' term */
		    if (tau!=taup || cell[0]!=0 || cell[1]!=0 || cell[2]!=0)
		      {
			/* Find the distance |cell+tau'-tau| between atom at
			 * tau and the other atom being considered.
			 * x is this cell+tau'-tau vector in lattice coords;
			 * r is its actual length in real distance units*/
			for (l=0; l < 3; l++)
			  x.v[l] = cell[l] +
			           atpos[taup].v[l] - atpos[tau].v[l];
			r = sqrt(x*(RTR*x));
			temp = 0.5*Z[tau]*Z[taup]*erfc(eta*r)/r;
			Ereal += temp;
			dft_log(DFT_NERD_LOG,
				"cell=[%d %d %d] r =%lg e = %le\n",
				cell[0],cell[1],cell[2],r,temp);
		      }
	  } /* tau' loop */

	  dft_log("Nlat = %2d  Real-space energy = %25.15le\n",
		  Nlat,Ereal);
	  dft_log_flush();

    } /* Nlat loop */

  /*
   * Reciprocal space contribution:
   * Erecip = 0.5*sum_{G!=0}
   *            {4*pi*exp(-|G|^2/(4*eta^2))/(vol*|G|^2)*|S(G)|^2}
   * where S(G) = sum_{tau} { Z[tau]*exp(-i*G*r_tau) }
   *
   * r_tau = R*tau (R is matrix, tau is 3-vector) and
   *     G = cell*G (second G is matrix, cell is row-vector of integers)
   * so G*r_tau = 2*pi*cell*tau.
   */
  Erecip = (real)0.0;
  for (Nlat = 1; Nlat <= Nlat_end_recip; Nlat++)
    {
      for (cell[0]=-Nlat; cell[0]<=Nlat; cell[0]++)
	for (cell[1]=-Nlat; cell[1]<=Nlat; cell[1]++)
	  for (cell[2]=-Nlat; cell[2]<=Nlat; cell[2]++)
	    /* For each value of Nlat, we only sum over the cells
	     * which have one coordinate == +/-Nlat, i.e. the surfaces
	     * of the cube of points running [-Nlat,Nlat] in each
	     * direction. */
	    if ( abs(cell[0])==Nlat ||
		 abs(cell[1])==Nlat ||
		 abs(cell[2])==Nlat    )
	      /* Skip G=0 */
	      if (cell[0]!=0 || cell[1]!=0 || cell[2]!=0)
		{
		  /* Calculate structure factor */
		  SG[0] = SG[1] = 0.0;
		  for (tau=0; tau < natoms; tau++)
		    {
		      angle = -twopi*(cell[0]*atpos[tau].v[0]+
				      cell[1]*atpos[tau].v[1]+
				      cell[2]*atpos[tau].v[2]  );
		      SG[0] += Z[tau]*cos(angle);
		      SG[1] += Z[tau]*sin(angle);
		    }
		  /* Calculate |G|^2 */
		  G2 = GGT.m[0][0]*cell[0]*cell[0] + 
		    GGT.m[1][1]*cell[1]*cell[1] + 
		    GGT.m[2][2]*cell[2]*cell[2] + 
		    2.0*( GGT.m[0][1]*cell[0]*cell[1] +
			  GGT.m[0][2]*cell[0]*cell[2] +
			  GGT.m[1][2]*cell[1]*cell[2]    );
		  /* The energy for G */
		  temp = 0.5*fourpi*exp(-G2/(4.0*eta*eta))/(G2*vol)*
		    (SG[0]*SG[0]+SG[1]*SG[1]);
		  Erecip += temp;

		  dft_log(DFT_NERD_LOG,
			  "G=[%d %d %d] G2 =%lg  e = %le\n",
			  cell[0],cell[1],cell[2],G2,temp);
		}

      dft_log("Nlat = %2d  Reciprocal space energy = %25.15le\n",
		Nlat,Erecip);
      dft_log_flush();

    } /* of Nlat loop */
  Etot = Ereal + Erecip;

  dft_log("\nEwald energy = %25.15le\n\n",Etot);
  dft_log_flush();

  myfree(Z);
  myfree(atpos);
  return Etot;
}

/*
 * Derivative of Ewald energy versus the position of the atom 'atom' of
 * species 'species' (lattice coordinates).
 */
vector3
dEwald_datom_pos(Ioninfo *ioninfo,matrix3 R,const int species,const int atom)
{
  /* Constants */
  real const pi = M_PI,
             twopi = 2.0*pi,
             fourpi = 4.0*pi,
             sqrtpi = sqrt(pi);

  /* Local vars */
  real *Z;
  vector3 *atpos;
  int natoms;
  matrix3 RTR,G,GGT;
  real vol,sigma,eta;
  int sp,i,j,k,l,cell[3],Nlat,taup;
  int tau; /* the index of the atom corresponding to 'species' and 'atom' */
  vector3 result(0.0,0.0,0.0);   /* holds the final result */

  if (!Ewald_setup_was_done)
    die("Ewald() was not setup!!!\n");

  /* Take out all the ions from the Ioninfo structure and put them into
   * a long list of Z[] and atpos[] values */
  natoms = 0;
  for (sp=0; sp < ioninfo->nspecies; sp++)
    natoms += ioninfo->species[sp].natoms;
  Z = (real *)mymalloc(sizeof(real)*natoms,"Z","dEwald_datom_pos()");
  atpos = (vector3 *)mymalloc(sizeof(vector3)*natoms,
			      "atpos","dEwald_datom_pos()");
  k = 0;
  tau = -1;
  for (sp=0; sp < ioninfo->nspecies; sp++)
    for (j=0; j < ioninfo->species[sp].natoms; j++)
      {
	Z[k] = ioninfo->species[sp].Z;
	atpos[k] = ioninfo->species[sp].atpos[j];
	/* Find the index correspoding to species/atom and store it in tau */
	if (sp == species && j == atom)
	  tau = k;
	k++;
      }
  if (tau == -1)
    die("dEwlad_datom_pos():  no atom corresponding to requested deriv!!!\n");

  /* Unit cell volume */
  vol = fabs(det3(R));

  /* Calcuate matrix of dot-products of lattice vectors */
  RTR = (~R)*R;

  /* Calculate G=2*pi*inv(R): recip. lattice vectors in rows of G */
  G = twopi*inv3(R);

  /* Calculate dot products of recip. lattice vectors */
  GGT = G*(~G);

  /* set width of gaussian to 0.4 the nearest-neighbor distance */
  /* Here I'll loop over the cells close to the origin and find
   * the minimal distance. */
  sigma = sqrt(RTR.m[0][0]);
  for (i=-2; i<=2; i++)
    for (j=-2; j<=2; j++)
      for (k=-2; k<=2; k++)
	if ( i!=0 || j!=0 || k!=0 )
	  {
	    vector3 x;
	    real r;

	    x.v[0] = i; x.v[1] = j; x.v[2] = k;
	    r = sqrt(x*(RTR*x));
	    if (r < sigma)
	      sigma = r;
	  }
  sigma *= 0.4;
  eta = 1.0/(sqrt(2.0)*sigma);

  /* Real-space part of derivative */
  /* Loop over atoms and cells */
  Nlat = Nlat_end_real;
  for (taup=0; taup < natoms; taup++)
    for (cell[0]=-Nlat; cell[0]<=Nlat; cell[0]++)
      for (cell[1]=-Nlat; cell[1]<=Nlat; cell[1]++)
	for (cell[2]=-Nlat; cell[2]<=Nlat; cell[2]++)
	  /* Only exclude the cell==0 and tau==tau' term */
	  if (tau!=taup || cell[0]!=0 || cell[1]!=0 || cell[2]!=0)
	    {
	      real temp,r;
	      vector3 x,RTRx;

	      /* Find the distance |cell+tau'-tau| between atom at tau
	       * and the other atom being considered.
	       * x is this cell+tau'-tau vector in lattice coords;
	       * r is its actual length in real distance units*/
	      for (l=0; l < 3; l++)
		x.v[l] = cell[l] + atpos[tau].v[l] - atpos[taup].v[l];
	      RTRx = RTR*x;
	      r = sqrt(x*RTRx);
	      temp = -Z[tau]*Z[taup]*(erfc(eta*r)/(r*r) +
			 	      2.0*eta*exp(-eta*eta*r*r)/(r*sqrtpi) )/r;
	      for (l=0; l < 3; l++)
		result.v[l] += temp*RTRx.v[l];
	    }

  /*
   * Reciprocal space contribution.
   */
  Nlat = Nlat_end_recip;
  for (cell[0]=-Nlat; cell[0]<=Nlat; cell[0]++)
    for (cell[1]=-Nlat; cell[1]<=Nlat; cell[1]++)
      for (cell[2]=-Nlat; cell[2]<=Nlat; cell[2]++)
	/* Skip G=0 */
	if (cell[0]!=0 || cell[1]!=0 || cell[2]!=0)
	  {
	    complex SG,Stau;
	    real G2,temp,angle;
	    int taup;
	    
	    /* Calculate structure factor */
	    SG.x = SG.y = 0.0;
	    for (taup=0; taup < natoms; taup++)
	      {
		angle = -twopi*(cell[0]*atpos[taup].v[0]+
				cell[1]*atpos[taup].v[1]+
				cell[2]*atpos[taup].v[2]  );
		SG.x += Z[taup]*cos(angle);
		SG.y += Z[taup]*sin(angle);
	      }

	    /* Structure factor for tau alone: Ztau*exp(-i*G.tau) */
	    angle = -twopi*(cell[0]*atpos[tau].v[0]+ 
			    cell[1]*atpos[tau].v[1]+
			    cell[2]*atpos[tau].v[2]  );
	    Stau.x = Z[tau]*cos(angle);
	    Stau.y = Z[tau]*sin(angle);
	    
	    /* Calculate |G|^2 */
	    G2 = GGT.m[0][0]*cell[0]*cell[0] + 
	      GGT.m[1][1]*cell[1]*cell[1] + 
	      GGT.m[2][2]*cell[2]*cell[2] + 
	      2.0*( GGT.m[0][1]*cell[0]*cell[1] +
		    GGT.m[0][2]*cell[0]*cell[2] +
		    GGT.m[1][2]*cell[1]*cell[2]    );
	    
	    /* The contribution for G */
	    temp = fourpi*exp(-G2/(4.0*eta*eta))/(G2*vol);
	    temp *= twopi*(SG.x*Stau.y-SG.y*Stau.x);
	    for (l=0; l < 3; l++)
	      result.v[l] += temp*cell[l];
	  }

  myfree(Z);
  myfree(atpos);

  /* return the fruits of our labors */
  return result;
}
