/****************************************************************************
 *
 * DFT++:  density functional package developed by
 *         the research group of Prof. Tomas Arias, MIT.
 *
 * Principal author: Sohrab Ismail-Beigi
 *
 * Modifications for MPI version: Kenneth P Esler,
 *                                Sohrab Ismail-Beigi, and
 *                                Tairan Wang.
 *
 * Modifications for LSD version: Jason A Cline
 *
 * Modifications for lattice/Pulay forces: Gabor Csanyi and
 *                                         Sohrab Ismail-Beigi
 *
 * Copyright (C) 1996-1998 The Massachusetts Institute of Technology (MIT).
 *
 ****************************************************************************/

/*
 * Code by Sohrab Ismail-Beigi,  Mar. 1997
 * 
 * Non-local pseudopotentials, with multiple-projector extension of
 * basic Kleinman-Bylander form.
 *
 */

/* $Id: nlpseudopot.c,v 1.1.1.1 1999/11/10 01:30:17 tairan Exp $ */

#include <stdio.h>
#include <math.h>
#include "header.h"


/*
 * Calculates the matrix elements of the non-local pseudopotential
 * (multiple-projector form) for the ion'th ion of species sp
 * for atomic state lm at k-vector kvec and returs the result
 * as a column bundle of ioninfo->species[sp].ngamma[lm] columns of
 * length basis->nbasis.  ngamma is the number of multiple projectors
 * for the species sp and state lm.
 *
 * Letting q = k + G, the matrix we are computing is Vnl with entries:
 * 
 * Vnl(q,gamma) = <q|V_gamma*phi_gamma>
 *
 * where |q> is a plane-wave state with <r|q> = exp(i*q*r)/sqrt(Vol)
 * and V_gamma is the non-local potential for the gamma'th state
 * and phi_gamma is the atomic wave-function for the gamma'th state.
 *
 * Now, V_gamma*phi_gamma(r) = V_l(|r|)*phi_l(|r|)*Ylm(rhat)
 *                                           (hat == unit vec.)
 *
 * and using the relation:
 *
 *   exp(iq.r) = sum_{l,m} { 4pi(i^l)j_l(|q|*|r|)Ylm*(qhat)Ylm(rhat) } 
 *
 * We arrivate at,
 *
 *   Vnl(q,gamma) = Y_lm(qhat)*(4*pi)*f_l(|q|)/sqrt(Vol)
 *
 * where
 *
 *   f_l(q) = integral{0,inf} { dr*r^2*V_l(r)*phi_l(r)*(i^l*j_l(|r|*|q|)) }
 *
 * We get f_l(q) from the internal tables stored in ioninfo via interpolation.
 *
 */
void
Vnl_pseudo(int sp,int ion,int lm,
	   vector3 kvec,
	   Basis *basis,
	   Ioninfo *ioninfo,
	   column_bundle &Vnl)
{
  complex S,Ylm;
  real dot,posx,posy,posz,kx,ky,kz,kplusGx,kplusGy,kplusGz;
  real qx,qy,qz,q,invq,qhatx,qhaty,qhatz;
  real flq=0.0,r,invdq=0.0;
  real GGTxx,GGTyy,GGTzz,GGTxy,GGTxz,GGTyz;
  real Vol,invsqrtVol;
  register real ym1,y0,y1,y2,a,b,c,x;
  int ngamma,gamma,n,j,nmax=0,l,m;
  matrix3 G;

  /* pi et al. */
  const real pi = M_PI;
  const real twopi = (real)2.0*pi;
  const real fourpi = (real)4.0*pi;
  const real sixth = (real)1.0/(real)6.0;
  /* Ylm normalization constants */
  const real N00  =  sqrt(1.0/(4.0*pi));
  const real N10  =  sqrt(3.0/(4.0*pi));
  const real N11  = -sqrt(3.0/(8.0*pi));
  const real N1m1 =  sqrt(3.0/(8.0*pi));
  const real N20  =  sqrt(5.0/(16.0*pi));
  const real N21  = -sqrt(15.0/(8.0*pi));
  const real N2m1 =  sqrt(15.0/(8.0*pi));
  const real N22  =  sqrt(15.0/(32.0*pi));
  const real N2m2 =  sqrt(15.0/(32.0*pi));

  Speciesinfo *sp_info = &(ioninfo->species[sp]);

  /* Vnl will contain the final result */
  ngamma = sp_info->ngamma[lm];
  if (Vnl.tot_ncols != ngamma)
    die("\nIn Vnl_pseudo(), ngamma != Vnl.ncols\n\n");
  if (Vnl.col_length != basis->nbasis)
    die("\nIn Vnl_pseudo(), nbasis != Vnl.col_length\n\n");
  Vnl.zero_out();

  /* Get ionic position, k-vector, cell volume, G, and GGT matrix entries */
  posx = sp_info->atpos[ion].v[0];
  posy = sp_info->atpos[ion].v[1];
  posz = sp_info->atpos[ion].v[2];
  kx = kvec.v[0];
  ky = kvec.v[1];
  kz = kvec.v[2];
  G = basis->G;
  GGTxx = basis->GGT.m[0][0];
  GGTyy = basis->GGT.m[1][1];
  GGTzz = basis->GGT.m[2][2];
  GGTxy = basis->GGT.m[0][1];
  GGTxz = basis->GGT.m[0][2];
  GGTyz = basis->GGT.m[1][2];
  Vol = basis->unit_cell_volume;
  invsqrtVol = (real)1.0/sqrt(Vol);

  /* Get l,m values for the potential */
  l = sp_info->l[lm];
  m = sp_info->m[lm];
  if (l < 0 || l > 2)
    die("l != {0,1,2} nonlocal pseudopotentials not supported yet!\n");
  if (m < -l || m > l)
    die("Nonlocal pseudopotential with |m| > l !?!?\n");

  /* Calculate the non-local pseudopotential matrix entries */
  for (gamma = 0; gamma < ngamma; gamma++)
    {
      invdq = (real)1.0/sp_info->dq_nl[lm][gamma];
      nmax = sp_info->ngrid_nl[lm][gamma]-3;

      for (n = 0; n < basis->nbasis; n++)
	{
	  /* q = k+G:  here q is in reciprocal lattice coordinates */
	  qx = kplusGx = kx + basis->Gx[n];
	  qy = kplusGy = ky + basis->Gy[n];
	  qz = kplusGz = kz + basis->Gz[n];
	  /* S = exp(-i*dot(q,pos)) */
	  dot = -twopi*(posx*qx + posy*qy + posz*qz);
	  S.x = cos(dot);
	  S.y = sin(dot);

	  
	  q = sqrt( qx*qx*GGTxx + qy*qy*GGTyy + qz*qz*GGTzz +
		    2.0*(qx*qy*GGTxy + qx*qz*GGTxz + qy*qz*GGTyz) );

	  /* Interpolate flq using internal tables */
	  r = q*invdq;
	  j = (int)r;
	  if (j > nmax)
	    {
	      dft_log(DFT_SILENCE,
		      "G=[%d %d %d] q=[%g %g %g] k=[%g %g %g]\n",
		      basis->Gx[n],basis->Gy[n],basis->Gz[n],
		      qx,qy,qz,kx,ky,kz);
	      die("Nonlocal pseudopotential grid is too small!\n");
	    }

	  /* Do cubic interlopation on flq with j-1, j, j+1, and j+2
	   * points from internal tables.  If j==0, we clearly can't
	   * real j-1.  However f_l(-q) = (-1)^l*f_l(q) so we can
	   * fill in the j-1 entry with the j+1 entry times (-1)^l */
	  if (j == 0)
	    ym1 = (1-2*(l%2))*sp_info->flq[lm][gamma][j+1];
	  else
	    ym1 = sp_info->flq[lm][gamma][j-1];
	  y0  = sp_info->flq[lm][gamma][j  ];
	  y1  = sp_info->flq[lm][gamma][j+1];
	  y2  = sp_info->flq[lm][gamma][j+2];
	  a = sixth*(y2-ym1+3.0*(y0-y1));
	  b = 0.5*(y1+ym1-y0-y0);
	  c = y1-sixth*(y2+3.0*y0+ym1+ym1);
	  x = r-j;
	  flq = ((a*x+b)*x+c)*x+y0;
	  /* The line below is quadratic interploation: */
	  /* flq = y0 + 0.5*x*(y1-ym1+x*(y1+ym1-y0-y0)); */

	  /* If we're at q=0, then we must handle the case separately
	   * since Ylm and qhat are not defined for q=0.  The cases are:
	   * if l==0, Ylm is const.
	   * if l>0, flq=0 anyways, so even though Ylm is undefined,
	   * it doesn't matter what it is since the final result is zero. */
	  if (q < (real)1.0e-13)
	    {
	      if (l==0) { Ylm.x = N00; Ylm.y = 0.0; }
	      else	{ Ylm.x = Ylm.y = 0.0; }
	      goto skipcalcYlm;
	    }


	  /* Replace the above if block by the one below to reporduce
	   * cgrad results. */
	  /* 	  if (j == 0) */
	  /* 	    { */
	  /* 	      flq = y0 + (y1-y0)*x; */
	  /* 	      if (l==0) { Ylm.x = N00; Ylm.y = 0.0; } */
	  /* 	      else	{ Ylm.x = Ylm.y = 0.0; } */
	  /* 	      goto skipcalcYlm; */
	  /* 	    } */


	  /* Otherwise, we have to calculate Ylm: 
	   * qhat is a unit vector in direction of q:  here q is in "real"
	   * bohr^-1 units and q = (k+G)*G where (k+G) is a row-vector. */
	  qx = kplusGx*G.m[0][0] + kplusGy*G.m[1][0] + kplusGz*G.m[2][0];
	  qy = kplusGx*G.m[0][1] + kplusGy*G.m[1][1] + kplusGz*G.m[2][1];
	  qz = kplusGx*G.m[0][2] + kplusGy*G.m[1][2] + kplusGz*G.m[2][2];
	  invq = (real)1.0/q;
	  qhatx = qx*invq;
	  qhaty = qy*invq;
	  qhatz = qz*invq;
	  /* Calculate Ylm */
	  if (l==0)
	    {
	      Ylm.x = N00;
	      Ylm.y = 0.0;
	    }
	  else if (l==1)
	    {
	      if (m==0)
		{
		  Ylm.x = N10*qhatz;
		  Ylm.y = 0.0;
		}
	      else if (m==1)
		{
		  Ylm.x = N11*qhatx;
		  Ylm.y = N11*qhaty;
		}
	      else if (m==-1)
		{
		  Ylm.x =  N1m1*qhatx;
		  Ylm.y = -N1m1*qhaty;
		}
	    }
	  else if (l==2)
	    {
	      if (m==0)
		{
		  Ylm.x = N20*(3.0*qhatz*qhatz-1.0);
		  Ylm.y = 0.0;
		}
	      else if (m==1)
		{
		  Ylm.x = N21*(qhatz*qhatx);
		  Ylm.y = N21*(qhatz*qhaty);
		}
	      else if (m==-1)
		{
		  Ylm.x = N2m1*( qhatz*qhatx);
		  Ylm.y = N2m1*(-qhatz*qhaty);
		}
	      else if (m==2)
		{
		  Ylm.x = N22*(qhatx*qhatx-qhaty*qhaty);
		  Ylm.y = N22*(2.0*qhatx*qhaty);
		}
	      else if (m==-2)
		{
		  Ylm.x = N2m2*(qhatx*qhatx-qhaty*qhaty);
		  Ylm.y = N2m2*(-2.0*qhatx*qhaty);
		}
	    }
	  else
	    die("nonlocal pseudopot: l ?!?!!\n");

	skipcalcYlm: ;
	  /* Calculate Vnl */
	  Vnl.col[gamma].c[n] = (S*Ylm);
	  Vnl.col[gamma].c[n] *= fourpi*flq*invsqrtVol;

	}
    }

  /* We know at least these two things...set them to be safe. */
  Vnl.basis = basis;
  Vnl.k = kvec;
}


/*
     Textbook formulae from Liboff:

  Y0,0  = 1/sqrt(4pi)

  Y1,0      = sqrt(3/(4*pi))*cos(theta)
  Y1,(+-)1  = (-+)sqrt(3/(8pi))*sin(theta)*exp((+-)i*phi)

  Y2,0      = sqrt(5/(16*pi))*(3*cos(theta)^2 -1)
  Y2,(+-)1  = (-+)sqrt(15/(8pi))(sin(theta)*cos(theta))*exp((+-)i*phi)
  Y2,(+-)2  = sqrt(15/(32*pi))(sin(theta)^2)*exp((+-)2i*phi)
*/

/*
 * This routine does what the above one does, and also fills in
 * dVnlx/y/z with the derivatives of Vnl versus the position of the
 * ion 'ion' of species 'sp' (lattice coords).
 */
void
dVnl_pseudo_datom_pos(int sp,int ion,int lm,
		      vector3 kvec,
		      Basis *basis,
		      Ioninfo *ioninfo,
		      column_bundle &Vnl,
		      column_bundle &dVnlx,
		      column_bundle &dVnly,
		      column_bundle &dVnlz )
{
  complex S,Ylm,negtwopiiVnl;
  real dot,posx,posy,posz,kx,ky,kz,kplusGx,kplusGy,kplusGz;
  real qx,qy,qz,q,invq,qhatx,qhaty,qhatz;
  real flq=0.0,r,invdq=0.0;
  real GGTxx,GGTyy,GGTzz,GGTxy,GGTxz,GGTyz;
  real Vol,invsqrtVol;
  register real ym1,y0,y1,y2,a,b,c,x;
  int ngamma,gamma,n,j,nmax=0,l,m;
  matrix3 G;

  /* pi et al. */
  const real pi = M_PI;
  const real twopi = (real)2.0*pi;
  const real fourpi = (real)4.0*pi;
  const real sixth = (real)1.0/(real)6.0;
  /* Ylm normalization constants */
  const real N00  =  sqrt(1.0/(4.0*pi));
  const real N10  =  sqrt(3.0/(4.0*pi));
  const real N11  = -sqrt(3.0/(8.0*pi));
  const real N1m1 =  sqrt(3.0/(8.0*pi));
  const real N20  =  sqrt(5.0/(16.0*pi));
  const real N21  = -sqrt(15.0/(8.0*pi));
  const real N2m1 =  sqrt(15.0/(8.0*pi));
  const real N22  =  sqrt(15.0/(32.0*pi));
  const real N2m2 =  sqrt(15.0/(32.0*pi));

  Speciesinfo *sp_info = &(ioninfo->species[sp]);

  /* Vnl will contain the final result */
  ngamma = sp_info->ngamma[lm];
  if (Vnl.tot_ncols != ngamma)
    die("\nIn Vnl_pseudo(), ngamma != Vnl.ncols\n\n");
  if (Vnl.col_length != basis->nbasis)
    die("\nIn Vnl_pseudo(), nbasis != Vnl.col_length\n\n");
  Vnl.zero_out();

  /* Get ionic position, k-vector, cell volume, G, and GGT matrix entries */
  posx = sp_info->atpos[ion].v[0];
  posy = sp_info->atpos[ion].v[1];
  posz = sp_info->atpos[ion].v[2];
  kx = kvec.v[0];
  ky = kvec.v[1];
  kz = kvec.v[2];
  G = basis->G;
  GGTxx = basis->GGT.m[0][0];
  GGTyy = basis->GGT.m[1][1];
  GGTzz = basis->GGT.m[2][2];
  GGTxy = basis->GGT.m[0][1];
  GGTxz = basis->GGT.m[0][2];
  GGTyz = basis->GGT.m[1][2];
  Vol = basis->unit_cell_volume;
  invsqrtVol = (real)1.0/sqrt(Vol);

  /* Get l,m values for the potential */
  l = sp_info->l[lm];
  m = sp_info->m[lm];
  if (l < 0 || l > 2)
    die("l != {0,1,2} nonlocal pseudopotentials not supported yet!\n");
  if (m < -l || m > l)
    die("Nonlocal pseudopotential with |m| > l !?!?\n");

  /* Calculate the non-local pseudopotential matrix entries */
  for (gamma = 0; gamma < ngamma; gamma++)
    {
      invdq = (real)1.0/sp_info->dq_nl[lm][gamma];
      nmax = sp_info->ngrid_nl[lm][gamma]-3;

      for (n = 0; n < basis->nbasis; n++)
	{
	  /* q = k+G:  here q is in reciprocal lattice coordinates */
	  qx = kplusGx = kx + basis->Gx[n];
	  qy = kplusGy = ky + basis->Gy[n];
	  qz = kplusGz = kz + basis->Gz[n];
	  /* S = exp(-i*dot(q,pos)) */
	  dot = -twopi*(posx*qx + posy*qy + posz*qz);
	  S.x = cos(dot);
	  S.y = sin(dot);

	  q = sqrt( qx*qx*GGTxx + qy*qy*GGTyy + qz*qz*GGTzz +
		    2.0*(qx*qy*GGTxy + qx*qz*GGTxz + qy*qz*GGTyz) );

	  /* Interpolate flq using internal tables */
	  r = q*invdq;
	  j = (int)r;
	  if (j > nmax)
	    {
	      dft_log(DFT_SILENCE,
		      "G=[%d %d %d] q=[%g %g %g] k=[%g %g %g]\n",
		      basis->Gx[n],basis->Gy[n],basis->Gz[n],
		      qx,qy,qz,kx,ky,kz);
	      die("Nonlocal pseudopotential grid is too small!\n");
	    }

	  /* Do cubic interlopation on flq with j-1, j, j+1, and j+2
	   * points from internal tables.  If j==0, we clearly can't
	   * real j-1.  However f_l(-q) = (-1)^l*f_l(q) so we can
	   * fill in the j-1 entry with the j+1 entry times (-1)^l */
	  if (j == 0)
	    ym1 = (1-2*(l%2))*sp_info->flq[lm][gamma][j+1];
	  else
	    ym1 = sp_info->flq[lm][gamma][j-1];
	  y0  = sp_info->flq[lm][gamma][j  ];
	  y1  = sp_info->flq[lm][gamma][j+1];
	  y2  = sp_info->flq[lm][gamma][j+2];
	  a = sixth*(y2-ym1+3.0*(y0-y1));
	  b = 0.5*(y1+ym1-y0-y0);
	  c = y1-sixth*(y2+3.0*y0+ym1+ym1);
	  x = r-j;
	  flq = ((a*x+b)*x+c)*x+y0;
	  /* The line below is quadratic interploation: */
	  /* flq = y0 + 0.5*x*(y1-ym1+x*(y1+ym1-y0-y0)); */

	  /* If we're at q=0, then we must handle the case separately
	   * since Ylm and qhat are not defined for q=0.  The cases are:
	   * if l==0, Ylm is const.
	   * if l>0, flq=0 anyways, so even though Ylm is undefined,
	   * it doesn't matter what it is since the final result is zero. */
	  if (q < (real)1.0e-13)
	    {
	      if (l==0) { Ylm.x = N00; Ylm.y = 0.0; }
	      else	{ Ylm.x = Ylm.y = 0.0; }
	      goto skipcalcYlm;
	    }

	  /* Otherwise, we have to calculate Ylm: 
	   * qhat is a unit vector in direction of q:  here q is in "real"
	   * bohr^-1 units and q = (k+G)*G where (k+G) is a row-vector. */
	  qx = kplusGx*G.m[0][0] + kplusGy*G.m[1][0] + kplusGz*G.m[2][0];
	  qy = kplusGx*G.m[0][1] + kplusGy*G.m[1][1] + kplusGz*G.m[2][1];
	  qz = kplusGx*G.m[0][2] + kplusGy*G.m[1][2] + kplusGz*G.m[2][2];
	  invq = (real)1.0/q;
	  qhatx = qx*invq;
	  qhaty = qy*invq;
	  qhatz = qz*invq;
	  /* Calculate Ylm */
	  if (l==0)
	    {
	      Ylm.x = N00;
	      Ylm.y = 0.0;
	    }
	  else if (l==1)
	    {
	      if (m==0)
		{
		  Ylm.x = N10*qhatz;
		  Ylm.y = 0.0;
		}
	      else if (m==1)
		{
		  Ylm.x = N11*qhatx;
		  Ylm.y = N11*qhaty;
		}
	      else if (m==-1)
		{
		  Ylm.x =  N1m1*qhatx;
		  Ylm.y = -N1m1*qhaty;
		}
	    }
	  else if (l==2)
	    {
	      if (m==0)
		{
		  Ylm.x = N20*(3.0*qhatz*qhatz-1.0);
		  Ylm.y = 0.0;
		}
	      else if (m==1)
		{
		  Ylm.x = N21*(qhatz*qhatx);
		  Ylm.y = N21*(qhatz*qhaty);
		}
	      else if (m==-1)
		{
		  Ylm.x = N2m1*( qhatz*qhatx);
		  Ylm.y = N2m1*(-qhatz*qhaty);
		}
	      else if (m==2)
		{
		  Ylm.x = N22*(qhatx*qhatx-qhaty*qhaty);
		  Ylm.y = N22*(2.0*qhatx*qhaty);
		}
	      else if (m==-2)
		{
		  Ylm.x = N2m2*(qhatx*qhatx-qhaty*qhaty);
		  Ylm.y = N2m2*(-2.0*qhatx*qhaty);
		}
	    }
	  else
	    die("nonlocal pseudopot: l ?!?!!\n");
	skipcalcYlm: ;
	  /* Calculate Vnl */
	  Vnl.col[gamma].c[n] = (S*Ylm);
	  Vnl.col[gamma].c[n] *= fourpi*flq*invsqrtVol;
	  /* dVnl(k+G)/dtau[j] = -2*pi*i*(k[j]+G[j])*Vnl(k+G) */
	  negtwopiiVnl.x =  twopi*Vnl.col[gamma].c[n].y;
	  negtwopiiVnl.y = -twopi*Vnl.col[gamma].c[n].x;
	  dVnlx.col[gamma].c[n] = kplusGx*negtwopiiVnl;
	  dVnly.col[gamma].c[n] = kplusGy*negtwopiiVnl;
	  dVnlz.col[gamma].c[n] = kplusGz*negtwopiiVnl;
	}
    }

  /* We know at least these two things...set them to be safe. */
  Vnl.basis = dVnlx.basis = dVnly.basis = dVnlz.basis = basis;
  Vnl.k = dVnlx.k = dVnly.k = dVnlz.k = kvec;
}
