/****************************************************************************
 *
 * DFT++:  density functional package developed by
 *         the research group of Prof. Tomas Arias, MIT.
 *
 * Principal author: Sohrab Ismail-Beigi
 *
 * Modifications for MPI version: Kenneth P Esler,
 *                                Sohrab Ismail-Beigi, and
 *                                Tairan Wang.
 *
 * Modifications for LSD version: Jason A Cline
 *
 * Modifications for lattice/Pulay forces: Gabor Csanyi and
 *                                         Sohrab Ismail-Beigi
 *
 * Copyright (C) 1996-1998 The Massachusetts Institute of Technology (MIT).
 *
 ****************************************************************************/

/*
 *     Sohrab Ismail-Beigi             Original code, Sept. 9, 1996
 *                                     Revised, Jan. 1997
 *
 * Calculate exchange-correlation energy and its derivative versus
 * the electron (number) density.
 *
 */

/* $Id: exc.c,v 1.1.1.1 1999/11/10 01:30:17 tairan Exp $ */

#include <stdio.h>
#include <math.h>
#include "header.h"

/*
 * Return the exchange-correlation energy density for density n.
 *
 * The formula for Exc for rs < 1 are the standard expansions in
 * rs; that for rs > 1 was copied from a code by Fatih Yanik.
 *
 */
vector
exc(vector &n)
{
  /* constants */
  const real pi = M_PI,
    X1 = 0.75*pow(3.0/(2.0*pi),2.0/3.0),  /* Exchange energy coeff */
    AA =  0.0622/2.0, /* Correl. energy rs < 1 expansion coeffs */
    BB = -0.0960/2.0,
    CC =  0.0040/2.0,
    DD = -0.0232/2.0,
    GA = -0.2846/2.0, /* Coerrel. energy rs > 1 expansion coeffs */  
    B1 =  1.0529,
    B2 =  0.3334;
  
  /* Temporary variables */
  real *rs;
  int i;
  vector exc(n);

  /* Calculate rs */
  rs = (real *)mymalloc(sizeof(real)*n.n,"rs","exc");
  for (i=0; i < n.n; i++)
    {
      if (REAL(n.c[i]) <= 0.0)
	rs[i] = 1.0e15;
      else
	rs[i] = pow(4.0*pi/3.0*REAL(n.c[i]),-1.0/3.0);
    }

  /* Calculate exc */
  for (i=0; i < n.n; i++)
    {
      /* Exchange part */
      exc.c[i] = -X1/rs[i];
      /* Correlation part */
      if (rs[i] > 1.0)
        exc.c[i] += GA/(1.0 + B1*sqrt(rs[i]) + B2*rs[i] );
      else
	exc.c[i] += AA*log(rs[i]) + BB + CC*rs[i]*log(rs[i]) + DD*rs[i];
    }

  myfree(rs);
  return exc;
}

/*
 * Derivative of above versus n
 */
vector
excprime(vector &n)
{
  /* constants */
  const real pi = M_PI,
    X1 = 0.75*pow(3.0/(2.0*pi),2.0/3.0),  /* Exchange energy coeff */
    AA =  0.0622/2.0, /* Correl. energy rs < 1 expansion coeffs */
    // BB = -0.0960/2.0,
    CC =  0.0040/2.0,
    DD = -0.0232/2.0,
    GA = -0.2846/2.0, /* Coerrel. energy rs > 1 expansion coeffs */  
    B1 =  1.0529,
    B2 =  0.3334;

  /* temp. vars */
  real n_i,*rs;
  int i;
  vector excprime(n);

  /* Calculate rs */
  rs = (real *)mymalloc(sizeof(real)*n.n,"rs","excprime");
  for (i=0; i < n.n; i++)
    {
      n_i = REAL(n.c[i]);
      if (n_i <= 0.0)
	rs[i] = 1.0e15;
      else
	rs[i] = pow(4.0*pi/3.0*n_i,-1.0/3.0);
    }

  /* Calculate dexc/dn:  drs/dn = -rs/(3*n) is handy */
  for (i=0; i < n.n; i++)
    {
      n_i = REAL(n.c[i]);
      /* Exchange part */
      excprime.c[i] = -X1/(3.0*rs[i]*n_i);
      /* Correlation part */
      if (rs[i] > 1.0)
	excprime.c[i] += GA*(0.5*B1*sqrt(rs[i])+B2*rs[i])/
	  (3.0*n_i*pow(1.0+B1*sqrt(rs[i])+B2*rs[i],2.0));
      else
	excprime.c[i] += 
	  -(AA + CC*rs[i]*(1.0+log(rs[i])) + DD*rs[i])/(3.0*n_i);
    }

  myfree(rs);
  return excprime;
}



/**********************************************************
 *                                                        *
 *    New code for Generalized Gradient Correction        *
 *                                                        *
 *********************************************************/


/* The following Generalized gradient correction 
 * is based on  PW91
 *
 */

#include "parallel.h"
#define IMAG(z) ((z).y)

// cutoff charge density: to avoid true vacuum.
#define CHD_CUTOFF  1e-12 

//
// input: d - density
//        s - abs(grad d)/(2*kf*d)
// output: gradient correction to exchange energy per electron.
//
static real
exe(real d, real s)
{
  // constants block
  const real a1 = 0.19645;
  const real a2 = 0.27430;
  const real a3 = 0.15084;
  const real a4 = 100.0;
  const real ax = -0.7385588;
  const real a = 7.7956;
  const real b1 = 0.004;
  const real thrd = 1.0/3.0;

  // local variable block
  real fac;
  real s2, s3, s4;
  real p0, p1, p2, p3, p4;
  
  fac = ax*pow(d,thrd);
  s2 = s*s;
  s3 = s2*s;
  s4 = s2*s2;
  p0 = 1.0/sqrt(1.0+a*a*s2);
  p1 = log(a*s+1.0/p0);
  p2 = exp(-a4*s2);
  p3 = 1.0/(1.0+a1*s*p1+b1*s4);
  p4 = 1.0+a1*s*p1+(a2-a3*p2)*s2;
  
  return (fac*(p3*p4-1.0));
}

//
// input: d - density
//        s - abs(grad d)/(2*kf*d)
//        u - (grad d)*grad(abs(grad d))/(d^2*(2*kf)^3)
//        v - lap(d)/(d*(2*kf)^2)
// output: gradient correction to exchange potential per electron.
//
static real
exvx(real d, real s, real u, real v)
{
  // constants block
  const real a1 = 0.19645;
  const real a2 = 0.27430;
  const real a3 = 0.15084;
  const real a4 = 100.0;
  const real ax = -0.7385588;
  const real a = 7.7956;
  const real b1 = 0.004;
  const real thrd = 1.0/3.0;
  const real thrd4 = 4.0/3.0;

  // local variable block
  real fac;
  real s2, s3, s4;
  real p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11;
  real f;
  real fss;
  real fs;

  fac = ax*pow(d,thrd);
  s2 = s*s;
  s3 = s2*s;
  s4 = s2*s2;
  p0 = 1.0/sqrt(1.0+a*a*s2);
  p1 = log(a*s+1.0/p0);
  p2 = exp(-a4*s2);
  p3 = 1.0/(1.0+a1*s*p1+b1*s4);
  p4 = 1.0+a1*s*p1+(a2-a3*p2)*s2;
  f = p3*p4;
  p5 = b1*s2-(a2-a3*p2);
  p6 = a1*s*(p1+a*s*p0);
  p7 = 2.0*(a2-a3*p2)+2.0*a3*a4*s2*p2-4.0*b1*s2*f;
  fs = p3*(p3*p5*p6+p7);
  p8 = 2.0*s*(b1-a3*a4*p2);
  p9 = a1*p1+a*a1*s*p0*(3.0-a*a*s2*p0*p0);
  p10 = 4.0*a3*a4*s*p2*(2.0-a4*s2)-8.0*b1*s*f-4.0*b1*s3*fs;
  p11 = -p3*p3*(a1*p1+a*a1*s*p0+4.0*b1*s3);
  fss = p3*p3*(p5*p9+p6*p8)+2.0*p3*p5*p6*p11+p3*p10+p7*p11;

  return fac*(thrd4*(f-1.0)-(u-thrd4*s3)*fss-v*fs);
}


//
// input : a, a1, b1, b2, b3, b4, p, rs
//         rs - seitz radius
// output: gg, ggrs (implicit!!)
//
static void
gcor(real a, real a1, real b1, real b2, real b3, real b4, real p, real rs,
     real &gg, real &ggrs)
{
  // local variables
  real p1;
  real rs12, rs32, rsp;
  real q0, q1, q2, q3;

  p1 = p + 1.0;
  q0 = -2.0*a*(1.0+a1*rs);
  rs12 = sqrt(rs);
  rs32 = rs12*rs12*rs12;
  rsp = pow(rs,p);
  q1 = 2.0*a*(b1*rs12+b2*rs+b3*rs32+b4*rs*rsp);
  q2 = log(1.0+1.0/q1);
  gg = q0*q2;
  q3 = a*(b1/rs12+2.0*b2+3.0*b3*rs12+2.0*b4*p1*rsp);
  ggrs = -2.0*a*a1*q2-q0*q3/(q1*q1+q1);

  return;
}

//
// input : rs - seitz radius
//         zet - relative polarization (here assume zet = 0.0)
// output : ec - local correlation energy per electron
//          vcup, vcdn - local up & down correlational potential
//          ecrs - derivative of ec w.r.t rs
//          eczet - derivative of ec w.r.t zet
//          alfc - correlation contribution to the spin stiffness
//
static void
corlsd(real rs, real zet, real &ec, real &vcup, real &vcdn,
       real &ecrs, real &eczet, real &alfc)
{
  // constants block
  const real gam = 0.5198421;
  const real fzz = 1.709921;
  const real thrd = 1.0/3.0;
  const real thrd4 = 4.0/3.0;

  // local variables
  real f;
  real eu, eurs;
  real ep, eprs;
  real alfm, alfrsm;
  real z4;
  real fz;
  real comm;

  f = (pow(1.0+zet,thrd4)+pow(1.0-zet,thrd4)-2.0)/gam;
  gcor(0.03109070,0.213700,7.59570,3.58760,1.63820,
       0.492940,1.000,rs,eu,eurs);
  gcor(0.015545350,0.205480,14.11890,6.19770,3.36620,
       0.625170,1.000,rs,ep,eprs);
  gcor(0.01688690,0.111250,10.3570,3.62310,0.880260,
       0.496710,1.000,rs,alfm,alfrsm);
  //  alfm is minus the spin stiffness alfc
  alfc = -alfm;
  z4 = (zet*zet)*(zet*zet);
  ec = eu*(1.0-f*z4)+ep*f*z4-alfm*f*(1.0-z4)/fzz;
  //  energy done. now the potential:
  ecrs = eurs*(1.0-f*z4)+eprs*f*z4-alfrsm*f*(1.0-z4)/fzz;
  fz = thrd4*(pow(1.0+zet,thrd)-pow(1.0-zet,thrd))/gam;
  eczet = 4.0*(zet*zet*zet)*f*(ep-eu+alfm/fzz)+fz*(z4*ep-z4*eu
						   -(1.0-z4)*alfm/fzz);
  comm = ec -rs*ecrs/3.0-zet*eczet;
  vcup = comm + eczet;
  vcdn = comm - eczet;

  return;
}

// 
// input : rs - seitz radius
//         zet - relative polarization
//         t - abs(grad d)/(d*2*ks*g) (g=ga=1.0 here)
//         g (=1.0)
//         ec - local correlation energy
//         sk - ks
//         fk - kf
// output: non-local correlation energy per electron
//
static real 
core(real rs, real zet, real t, real g, real ec, real sk, real fk)
{
  // constants block
  const real xnu = 15.75592;
  const real cc0 = 0.004235;
  const real cx = -0.001667212;
  const real alf = 0.09;
  const real c1 = 0.002568;
  const real c2 = 0.023266;
  const real c3 = 7.389e-6;
  const real c4 = 8.723;
  const real c5 = 0.472;
  const real c6 = 7.389e-2;
  const real a4 = 100.0;
  //  const real thrm = -1.0/3.0;
  //  const real thrd2 = 2.0/3.0;
  
  // local variables
  real bet;
  real delt;
  real g3, g4;
  real pon;
  real b, b2;
  real t2, t4, t6;
  real rs2, rs3;
  real q4, q5, q6, q7;
  real cc;
  real r0, r1, r2, r3;
  real coeff;
  real h0, h1;

  bet = xnu*cc0;
  delt = 2.0*alf/bet;
  g3 = g*g*g;
  g4 = g3*g;
  pon = -delt*ec/(g3*bet);
  b = delt/(exp(pon)-1.0);
  b2 = b*b;
  t2 = t*t;
  t4 = t2*t2;
  t6 = t4*t2;
  rs2 = rs*rs;
  rs3 = rs2*rs;
  q4 = 1.0+b*t2;
  q5 = 1.0+b*t2+b2*t4;
  q6 = c1+c2*rs+c3*rs2;
  q7 = 1.0+c4*rs+c5*rs2+c6*rs3;
  cc = -cx + q6/q7;
  r0 = (sk/fk);
  r0 *= r0;
  r1 = a4*r0*g4;
  coeff = cc-cc0-3.0*cx/7.0;
  r2 = xnu*coeff*g3;
  r3 = exp(-r1*t2);
  h0 = g3*(bet/delt)*log(1.0+delt*q4*t2/q5);
  h1 = r3*r2*t2;
    
  return (h0+h1);
}

// 
// input : rs - seitz radius
//         zet - relative polarization
//         t - abs(grad d)/(d*2*ks*g) (g=ga=1.0 here)
//         uu - (grad d)*grad(abs(grad d))/(d^2*(2*ks*g)^3)
//         vv - lap(d)/(d*(2*ks*g)^2)
//         ww - (grad d)*(grad zet)/(d*(2*ks*g)^2) (=0.0 here)
//         g (=1.0)
//         ec - local correlation energy
//         ecrs - derivative of ec w.r.t rs
//         sk - ks
//         fk - kf
// output: non-local correlation energy per electron
//
static real 
corvx(real rs, real zet, real t, real uu, real vv, real ww,
      real g, real ec, real eczet, real ecrs, real sk, real fk)
{
  // constants block
  const real xnu = 15.75592;
  const real cc0 = 0.004235;
  const real cx = -0.001667212;
  const real alf = 0.09;
  const real c1 = 0.002568;
  const real c2 = 0.023266;
  const real c3 = 7.389e-6;
  const real c4 = 8.723;
  const real c5 = 0.472;
  const real c6 = 7.389e-2;
  const real a4 = 100.0;
  const real thrdm = -1.0/3.0;
  const real thrd2 = 2.0/3.0;
  
  // local variables
  real bet;
  real delt;
  real g3, g4;
  real pon;
  real b, b2;
  real t2, t4, t6;
  real rs2, rs3;
  real q4, q5, q6, q7, q8, q9;
  real cc;
  real r0, r1, r2, r3, r4;
  real coeff;
  real h0, h1, h, h0b, h0rs, h0bt, h0rst, h0z, h0t, h0zt, h0tt, h1rs, h1rst;

  // Some version of Unix predefines "hz" (!?!#&@) as some number
  // of Hertz... this caused compilation problems on the SP2.
#ifdef hz
#undef hz
#endif

  real h1z, h1t, h1zt, h1tt, hrs, hrst, ht, htt, hz, hzt;
  real ccrs;
  real rsthrd;
  real gz, fac;
  real bg, bec;
  real fact0, fact1, fact2, fact3, fact4, fact5;
  real comm, pref;

  bet = xnu*cc0;
  delt = 2.0*alf/bet;
  g3 = g*g*g;
  g4 = g3*g;
  pon = -delt*ec/(g3*bet);
  b = delt/(exp(pon)-1.0);
  b2 = b*b;
  t2 = t*t;
  t4 = t2*t2;
  t6 = t4*t2;
  rs2 = rs*rs;
  rs3 = rs2*rs;
  q4 = 1.0+b*t2;
  q5 = 1.0+b*t2+b2*t4;
  q6 = c1+c2*rs+c3*rs2;
  q7 = 1.0+c4*rs+c5*rs2+c6*rs3;
  cc = -cx + q6/q7;
  r0 = (sk/fk);
  r0 *= r0;
  r1 = a4*r0*g4;
  coeff = cc-cc0-3.0*cx/7.0;
  r2 = xnu*coeff*g3;
  r3 = exp(-r1*t2);
  h0 = g3*(bet/delt)*log(1.0+delt*q4*t2/q5);
  h1 = r3*r2*t2;
  h = h0 + h1;

  ccrs = (c2+2.0*c3*rs)/q7 - q6*(c4+2.0*c5*rs+3.0*c6*rs2)/(q7*q7);
  rsthrd = rs/3.0;
  r4 = rsthrd*ccrs/coeff;
  
  /* 
     if(abs(zet) .ge. 1.d0) then
        if(zet .lt. 0.d0) zet=-1.d0+1.d-15
        if(zet .gt. 0.d0) zet=1.d0-1.d-15
        if(flag .eqv. .true.) write(*,50)
        flag=.false.
  50    format(/'warning: corrga  -  zet substituted')
      endif
  */
  gz = (pow(1.0+zet,thrdm) - pow(1.0-zet,thrdm))/3.0;
  fac = delt/b+1.0;
  bg = -3.0*b2*ec*fac/(bet*g4);
  bec = b2*fac/(bet*g3);
  q8 = q5*q5+delt*q4*q5*t2;
  q9 = 1.0+2.0*b*t2;
  h0b = -bet*g3*b*t6*(2.0+b*t2)/q8;
  h0rs = -rsthrd*h0b*bec*ecrs;
  fact0 = 2.0*delt-6.0*b;
  fact1 = q5*q9+q4*q9*q9;
  h0bt = 2.0*bet*g3*t4*((q4*q5*fact0-delt*fact1)/q8)/q8;
  h0rst = rsthrd*t2*h0bt*bec*ecrs;
  h0z = 3.0*gz*h0/g + h0b*(bg*gz+bec*eczet);
  h0t = 2.0*bet*g3*q9/q8;
  h0zt = 3.0*gz*h0t/g+h0bt*(bg*gz+bec*eczet);
  fact2 = q4*q5+b*t2*(q4*q9+q5);
  fact3 = 2.0*b*q5*q9+delt*fact2;
  h0tt = 4.0*bet*g3*t*(2.0*b/q8-(q9*fact3/q8)/q8);
  h1rs = r3*r2*t2*(-r4+r1*t2/3.0);
  fact4 = 2.0-r1*t2;
  h1rst = r3*r2*t2*(2.0*r4*(1.0-r1*t2)-thrd2*r1*t2*fact4);
  h1z = gz*r3*r2*t2*(3.0-4.0*r1*t2)/g;
  h1t = 2.0*r3*r2*(1.0-r1*t2);
  h1zt = 2.0*gz*r3*r2*(3.0-11.0*r1*t2+4.0*r1*r1*t4)/g;
  h1tt = 4.0*r3*r2*r1*t*(-2.0+r1*t2);
  hrs = h0rs+h1rs;
  hrst = h0rst+h1rst;
  ht = h0t+h1t;
  htt = h0tt+h1tt;
  hz = h0z+h1z;
  hzt = h0zt+h1zt;
  comm = h+hrs+hrst+t2*ht/6.0+7.0*t2*t*htt/6.0;
  pref = hz-gz*t2*ht/g;
  fact5 = gz*(2.0*ht+t*htt)/g;
  comm = comm-pref*zet-uu*htt-vv*ht-ww*(hzt-fact5);

  return (comm+pref);
}



vector
exGC(vector& n)
{
  int Nx,Ny,Nz,Nx2,Ny2,Nz2,NyNz;
  register int i,j,k,index;
  Basis *basis = n.basis;

  Nx = basis->Nx;
  Ny = basis->Ny;
  Nz = basis->Nz;
  Nx2 = Nx/2;
  Ny2 = Ny/2;
  Nz2 = Nz/2;
  NyNz = Ny * Nz;
  int NxNyNz = basis->NxNyNz;

  real *d;
  d = (real *) mymalloc(sizeof(real)*n.n, "d", "exGC");

  vector exGC(n); // used temporary sometimes...
  basis = n.basis;
  vector Jn(basis->NxNyNz, basis);
  // regulating charge density
  /* 
   * one of the two ways in doing the cutoff.
   *  The relevant value in GGA is  grad(n)/n
   *  Big error can occur if n is too small.
   * 1. One way of cuting off is what's done here,
   *   by providing a lower bound on n, so that
   *   we deal with grad(n_new)/n_new.
   * 2. The second way of doing this is by dealing
   *   with  grad(n)/n_new.
   * or you can provide your own versions here.
   */
  for (i = 0; i < NxNyNz; i++) {
    if (REAL(n.c[i]) < CHD_CUTOFF)
      exGC.c[i] = CHD_CUTOFF;
    else
      exGC.c[i] = n.c[i];
  }
  apply_J(exGC, Jn);
  
  real g0, g1, g2;
  int l;
  
  real dtemp;
  for (i = 0; i < NxNyNz; i++)
    d[i] = 0.0;

  for (l = 0; l < 3; l++) {
    g0 = basis->G.m[0][l];
    g1 = basis->G.m[1][l];
    g2 = basis->G.m[2][l];
    for (i=-Nx2; i < Nx2; i++)
      for (j=-Ny2; j < Ny2; j++)
	for (k=-Nz2; k < Nz2; k++)
	  {
	    index = 0;
	    if (k < 0) index += k+Nz;        else index += k;
	    if (j < 0) index += Nz*(j+Ny);   else index += Nz*j;
	    if (i < 0) index += NyNz*(i+Nx); else index += NyNz*i;
	    exGC.c[index] = (i * g0 + j * g1 + k * g2) * Jn.c[index];
	  }
    exGC = I(exGC);
    for (i = 0; i < NxNyNz; i++) {
      dtemp = IMAG(exGC.c[i]);
      d[i] += dtemp * dtemp;
    }
  }

  // Preparing Variables 
  real rho, rs, kf, ks, t, sa;
  real fk, sk;
  real zet, ga, ww;
  real ec, vcup, vcdn, ecrs, eczet, alfc;

  const real cc0 = 1.0/3.0;
  const real cc1 = 1.919158293;
  const real cc2 = 1.563185284;
  const real cc3 = 0.6203504909;

  for (i = 0; i < NxNyNz; i++) {
    rho = REAL(n.c[i]);
    if (rho < CHD_CUTOFF) rho = CHD_CUTOFF;
    rs = cc3 * pow(rho, -cc0 );
    kf = cc1 / rs;
    ks = cc2 / sqrt(rs);

    // consistency
    fk = kf;
    sk = ks;

    // spin polarization not implemented
    zet = 0.0;
    ga = 1.0;
    ww = 0.0;

    t = sqrt(d[i]);
    sa = t / (2.0 * kf * rho);

    corlsd(rs, zet, ec, vcup, vcdn, ecrs, eczet, alfc);
    t = t/(2.0*rho*ks*ga);
    exGC.c[i] = exe(rho, sa) + core(rs, zet, t, ga, ec, sk, fk);
  }

  myfree(d);
  return exGC;
}

vector
exGCprime (vector& n)
{
  int Nx,Ny,Nz,Nx2,Ny2,Nz2,NyNz;
  register int i,j,k,index;
  Basis *basis = n.basis;

  Nx = basis->Nx;
  Ny = basis->Ny;
  Nz = basis->Nz;
  Nx2 = Nx/2;
  Ny2 = Ny/2;
  Nz2 = Nz/2;
  NyNz = Ny * Nz;
  int NxNyNz = basis->NxNyNz;

  real *(d[3]);
  real *uu;
  real *vv;
  for (i = 0; i < 3; i++)
    d[i] = (real *) mymalloc(sizeof(real)*n.n, "d[]", "exGCprime");  
  uu = (real *) mymalloc(sizeof(real)*n.n, "uu", "exGC");
  vv = (real *) mymalloc(sizeof(real)*n.n, "vv", "exGC");

  vector exGCprime(n); // used temporary sometimes...
  basis = n.basis;
  vector Jn(basis->NxNyNz, basis);
  // regulating charge density
  for (i = 0; i < NxNyNz; i++) {
    if (REAL(n.c[i]) < CHD_CUTOFF)
      exGCprime.c[i] = CHD_CUTOFF;
    else
      exGCprime.c[i] = n.c[i];
  }
  apply_J(exGCprime, Jn);

  apply_J(n, Jn);
  
  real g0, g1, g2, q0, q1, q2, gl, gm;
  int l, m;

  for (l = 0; l < 3; l++) {
    g0 = basis->G.m[0][l];
    g1 = basis->G.m[1][l];
    g2 = basis->G.m[2][l];
    for (i=-Nx2; i < Nx2; i++)
      for (j=-Ny2; j < Ny2; j++)
	for (k=-Nz2; k < Nz2; k++)
	  {
	    index = 0;
	    if (k < 0) index += k+Nz;        else index += k;
	    if (j < 0) index += Nz*(j+Ny);   else index += Nz*j;
	    if (i < 0) index += NyNz*(i+Nx); else index += NyNz*i;
	    exGCprime.c[index] = (i * g0 + j * g1 + k * g2) * Jn.c[index];
	  }
    exGCprime = I(exGCprime);
    for (i = 0; i < NxNyNz; i++)
      d[l][i] = IMAG(exGCprime.c[i]);
  }
  
  for (i = 0; i < NxNyNz; i++)
    vv[i] = uu[i] = 0.0;
  
  // diagonal part
  
  real ddtemp;
  for (l = 0; l < 3; l++) {
    g0 = basis->G.m[0][l];
    g1 = basis->G.m[1][l];
    g2 = basis->G.m[2][l];
    for (i=-Nx2; i < Nx2; i++)
      for (j=-Ny2; j < Ny2; j++)
	for (k=-Nz2; k < Nz2; k++)
	  {
	    index = 0;
	    if (k < 0) index += k+Nz;        else index += k;
	    if (j < 0) index += Nz*(j+Ny);   else index += Nz*j;
	    if (i < 0) index += NyNz*(i+Nx); else index += NyNz*i;
	    gl = (i * g0 + j * g1  + k * g2);
	    exGCprime.c[index] = - gl * gl * Jn.c[index];
	  }
    exGCprime = I(exGCprime);
    for (i = 0; i < NxNyNz; i++) {
      ddtemp = REAL(exGCprime.c[i]);
      vv[i] += ddtemp;
      uu[i] += d[l][i] * d[l][i] * ddtemp;
    }
      
    for (m = l + 1; m < 3; m++) {
      q0 = basis->G.m[0][m];
      q1 = basis->G.m[1][m];
      q2 = basis->G.m[2][m];
      for (i=-Nx2; i < Nx2; i++)
	for (j=-Ny2; j < Ny2; j++)
	  for (k=-Nz2; k < Nz2; k++)
	    {
	      index = 0;
	      if (k < 0) index += k+Nz;        else index += k;
	      if (j < 0) index += Nz*(j+Ny);   else index += Nz*j;
	      if (i < 0) index += NyNz*(i+Nx); else index += NyNz*i;
	      gl = (i * g0 + j * g1  + k * g2);
	      gm = (i * q0 + j * q1  + k * q2);
	      exGCprime.c[index] = - gl * gm * Jn.c[index];
	    }
      exGCprime = I(exGCprime);
      for (i = 0; i < NxNyNz; i++) {
	ddtemp = REAL(exGCprime.c[i]);
	uu[i] += 2.0 * d[l][i] * d[m][i] * ddtemp;
      }
    }
  }



  // Preparing Variables 
  real rho, sa, rs, kf, ks, t;
  real fk, sk;
  real zet, ga, ww;
  real ec, vcup, vcdn, ecrs, eczet, alfc;
  real u, ub, delta, v;

  const real cc0 = 1.0/3.0;
  const real cc1 = 1.919158293;
  const real cc2 = 1.563185284;
  const real cc3 = 0.6203504909;

  for (i = 0; i < NxNyNz; i++) {
    rho = REAL(n.c[i]);
    if (rho < CHD_CUTOFF) rho = CHD_CUTOFF;
    rs = cc3 * pow(rho, -cc0);
    kf = cc1 / rs;
    ks = cc2/ sqrt(rs);

    // consistency
    fk = kf;
    sk = ks;

    // spin polarization not implemented
    zet = 0.0;
    ga = 1.0;
    ww = 0.0;

    t = 0.0;
    for (j = 0; j < 3; j++)
      t += d[j][i] * d[j][i];
    t = sqrt(t);
    
    if (t > 0.0) {
      u = uu[i] / (t * rho * rho * 8.0 * kf * kf * kf);
      ub = uu[i] / t;
    }
    else {
      u = 0.0;
      uu[i] = 0.0;
      ub = 0.0;
    }

    delta = vv[i];
    v = delta / (rho * 4.0 * kf * kf);
    vv[i] = vv[i] / (rho*pow(2.0*ks*ga,2));
    sa = t / (2.0 * kf * rho);
    
    corlsd(rs, zet, ec, vcup, vcdn, ecrs, eczet, alfc);    
    ub = ub/(rho*rho*pow(2.0*ks*ga,3));
    t = t/(2.0*rho*ks*ga);
    exGCprime.c[i] = exvx(rho, sa, u, v) + 
      corvx(rs, zet, t, ub, vv[i], ww, ga, ec, eczet, ecrs, sk, fk);
  }

  myfree(d[0]);
  myfree(d[1]);
  myfree(d[2]);
  myfree(uu);
  myfree(vv);
 
  return exGCprime;
}


