/****************************************************************************
 *
 * DFT++:  density functional package developed by
 *         the research group of Prof. Tomas Arias, MIT.
 *
 * Principal author: Sohrab Ismail-Beigi
 *
 * Modifications for MPI version: Kenneth P Esler,
 *                                Sohrab Ismail-Beigi, and
 *                                Tairan Wang.
 *
 * Modifications for LSD version: Jason A Cline
 *
 * Modifications for lattice/Pulay forces: Gabor Csanyi and
 *                                         Sohrab Ismail-Beigi
 *
 * Copyright (C) 1996-1998 The Massachusetts Institute of Technology (MIT).
 *
 ****************************************************************************/

/*
 *  Tairan Wang,          Mar 31. 1999
 * 
 *
 *                                                                
 * Class IonDyninfo:                                              
 *    Extended class from Ioninfo to include dynamics information 
 *                                                                
 */

/* $Id: IonDyninfo.c,v 1.3 1999/11/19 15:57:59 tairan Exp $ */

#include <math.h>
#include <stdio.h>
#include <string.h>
#include "header.h"
#include "parallel.h"

/***************************************************
 * Implement  IonDyninfo  member functions         *
 ***************************************************/

IonDyninfo::IonDyninfo()
{
  atpos_old = atpos_next = atvel = NULL;
  basis = NULL;
  ionic_kT = 0.0; 
  ionic_stepsize = 0.0;
  velsc_flag = 0;

  oldFileRead  = NULL;
  oldFileWrite = NULL;
}

IonDyninfo::~IonDyninfo()
{
  int sp;
  if (atpos_old != NULL) {
    for (sp = 0; sp < ioninfo.nspecies; sp++) {
      myfree(atpos_old[sp]);
      myfree(atpos_next[sp]);
      myfree(atvel[sp]);
    }
    myfree(atpos_old);
    myfree(atpos_next);
    myfree(atvel);
  }
  atpos_old = atpos_next = atvel = NULL;
  basis = NULL;
}


/*
 * IonDyninfo::print_pos :
 *
 * Print out ion positions:
 *  If  flag >  0, print next position,
 *  If  flag == 0, print current position,
 *  If  flag <  0, print old position.
 */
void
IonDyninfo::print_pos(Output *out, int flag) const
{
  int sp, nat;

  if (flag==0) {
    dft_log("Dumping current position to %s\n\n",out->filename);
    for (sp = 0; sp < ioninfo.nspecies; sp++)
      for (nat = 0; nat < ioninfo.species[sp].natoms; nat++)
	out->printf("%3d %4d %20.10e %20.10e %20.10e\n",
		    sp, nat,
		    ioninfo.species[sp].atpos[nat].v[0],
		    ioninfo.species[sp].atpos[nat].v[1],
		    ioninfo.species[sp].atpos[nat].v[2] );
  } else {
    vector3 **p;
    
    if (flag > 0) {
      dft_log("Dumping next position to %s\n\n",out->filename);
      p = atpos_next;
    } else {
      dft_log("Dumping old position to %s\n\n",out->filename);
      p = atpos_old;
    }
    for (sp = 0; sp < ioninfo.nspecies; sp ++)
      for (nat = 0; nat < ioninfo.species[sp].natoms; nat++) {
	out->printf("%3d %4d %20.10e %20.10e %20.10e\n",
		    sp, nat, p[sp][nat].v[0], p[sp][nat].v[1], p[sp][nat].v[2] );
      }
  }
}

void
IonDyninfo::print_pos(Output *out, int flag, int nstep) const
{
  int sp, nat;

  if (flag==0) {
    dft_log("Dumping current position to %s\n\n",out->filename);
    out->printf("%3d\n", nstep);
    for (sp = 0; sp < ioninfo.nspecies; sp++)
      for (nat = 0; nat < ioninfo.species[sp].natoms; nat++)
	out->printf("%3d %4d %20.10e %20.10e %20.10e\n",
		    sp, nat,
		    ioninfo.species[sp].atpos[nat].v[0],
		    ioninfo.species[sp].atpos[nat].v[1],
		    ioninfo.species[sp].atpos[nat].v[2] );
	
  } else {
    vector3 **p;
    
    if (flag > 0) {
      dft_log("Dumping next position to %s\n\n",out->filename);
      p = atpos_next;
    } else {
      dft_log("Dumping old position to %s\n\n",out->filename);
      p = atpos_old;
    }
    for (sp = 0; sp < ioninfo.nspecies; sp ++)
      for (nat = 0; nat < ioninfo.species[sp].natoms; nat++)
	out->printf("%3d %4d %20.10e %20.10e %20.10e\n",
		    sp, nat, p[sp][nat].v[0], p[sp][nat].v[1], p[sp][nat].v[2] );
  }
}


/*
 * IonDyninfo::ioninit : 
 *
 *  Initialize variables for ion dynamics;
 *  Upon completion, vector3 arrays  atpos_old, atpos_next and atvel
 * should be allocated and initialized.
 * atvel is generated randomly with constraint of total internal energy 
 * being 1.5kT; atpos_old is the input ion locations; the output ion locations
 * is the input + v*dt; atpos_next = input locations + 2*v*dt
 *
 *  matrix3 AAT, and invR  are also initialized.
 */
void
IonDyninfo::ioninit()
{
  int sp, nspecies, nat, n, nmov;
  int i, j;
  real totke, totke1, scfac;
  real dt = ionic_stepsize;

  /* 
   *   Setup storage spaces, and count non-moving atoms
   */
  nmov = 0;
  nspecies = ioninfo.nspecies;
  atpos_old  = (vector3 **) mymalloc(sizeof(vector3*)*nspecies, "atpos_old", "ioninit");
  atpos_next = (vector3 **) mymalloc(sizeof(vector3*)*nspecies, "atpos_next", "ioninit");
  atvel      = (vector3 **) mymalloc(sizeof(vector3*)*nspecies, "atvel", "ioninit");
  for (sp = 0; sp < ioninfo.nspecies; sp++)
    {
      nat = ioninfo.species[sp].natoms;
      atpos_old[sp]  = (vector3 *) mymalloc(sizeof(vector3)*nat, "atpos_old[]", "ioninit");
      atpos_next[sp] = (vector3 *) mymalloc(sizeof(vector3)*nat, "atpos_next[]", "ioninit");
      atvel[sp]      = (vector3 *) mymalloc(sizeof(vector3)*nat, "atvel[]", "ioninit");
      for (n = 0; n < nat; n++)
	if (ioninfo.species[sp].move[n] > 0) 
	  nmov ++;
    }
  
  // build AAT
  for (i = 0; i < 3; i++)
    for (j = 0; j < 3; j++)
      AAT.m[i][j] = basis->latvec[i] * basis->latvec[j];

  // calculate inv(R), RRT once and store them.
  invR = inv3(basis->latvec);
  RRT  = invR * (~invR );
  
  /* 
   *     Initialize the ion positions for a dynamics run (Verlet algorithm).
   *     r(i+1) = r(i-1) + 2*dt*v(i), where v(i) is randomly generated with 
   *     an average temperature T.
   */
  
  // generate the v(i) with temp: K.E. = 1.5*nmov*kb*temp = sum mv^2
  // starting off by putting  3/2*kb*T (kinetic) into system.
  // if need equilibrating with other degrees of freedom, use velocity
  // rescaling in iondyn.
  totke1 = 1.5 * nmov * ionic_kT; // in hartree
  
  dft_log("------      ioninit()      ------\n");
  dft_log(" Total KE1 (atomic) = %lg (Hartree) | %4.1f Kelvin\n", 
	    totke1, ionic_kT * 315773 );

  // randomly generate velocity. (has to be consistant across processes)
  // at this point, just make sure that the distribution is
  // randomly spherical.
  totke = 0.0;
  System::seed_with_time(DFT_SMALL_DATA);
  for (sp = 0; sp < ioninfo.nspecies; sp++)
    {
      nat = ioninfo.species[sp].natoms;
      for (n = 0; n < nat; n++)
	if (ioninfo.species[sp].move[n] > 0) 
	  {
	    for (i = 0; i < 3; i++)
	      atvel[sp][n].v[i] = (rnd() - 0.5);
	    totke += ioninfo.species[sp].mass * atvel[sp][n] * atvel[sp][n];
	  }
    }
  totke *= 0.5;  

  if (totke > 0.0) {
    scfac = sqrt(totke1/totke);
    dft_log(" Total KE (atomic) = %lg\t Scaling factor = %lg\n",
	      totke, scfac);

    for (sp = 0; sp < ioninfo.nspecies; sp++)
      {
	nat = ioninfo.species[sp].natoms;
	for (n = 0; n < nat; n++)
	  if (ioninfo.species[sp].move[n] > 0) {
	    atvel[sp][n] = atvel[sp][n] * scfac;  
	    // atvel is in  sqrt(hartree/mass)
	    // mass is in the unit of hartree/(bohr/fsec)^2 = 1.556907 x 10^-27 kg
	    // and should have been taking cared of in setup_ion routine.
	    // At this point, velocity is still in (x,y,z) coordinates.
	    // convert to lattice coordinates.
	    atvel[sp][n] = invR * atvel[sp][n];
	  } else {
	    atvel[sp][n].v[0] = atvel[sp][n].v[1] = atvel[sp][n].v[2] = 0.0;
	  }
      }
  } else
    dft_log(" Total KE (atomic) = %lg\t No ions are moved.\n",
	      totke);

    
  // now generate r(i+1) & define r(i)
  for (sp = 0; sp < ioninfo.nspecies; sp++)
    {
      nat = ioninfo.species[sp].natoms;
      for (n = 0; n < nat; n++)
	if (ioninfo.species[sp].move[n] > 0) 
	  {
	    atpos_next[sp][n] = ioninfo.species[sp].atpos[n] + 2.0 * dt * atvel[sp][n];
	    atpos_old[sp][n] = ioninfo.species[sp].atpos[n];
	    ioninfo.species[sp].atpos[n] = 0.5 * (atpos_next[sp][n] + atpos_old[sp][n]);
	  }
	else
	  {  // non-moving ions
	    atpos_next[sp][n] = atpos_old[sp][n] = 
	      ioninfo.species[sp].atpos[n];
	  }
    }

  // write ion velocities:
  for (sp = 0; sp < ioninfo.nspecies; sp++)
    {
      nat = ioninfo.species[sp].natoms;
      dft_log("\nspecies = %d, natoms = %d, initial velocities follow (lattice units/fs):\n\n",
		sp, nat);
      for (n = 0; n < nat; n++)
	dft_log("%3d %4d %20.10le %20.10le %20.10le\n",
		  sp, n,
		  atvel[sp][n].v[0],
		  atvel[sp][n].v[1],
		  atvel[sp][n].v[2]);
      }
  dft_log_flush();

#ifdef DFT_MPI
  MPI_Barrier(MPI_COMM_WORLD);
#endif

}


/*
 * IonDyninfo::ioncontinue :
 *
 *  Initialize variables for ion dynamics;
 *  Upon completion, vector3 arrays  atpos_old, atpos_next and atvel
 * should be allocated and initialized.
 * atpos_old is read in;
 * atvel is calculated from  current ion positions and atpos_old;
 * atpos_next  is the extrapolation of atpos_old and current position.
 *
 *  matrix3 AAT, and invR  are also initialized.
 */
void
IonDyninfo::ioncontinue(Elecinfo *elecinfo)
{
  int sp, nspecies, nat, n, nmov;
  int i, j;
  real dt = ionic_stepsize;

  dft_log("------      ioncontinue()      ------\n");

  /* 
   *   Setup storage spaces, and count non-moving atoms
   */
  nmov = 0;
  nspecies = ioninfo.nspecies;
  atpos_old  = (vector3 **) mymalloc(sizeof(vector3*)*nspecies, "atpos_old", "ioninit");
  atpos_next = (vector3 **) mymalloc(sizeof(vector3*)*nspecies, "atpos_next", "ioninit");
  atvel      = (vector3 **) mymalloc(sizeof(vector3*)*nspecies, "atvel", "ioninit");
  for (sp = 0; sp < ioninfo.nspecies; sp++)
    {
      nat = ioninfo.species[sp].natoms;
      atpos_old[sp]  = (vector3 *) mymalloc(sizeof(vector3)*nat, "atpos_old[]", "ioninit");
      atpos_next[sp] = (vector3 *) mymalloc(sizeof(vector3)*nat, "atpos_next[]", "ioninit");
      atvel[sp]      = (vector3 *) mymalloc(sizeof(vector3)*nat, "atvel[]", "ioninit");
      for (n = 0; n < nat; n++)
	if (ioninfo.species[sp].move[n] > 0) 
	  nmov ++;
    }

  // build AAT
  for (i = 0; i < 3; i++)
    for (j = 0; j < 3; j++)
      AAT.m[i][j] = basis->latvec[i] * basis->latvec[j];

  // calculate inv(R), RRT once and store them.
  invR = inv3(basis->latvec);
  RRT  = invR * (~invR );
  
  /*
   * Read in current ion locations from IONLOC (overwriting existing information)
   * Read in old ion locations from  IONLOC.OLD
   * Read filling info from FILLINGS.
   */
  {
    dft_text_FILE * in_file;
    char line[DFT_LINE_LEN];
    int k;
    real filling;

    if ( (in_file = dft_text_fopen("IONLOC", "r")) == (dft_text_FILE*)0 )
      die("Can't read IONLOC. Aborting\n");
    // Read ionic restart iteration information:
    dft_text_fgets(line, DFT_LINE_LEN, in_file);
    if ( ion_start_iter < 0) {
      sscanf(line, "%d", &ion_start_iter);
    }
    // Read positions
    for (sp = 0; sp < ioninfo.nspecies; sp ++) {
      for (nat = 0; nat < ioninfo.species[sp].natoms; nat++) {
	dft_text_fgets(line, DFT_LINE_LEN, in_file);
	sscanf(line, " %*d %*d %lg %lg %lg ",
	       &(ioninfo.species[sp].atpos[nat].v[0]),
	       &(ioninfo.species[sp].atpos[nat].v[1]),
	       &(ioninfo.species[sp].atpos[nat].v[2]) );
      }
    }
    dft_text_fclose(in_file);

    if ( (in_file = dft_text_fopen("IONLOC.OLD", "r")) == (dft_text_FILE*)0 )
      die("Can't read IONLOC.OLD. Aborting\n");
    for (sp = 0; sp < ioninfo.nspecies; sp ++) {
      for (nat = 0; nat < ioninfo.species[sp].natoms; nat++) {
	dft_text_fgets(line, DFT_LINE_LEN, in_file);
	sscanf(line, " %*d %*d %lg %lg %lg ",
	       &(atpos_old[sp][nat].v[0]),
	       &(atpos_old[sp][nat].v[1]),
	       &(atpos_old[sp][nat].v[2]) );
      }
    }
    dft_text_fclose(in_file);

    if ( (in_file = dft_text_fopen("FILLINGS","r")) == (dft_text_FILE*)0 ) {
      dft_log("No continued filling information from previous ion step\n\n");
    } else {
      dft_text_fgets(line, DFT_LINE_LEN, in_file);
      for (k = 0; k < elecinfo->nkpts; k++) {
	// skip 4 lines: see print format of  print_Hsub_eigs
	for (i = 0; i < 4; i++)
	  dft_text_fgets(line, DFT_LINE_LEN, in_file); 
	for (i = 0; i < elecinfo->F[k].n; i++) {
	  dft_text_fgets(line, DFT_LINE_LEN, in_file);
	  sscanf(line, "%*d %lg", &filling);
	  elecinfo->F[k].c[i] = (scalar) filling;
	}
      }
      dft_text_fclose(in_file);
    }
  }

  // now generate r(i+1) & define v(i)
  for (sp = 0; sp < ioninfo.nspecies; sp++)
    {
      nat = ioninfo.species[sp].natoms;
      for (n = 0; n < nat; n++)
	if (ioninfo.species[sp].move[n] > 0) 
	  {
	    atpos_next[sp][n] = 2.0 * ioninfo.species[sp].atpos[n] - atpos_old[sp][n];
	    atvel[sp][n] = ( ioninfo.species[sp].atpos[n] - atpos_old[sp][n] )/dt;
	  }
    }

  // write old ion positions:
  dft_log("\nold atomic positions:\n");
  for (sp = 0; sp < ioninfo.nspecies; sp++)
    {
      nat = ioninfo.species[sp].natoms;
      dft_log("\nspecies = %d, natoms = %d, old atomic positions follow:\n\n",
		sp, nat);
      for (n = 0; n < nat; n++)
	dft_log("%3d %4d %20.10le %20.10le %20.10le\n",
		  sp, n,
		  atpos_old[sp][n].v[0],
		  atpos_old[sp][n].v[1],
		  atpos_old[sp][n].v[2]);
    }

  // write current ion positions:
  dft_log("\ncurrent atomic positions:\n");
  for (sp = 0; sp < ioninfo.nspecies; sp++)
    {
      nat = ioninfo.species[sp].natoms;
      dft_log("\nspecies = %d, natoms = %d, current atomic positions follow:\n\n",
		sp, nat);
      for (n = 0; n < nat; n++)
	dft_log("%3d %4d %20.10le %20.10le %20.10le\n",
		  sp, n,
		  ioninfo.species[sp].atpos[n].v[0],
		  ioninfo.species[sp].atpos[n].v[1],
		  ioninfo.species[sp].atpos[n].v[2]);
    }
    
  // write ion velocities:
  dft_log("\ncurrent ion velocites:\n");
  for (sp = 0; sp < ioninfo.nspecies; sp++)
    {
      nat = ioninfo.species[sp].natoms;
      dft_log("\nspecies = %d, natoms = %d, initial velocities follow (lattice units/fs):\n\n",
		sp, nat);
      for (n = 0; n < nat; n++)
	dft_log("%3d %4d %20.10le %20.10le %20.10le\n",
		  sp, n,
		  atvel[sp][n].v[0],
		  atvel[sp][n].v[1],
		  atvel[sp][n].v[2]);
    }
}


/*
 * IonDyninfo::iondyn :
 *
 *     Ion dynamics using the Verlet algorithm:
 *     r(i+1) = 2*r(i) - r(i-1) + dt^2*a(i)
 *
 *     One specific example is for GaAs(110) surface problem:
 *     3x3 surface units (i.e., 9 Ga atoms and 9 As atoms per layer) and 
 *     5 layer slab (45 Ga and 45 As atoms) with H termination at the 
 *     bottom(18 H atoms). One GaAs layers are fixed in the bulk positions
 *     and 4 layers are used for dyanmics calculations. (WARNING: Test 
 *     calculations show that two bottom layers are significantly influenced 
 *     by the H termination.) - K. Cho
 *
 */
void
IonDyninfo::iondyn( int iter )
{
  // calculate accelerations from forces and express in A**2/fsec/SIZE
  vector3 aion;
  int sp, nat, n, nmov;
  real totke;
  real dt = ionic_stepsize;

  dft_log("------      iondyn()       ------\n");

  nmov = 0;
  for (sp = 0; sp < ioninfo.nspecies; sp++)
    {
      nat = ioninfo.species[sp].natoms;
      for (n = 0; n < nat; n++)
	{
	  // atom dynamics
	  if (ioninfo.species[sp].move[n] > 0) 
	    {
	      // calculate real space acceleration,
	      // in units of  hartree/bohr/mass

		  aion =  RRT * ( ioninfo.species[sp].forces[n] / ioninfo.species[sp].mass );

	      // aion is in  bohr/fsec/fsec, and is in real space.

	      atpos_next[sp][n] = 2.0 * ioninfo.species[sp].atpos[n] - atpos_old[sp][n] + dt*dt* aion;
	      atvel[sp][n] = (atpos_next[sp][n] - atpos_old[sp][n])/(2.0*dt);

	      nmov++;
	    }    // else  non-moving ions, nothing to do.
	}
    }

  // total kinetic energy
  totke = 0.0;
  for (sp = 0; sp < ioninfo.nspecies; sp++)
    {
      nat = ioninfo.species[sp].natoms;
      for (n = 0; n < nat; n++)
	if (ioninfo.species[sp].move[n] > 0) 
	  totke += ioninfo.species[sp].mass * atvel[sp][n] * (AAT * atvel[sp][n]);
    }
  totke *= 0.5;

  if ((velsc_flag == 1) && (nmov > 0)) {
    // rescale velocity
    // CAUTION: total ion KE is only 3/2 kT, velocity rescaling must 
    // be initiated after KE, Epotential have achieve equilibrium.
    real totke1 = 1.5 * nmov * ionic_kT; // in hartree

    dft_log("rescaling velocity to:\n");
    dft_log(" Total KE1 (atomic) = %lg (Hartree) | %4.1f Kelvin\n", 
	      totke1, ionic_kT * 315773 );

    real scfac = sqrt(totke1/totke);

    dft_log("current KE = %lg ; scfac = %lg:\n",
	      totke, scfac);

    // scale v(i) and 
    // generate r(i+1) again with scaled v(i)
    for (sp = 0; sp < ioninfo.nspecies; sp++)
      {
	nat = ioninfo.species[sp].natoms;
	for (n = 0; n < nat; n++)
	  if (ioninfo.species[sp].move[n] > 0)
	    {
	      atvel[sp][n] = atvel[sp][n] * scfac;
	      atpos_next[sp][n] = atpos_old[sp][n] + 2.0 * dt * atvel[sp][n];
	    }
      }
  }

  // increment in time
  for (sp = 0; sp < ioninfo.nspecies; sp++) {
    nat = ioninfo.species[sp].natoms;
    for (n = 0; n < nat; n++)	{
      if (ioninfo.species[sp].move[n] > 0) {
	atpos_old[sp][n] = ioninfo.species[sp].atpos[n];
	ioninfo.species[sp].atpos[n] = atpos_next[sp][n];
      }
    }
  }


  // Write new positions
  dft_log("\ncurrent atomic positions:\n\n");
  for (sp = 0; sp < ioninfo.nspecies; sp++) {
    nat = ioninfo.species[sp].natoms;
    dft_log("\nspecies = %d, natoms = %d, positions follow:\n\n",
	      sp, nat);
    for (n = 0; n < nat; n++)
      dft_log("%3d %4d %20.10le %20.10le %20.10le\n",
		sp, n,
		ioninfo.species[sp].atpos[n].v[0],
		ioninfo.species[sp].atpos[n].v[1],
		ioninfo.species[sp].atpos[n].v[2]);
  }


  // write ion velocities:
  dft_log("\ncurrent ion velocites:\n");
  for (sp = 0; sp < ioninfo.nspecies; sp++)
    {
      nat = ioninfo.species[sp].natoms;
      dft_log("\nspecies = %d, natoms = %d, initial velocities follow (lattice units/fs):\n\n", 
                sp, nat);
      for (n = 0; n < nat; n++)
        dft_log("%3d %4d %20.10le %20.10le %20.10le\n",
                  sp, n,
                  atvel[sp][n].v[0],
                  atvel[sp][n].v[1],
                  atvel[sp][n].v[2]);
    }


  dft_log("Iteration # %d :  K.E. = %lg\n", 
	    iter, totke);

}




/*
 * Read in the old wavefunction from  oldfname,
 * bundle by bundle, and column by column, 
 * then extrapolate the current wavefunction with:
 *
 *  Y_new =  2 * C_current - C_old;
 *  (where  C_new can be calculated from Y_new)
 *
 *  ref: T.A.Arias, M.C.Payne, and J.D.Joannopoulos, PRL, 69, p1077.
 */
void
IonDyninfo::extrapolate_column_bundle_array(char *oldfname, int nbundles, 
					    column_bundle *C, column_bundle *Y)
{
  FILE *filep = NULL;
  int i, j, k;
  int my_ncols = C[0].my_ncols;
  int col_length = 0;
  Basis *basis = C[0].basis;

  // assume the number of cols differ only by one across processes.
  for (k=0; k < nbundles; k++) {
    if (col_length < C[k].col_length)
      col_length = C[k].col_length;
  }
  column_bundle data(1, col_length, basis, "local");


#ifdef DFT_MPI

  int io_node = System::Get_IOprocID();
  int My_procID = System::Get_procID();
  int N_Procs = System::Get_N_Procs();
  int ready_flag = 0;

  if (My_procID == io_node) {    // The IO-node

    filep = fopen(oldfname, "r");
    if (filep == (FILE *)0) {
      // tell all processors to quit.
      MPI_Bcast ( &ready_flag, 1, MPI_INT, io_node, MPI_COMM_WORLD );
      dft_log("Can't read file='%s' in extrapolate_column_bundle_array\n",
	      oldfname);
      dft_log("Skip extrapolation step.\n");
      return;
    }
    ready_flag = 1; // tell all processors to receive.
    MPI_Bcast ( &ready_flag, 1, MPI_INT, io_node, MPI_COMM_WORLD );

    for (i=0; i < nbundles; i++) {
      // Temporary holding places for MPI.
      int ncols, ncol, nproc;
      MPI_Status status;
      // manually adjust col_length
      col_length = C[i].col_length;

      // CRITICAL: data.col[i].c[j] must be continuous, with j being
      // the inner index.
      for ( nproc = 0; nproc < N_Procs; nproc++ ) {

	if ( nproc == My_procID ) {

	  for (ncol =0; ncol < my_ncols; ncol++) {
	    fread(data.col[0].c, sizeof(scalar), col_length, filep);
	    // 
	    // extrapolate the wavefunction.
	    //
	    for (j = 0; j < col_length; j++) {
	      Y[i].col[ncol].c[j] = C[i].col[ncol].c[j] * 2.0;
	      Y[i].col[ncol].c[j] -= data.col[0].c[j];
	    }
	  }

	} else {
	
	  MPI_Recv(&ncols, 1, MPI_INT, nproc, 0, MPI_COMM_WORLD, &status);

	  for (ncol=0; ncol < ncols; ncol++) {

	    fread(data.col[0].c,sizeof(scalar),col_length,filep);

	    if (ncols > 0) {
	      MPI_Send(data.col[0].c, col_length*SCALAR_SIZE, MPI_DOUBLE, nproc, 1,MPI_COMM_WORLD);
	    }
	  }
	}
      }
    }

    fclose(filep);

  } else {  // For all other Non-IO-nodes

    // Get ready signal
    MPI_Bcast ( &ready_flag, 1, MPI_INT, io_node, MPI_COMM_WORLD );

    if (ready_flag == 1) {

      for (i=0; i < nbundles; i++) {
	MPI_Status status;
	int ncol;

	// manually adjust col_length
	col_length = C[i].col_length;

	MPI_Send(&my_ncols, 1, MPI_INT, io_node, 0, MPI_COMM_WORLD);

	// Receive data from processor io_node.
	for (ncol = 0; ncol < my_ncols; ncol++) {


	  MPI_Recv(data.col[0].c, col_length*SCALAR_SIZE, MPI_DOUBLE, 
		   io_node, 1, MPI_COMM_WORLD, &status);

	  //
	  // extrapolate wavefunction.
	  //
	  for (j = 0; j < col_length; j++) {
	    Y[i].col[ncol].c[j] = C[i].col[ncol].c[j] * 2.0;
	    Y[i].col[ncol].c[j] -= data.col[0].c[j];
	  }
	}
      }
    } else {
      // Error. Skip reading and extrapolating
      return;
    }
  }
  
  MPI_Barrier(MPI_COMM_WORLD);  // just to be safe, synchronize.

#else // DFT_MPI

  filep = fopen(oldfname, "r");
  if (filep == (FILE *)0) {
    dft_log("Can't read file='%s' in extrapolate_column_bundle_array\n",
	    oldfname);
    dft_log("Skip extrapolation step.\n");
    return;
  }

  for (i=0; i < nbundles; i++) {
    // Temporary holding places for MPI.
    int ncol;
    // manually adjust col_length
    col_length = C[i].col_length;

    for (ncol =0; ncol < my_ncols; ncol++) {
      fread(data.col[0].c, sizeof(scalar), col_length, filep);
      // 
      // extrapolate the wavefunction.
      //
      for (j = 0; j < col_length; j++) {
	Y[i].col[ncol].c[j] = C[i].col[ncol].c[j] * 2.0;
	Y[i].col[ncol].c[j] -= data.col[0].c[j];
      }
    }
  }

#endif // DFT_MPI

}


void
IonDyninfo::check_pos()
{
  // ask ioninfo to check current position.
  ioninfo.check_pos();
}


int
IonDyninfo::dump_n_extrapolate_Y(Elecvars &elecvars,
				     const Control &cntrl,
				     int nkpts,
				     int iter)
{
  char fname[DFT_FILENAME_LEN], *tempFile;

  /*************************************************************
   **                                                         **
   ** Dump density for record saving in ion dynamics          **
   **                                                         **
   *************************************************************/

  sprintf(fname,"n.step%d", iter);
  if (cntrl.dump_n) {
    dft_log("Dumping '%s' ...", fname); dft_log_flush();
    elecvars.n.write(fname);
    dft_log("done.\n"); dft_log_flush();
  }

  //
  // Both files associated with init_Y_filename, and old_Y_filename
  // are overwritten eventually.
  //
  dft_log("Dump current wave function to %s.\n",
	    oldFileWrite);
  dft_log_flush();

      // dump current Y in some temporary file
      // 1/25/1999  Tairan Wang: We should really be using C for extrapolateion purpose:
      // dump current C
  write_column_bundle_array(oldFileWrite,nkpts,elecvars.C);
  // (extrapolate new Y by reading in the old Y)
  // extrapolate new Y by reading in the old C
  dft_log("Read old wave function from %s and extrapolate.\n",
	    oldFileRead);
  dft_log_flush();

  extrapolate_column_bundle_array(oldFileRead,nkpts,
				  elecvars.C,elecvars.Y);

  // switch the two files
  tempFile = oldFileRead; 
  oldFileRead = oldFileWrite; 
  oldFileWrite = tempFile;

  return 0;
}



/*
 * Perform Ion relaxation
 */
static double mean_speed_old = 0.0;

int IonDyninfo::relax_ion(const Control &cntrl)
{
  // Relax ion!!!!
  matrix3 R;
  vector3 f, max_force;
//  vector3 total_speed;
  double mean_speed;

// #ifdef PLANE_CONSTRAINT
//  /* constrain first atom only to move in (1, 1, 1) plane */
//  vector3 plane;
//  
//  plane.v[0] = plane.v[1] = plane.v[2] = 1.0;
//  plane = plane/abs(plane);
// #endif 
  int sp, i, j, nmov = 0;

  max_force.v[0] = max_force.v[1] = max_force.v[2] = 0.0;
// total_speed.v[0] = total_speed.v[1] = total_speed.v[2] = 0.0;
  
  R = basis->latvec;

  /* calculate a virtual ion_speed */
  for (sp=0; sp < ioninfo.nspecies; sp++) {
    for (i=0; i < ioninfo.species[sp].natoms; i++) {
      if (ioninfo.species[sp].move[i]) {
	  f = inv3(~R)*ioninfo.species[sp].forces[i];
	if (abs(f) > abs(max_force))
	  max_force = f;
	for (j = 0; j < 3; j++) {
	  if (ioninfo.species[sp].ion_speed[i].v[j]*ioninfo.species[sp].forces[i].v[j] > 0) {
	    ioninfo.species[sp].ion_speed[i].v[j] 
	      = ioninfo.species[sp].ion_damp*ioninfo.species[sp].ion_speed[i].v[j] 
	      + ioninfo.species[sp].ion_fac*ioninfo.species[sp].forces[i].v[j];
	  }
	  else {
	    ioninfo.species[sp].ion_speed[i].v[j] 
	      = (1.0 - ioninfo.species[sp].ion_damp)*ioninfo.species[sp].ion_speed[i].v[j] 
	      + ioninfo.species[sp].ion_fac*ioninfo.species[sp].forces[i].v[j];
	  }
	}
	      
// #ifdef PLANE_CONSTRAINT
//	/* constrain first atom only to move in (1, 1, 1) plane */
//	if (i == 0)
//	  ioninfo.species[0].ion_speed[0] -= (ioninfo.species[0].ion_speed[0]*plane)*plane;
// #endif    
//	total_speed += ioninfo.species[sp].ion_speed[i];
	nmov++;
      }
    } 
  }

// #ifdef PLANE_CONSTRAINT
//  /* constrain first atom only to move in (1, 1, 1) plane */
//  {
//    vector3 ts_parallel, ts_vertical;
//	  
//    ts_parallel = total_speed - (total_speed*plane)*plane;
//    ts_parallel  *= (1.0/nmov);
//
//    ts_vertical = total_speed - ts_parallel;
//    ts_vertical *= (1.0/(nmov - 1));
//
//    total_speed =  ts_parallel + ts_vertical;		    
//    
//    dft_log("total speed: %g %g %g\n",
//	    total_speed.v[0], total_speed.v[1], total_speed.v[2]);
//  }
// #else
//  total_speed *= (1.0/nmov);
// #endif

  mean_speed = 0.0;
  for (sp=0; sp < ioninfo.nspecies; sp++) {
    /* Move the atoms (depending on the masks) */
    for (i=0; i < ioninfo.species[sp].natoms; i++) {
      if (ioninfo.species[sp].move[i]) {
// #ifdef PLANE_CONSTRAINT
//	/* constrain first atom only to move in (1, 1, 1) plane */
//	if (i == 0) {
//	  ioninfo.species[0].ion_speed[0] -= (total_speed - (total_speed*plane)*plane);
//
//	  dft_log("constraint on atom 0 with speed after subtracting: %g %g %g\n",
//		  ioninfo.species[sp].ion_speed[i].v[0],
//		  ioninfo.species[sp].ion_speed[i].v[1],
//		  ioninfo.species[sp].ion_speed[i].v[2]);
//	} else
// #endif
//	  ioninfo.species[sp].ion_speed[i] -= total_speed;
	      
	mean_speed += abs(ioninfo.species[sp].ion_speed[i]);
	ioninfo.species[sp].atpos[i] += ioninfo.species[sp].ion_speed[i];
      }
    }
  }
	
  dft_log("max force %g %g %g with abs %g\n",
	  max_force.v[0], max_force.v[1], max_force.v[2], abs(max_force));
  mean_speed *= (1.0/nmov);
  dft_log("mean speed on atoms %g with change %g\n",
	  mean_speed, mean_speed_old - mean_speed);
  mean_speed_old = mean_speed;

  for (sp=0; sp < ioninfo.nspecies; sp++) {
    int i;
    dft_log("Updated positions for species = %d  natoms = %d\n",
	    sp,ioninfo.species[sp].natoms);
    for (i=0; i < ioninfo.species[sp].natoms; i++)
      dft_log("%3d %4d %20.14lf %20.14lf %20.14lf\n",
	      sp,i,
	      ioninfo.species[sp].atpos[i].v[0],
	      ioninfo.species[sp].atpos[i].v[1],
	      ioninfo.species[sp].atpos[i].v[2]);
    dft_log("\n");                         
  }

  return (abs(max_force) < cntrl.force_tolerance);

}

