CHROMA
inv_eigcg2.cc
Go to the documentation of this file.
1 // -*- C++ -*-
2 /*! \file
3  * \brief Conjugate-Gradient algorithm with eigenstd::vector acceleration
4  */
5 
6 #include <qdp-lapack.h>
7 //#include "octave_debug.h"
8 //#include "octave_debug.cc"
9 
11 
14 
15 //#define DEBUG
16 #define DEBUG_FINAL
17 
18 
19 // NEEDS A LOT OF CLEAN UP
20 namespace Chroma
21 {
22  using namespace LinAlg ;
23  //using namespace Octave ;
24 
25  namespace InvEigCG2Env
26  {
27  // needs a little clean up... for possible exceptions if the size of H
28  // is smaller than hind
29  // LatticeFermionF
30  template<typename T>
32  const LinearOperator<T>& A,
33  const multi1d<T>& evec,
34  int Nvecs)
35  {
36  H.N = Nvecs ;
37  if(Nvecs>evec.size()){
38  H.N = evec.size();
39  }
40  if(H.N>H.size()){
41  QDPIO::cerr<<"OOPS! your matrix can't take this many matrix elements\n";
42  exit(1);
43  }
44  // fill H with zeros
45  H.mat = 0.0 ;
46 
47  T Ap;
48  for(int i(0);i<H.N;i++)
49  {
50  A(Ap,evec[i],PLUS) ;
51  for(int j(i);j<H.N;j++)
52  {
53  H(j,i) = innerProduct(evec[j], Ap, A.subset()) ;
54  //enforce hermiticity
55  H(i,j) = conj(H(j,i));
56  if(i==j) H(i,j) = real(H(i,j));
57  }
58  }
59  }
60 
61  template<typename T>
63  const LinearOperator<T>& A,
64  const multi1d<T>& evec,
65  const multi1d<Double>& eval,
66  int Nvecs,int NgoodEvecs)
67  {
68  H.N = Nvecs ;
69  if(Nvecs>evec.size()){
70  H.N = evec.size();
71  }
72  if(H.N>H.size()){
73  QDPIO::cerr<<"OOPS! your matrix can't take this many matrix elements\n";
74  exit(1);
75  }
76  // fill H with zeros
77  H.mat = 0.0 ;
78 
79  for(int i(0);i<NgoodEvecs;i++)
80  H(i,i) = eval(i);
81 
82  T Ap;
83  for(int i(NgoodEvecs);i<H.N;i++)
84  {
85  A(Ap,evec[i],PLUS) ;
86  for(int j(0);j<H.N;j++)
87  {
88  H(j,i) = innerProduct(evec[j], Ap, A.subset()) ;
89  //enforce hermiticity
90  H(i,j) = conj(H(j,i));
91  if(i==j) H(i,j) = real(H(i,j));
92  }
93  }
94  }
95 
96  //The new code
97  template<typename T>
99  T& x,
100  const T& b,
101  multi1d<Double>& eval,
102  multi1d<T>& evec,
103  int Neig,
104  int Nmax,
105  const Real& RsdCG, int MaxCG,
106  const int PrintLevel)
107  {
108  START_CODE();
109 
110  FlopCounter flopcount;
111  flopcount.reset();
112  StopWatch swatch;
113  swatch.reset();
114  swatch.start();
115 
117 
118  T p ;
119  T Ap;
120  T r ;
121  //T z ;
122 
123  Double rsd_sq = (RsdCG * RsdCG) * Real(norm2(b,A.subset()));
124  Double alphaprev, alpha,pAp;
125  Real beta;
126  Double betaprev ;
127  Double r_dot_z, r_dot_z_old ;
128 
129  int k = 0 ;
130  A(Ap,x,PLUS) ;
131  r[A.subset()] = b - Ap ;
132  r_dot_z = norm2(r,A.subset()) ;
133 
134 #if 1
135  QDPIO::cout << "InvEigCG2: Nevecs(input) = " << evec.size() << std::endl;
136  QDPIO::cout << "InvEigCG2: k = " << k << " res^2 = " << r_dot_z << std::endl;
137 #endif
138  Matrix<DComplex> H(Nmax) ; // square matrix containing the tridiagonal
139  Vectors<T> vec(Nmax) ; // contains the vectors we use...
140 
141 
142  beta=0.0;
143  alpha = 1.0;
144  T Ap_prev ;
145  T tt ;
146 
147  // Algorithm from page 529 of Golub and Van Loan
148  // Modified to match the m-code from A. Stathopoulos
149  while(toBool(r_dot_z>rsd_sq)){
150  /** preconditioning algorithm **/
151  //z[A.subset()]=r ; //preconditioning can be added here
152  //r_dot_z = innerProductReal(r,z,A.subset());
153  //****//
154  r_dot_z_old = r_dot_z ;
155  r_dot_z = norm2(r,A.subset());
156  Double inv_sqrt_r_dot_z = 1.0/sqrt(r_dot_z) ;
157  k++ ;
158  if(k==1){
159  //p[A.subset()] = z ;
160  p[A.subset()] = r ;
161  }
162  else{
163  betaprev = beta ;
164  beta = r_dot_z/r_dot_z_old ;
165  //p[A.subset()] = z + beta*p ;
166  p[A.subset()] = r + beta*p ;
167  }
168  //-------- Eigenvalue eigenstd::vector finding code ------
169  if((Neig>0)&& (H.N == Nmax)) Ap_prev[A.subset()]=Ap ;
170  //---------------------------------------------------
171  A(Ap,p,PLUS) ;
172 
173  //-------- Eigenvalue eigenstd::vector finding code ------
174  if(Neig>0){
175  if(k>1)
176  H(H.N-1,H.N-1) = 1/alpha + betaprev/alphaprev;
177  if(vec.N==Nmax){
178  QDPIO::cout<<"MAGIC BEGINS: H.N ="<<H.N<<std::endl ;
179 #ifdef DEBUG
180 #ifndef QDP_IS_QDPJIT
181  {
182  std::stringstream tag ;
183  tag<<"H"<<k ;
184  OctavePrintOut(H.mat,Nmax,tag.str(),"Hmatrix.m");
185  }
186  {
187  Matrix<DComplex> tmp(Nmax) ;
188  SubSpaceMatrix(tmp,A,vec.vec,vec.N);
189  std::stringstream tag ;
190  tag<<"H"<<k<<"ex" ;
191  OctavePrintOut(tmp.mat,Nmax,tag.str(),"Hmatrix.m");
192  }
193 #endif
194 #endif
195  multi2d<DComplex> Hevecs(H.mat) ;
196  multi1d<Double> Heval ;
197  char V = 'V' ; char U = 'U' ;
198  QDPLapack::zheev(V,U,Nmax,Hevecs,Heval);
199 #ifdef DEBUG
200 #ifndef QDP_IS_QDPJIT
201  {
202  std::stringstream tag ;
203  tag<<"Hevecs"<<k ;
204  OctavePrintOut(Hevecs,Nmax,tag.str(),"Hmatrix.m");
205  }
206  for(int i(0);i<Nmax;i++)
207  QDPIO::cout<<" eignvalue: "<<Heval[i]<<std::endl ;
208 #endif
209 #endif
210  multi2d<DComplex> Hevecs_old(H.mat) ;
211  multi1d<Double> Heval_old ;
212  QDPLapack::zheev(V,U,Nmax-1,Hevecs_old,Heval_old);
213  for(int i(0);i<Nmax;i++)
214  Hevecs_old(i,Nmax-1) = Hevecs_old(Nmax-1,i) = 0.0 ;
215 
216  //Reduce to 2*Neig vectors
217  H.N = Neig + Neig ; // Thickness of restart 2*Neig
218 
219  for(int i(Neig);i<2*Neig;i++)
220  for(int j(0);j<Nmax;j++)
221  Hevecs(i,j) = Hevecs_old(i-Neig,j) ;
222 
223  // Orthogonalize the last Neig vecs (keep the first Neig fixed)
224  // zgeqrf(Nmax, 2*Neig, Hevecs, Nmax,
225  // TAU_CMPLX_ofsize_2Neig, Workarr, 2*Neig*Lapackblocksize, info);
226  multi1d<DComplex> TAU ;
227  QDPLapack::zgeqrf(Nmax,2*Neig,Hevecs,TAU) ;
228  char R = 'R' ; char L = 'L' ; char N ='N' ; char C = 'C' ;
229  multi2d<DComplex> Htmp = H.mat ;
230  QDPLapack::zunmqr(R,N,Nmax,Nmax,Hevecs,TAU,Htmp);
231  QDPLapack::zunmqr(L,C,Nmax,2*Neig,Hevecs,TAU,Htmp);
232 
233  QDPLapack::zheev(V,U,2*Neig,Htmp,Heval);
234 #ifdef DEBUG
235 #ifndef QDP_IS_QDPJIT
236  {
237  std::stringstream tag ;
238  tag<<"Htmp"<<k ;
239  OctavePrintOut(Htmp,Nmax,tag.str(),"Hmatrix.m");
240  }
241 #endif
242 #endif
243  for(int i(Neig); i< 2*Neig;i++ ) // mhpws prepei na einai 0..2*Neig
244  for(int j(2*Neig); j<Nmax; j++)
245  Htmp(i,j) =0.0;
246 
247 #ifdef DEBUG
248 #ifndef QDP_IS_QDPJIT
249  {
250  std::stringstream tag ;
251  tag<<"HtmpBeforeZUM"<<k ;
252  OctavePrintOut(Htmp,Nmax,tag.str(),"Hmatrix.m");
253  }
254 #endif
255 #endif
256  QDPLapack::zunmqr(L,N,Nmax,2*Neig,Hevecs,TAU,Htmp);
257 #ifdef DEBUG
258 #ifndef QDP_IS_QDPJIT
259  {
260  std::stringstream tag ;
261  tag<<"HtmpAfeterZUM"<<k ;
262  OctavePrintOut(Htmp,Nmax,tag.str(),"Hmatrix.m");
263  }
264 #endif
265 #endif
266 
267 #ifndef USE_BLAS_FOR_LATTICEFERMIONS
268  multi1d<T> tt_vec(2*Neig);
269  for(int i(0);i<2*Neig;i++){
270  tt_vec[i][A.subset()] = zero ;
271  for(int j(0);j<Nmax;j++)
272  tt_vec[i][A.subset()] +=Htmp(i,j)*vec[j] ;
273  }
274  for(int i(0);i<2*Neig;i++)
275  vec[i][A.subset()] = tt_vec[i] ;
276 #else
277  // Here I need the restart_X bit
278  // zgemm("N", "N", Ns*Nc*Vol/2, 2*Neig, Nmax, 1.0,
279  // vec.vec, Ns*Nc*Vol, Htmp, Nmax+1, 0.0, tt_vec, Ns*Nc*Vol);
280  // copy apo tt_vec se vec.vec
281 
282 #endif
283  vec.N = 2*Neig ; // restart the vectors to keep
284 
285  H.mat = 0.0 ; // zero out H
286  for (int i=0;i<2*Neig;i++) H(i,i) = Heval[i];
287 
288  //A(tt,r,PLUS) ;
289  tt[A.subset()] = Ap - beta*Ap_prev ; //avoid the matvec
290 
291 #ifndef USE_BLAS_FOR_LATTICEFERMIONS
292  for (int i=0;i<2*Neig;i++){
293  H(2*Neig,i)=innerProduct(vec[i],tt,A.subset())*inv_sqrt_r_dot_z ;
294  H(i,2*Neig)=conj(H(2*Neig,i)) ;
295  //H(i,2*Neig)=innerProduct(vec[i],tt,A.subset())*inv_sqrt_r_dot_z ;
296  //H(2*Neig,i)=conj(H(i,2*Neig)) ;
297  }
298 #else
299  // Optimized code for creating the H matrix row and column
300  // asume even-odd layout: Can I check if this is the case at
301  // compile time? or even at run time? This is a good test to
302  // have to avoid wrong results.
303 
304 
305 #endif
306  }//H.N==Nmax
307  else{
308  if(k>1)
309  H(H.N-1,H.N) = H(H.N,H.N-1) = -sqrt(beta)/alpha;
310  }
311  H.N++ ;
312  //vec.NormalizeAndAddVector(z,inv_sqrt_r_dot_z,A.subset()) ;
313  vec.NormalizeAndAddVector(r,inv_sqrt_r_dot_z,A.subset()) ;
314  }
315 
316  pAp = innerProductReal(p,Ap,A.subset());
317  alphaprev = alpha ;// additional line for Eigenvalue eigenstd::vector code
318  alpha = r_dot_z/pAp ;
319  x[A.subset()] += alpha*p ;
320  r[A.subset()] -= alpha*Ap ;
321 
322 
323  //---------------------------------------------------
324  if(k>MaxCG){
325  res.n_count = k;
326  res.resid = sqrt(r_dot_z);
327  QDP_error_exit("too many CG iterations: count = %d", res.n_count);
328  END_CODE();
329  return res;
330  }
331 #if 1
332  QDPIO::cout << "InvEigCG2: k = " << k ;
333  QDPIO::cout << " r_dot_z = " << r_dot_z << std::endl;
334 #endif
335  }//end CG loop
336 
337  if(Neig>0){
338  evec.resize(Neig) ;
339  eval.resize(Neig);
340 
341 #define USE_LAST_VECTORS
342 #ifdef USE_LAST_VECTORS
343 
344  multi2d<DComplex> Hevecs(H.mat) ;
345  multi1d<Double> Heval ;
346  char V = 'V' ; char U = 'U' ;
347  QDPLapack::zheev(V,U,H.N-1,Hevecs,Heval);
348 
349  for(int i(0);i<Neig;i++){
350  evec[i][A.subset()] = zero ;
351  eval[i] = Heval[i] ;
352  for(int j(0);j<H.N-1;j++)
353  evec[i][A.subset()] +=Hevecs(i,j)*vec[j] ;
354  }
355 #else
356  for(int i(0);i<Neig;i++){
357  evec[i][A.subset()] = vec[i] ;
358  eval[i] = real(H(i,i)) ;
359  }
360 #endif
361  }
362  res.n_count = k ;
363  res.resid = sqrt(r_dot_z);
364  swatch.stop();
365  QDPIO::cout << "InvEigCG2: k = " << k << std::endl;
366  flopcount.report("InvEigCG2", swatch.getTimeInSeconds());
367  END_CODE();
368  return res;
369  }
370 
371 
372  template<typename T>
374  T& x,
375  const T& b,
376  multi1d<Double>& eval,
377  multi1d<T>& evec,
378  int Neig,
379  int Nmax,
380  const Real& RsdCG, int MaxCG,
381  const int PrintLevel)
382  {
383  START_CODE();
384 
385  char V = 'V' ; char U = 'U' ;
386 
387  FlopCounter flopcount;
388  flopcount.reset();
389  StopWatch swatch;
390  swatch.reset();
391  swatch.start();
392 
394 
395  T p ;
396  T Ap;
397  T r,z ;
398 
399  Double rsd_sq = (RsdCG * RsdCG) * Real(norm2(b,A.subset()));
400  Double alphaprev, alpha,pAp;
401  Real beta ;
402  Double r_dot_z, r_dot_z_old ;
403  //Complex r_dot_z, r_dot_z_old,beta ;
404  //Complex alpha,pAp ;
405 
406  int k = 0 ;
407  A(Ap,x,PLUS) ;
408  r[A.subset()] = b - Ap ;
409  Double r_norm2 = norm2(r,A.subset()) ;
410 
411  if(PrintLevel>0)
412  QDPIO::cout << "InvEigCG2: Nevecs(input) = " << evec.size() << std::endl;
413  if(PrintLevel>1)
414  QDPIO::cout << "InvEigCG2: k = " << k << " res^2 = "<<r_norm2<<std::endl;
415 
416  Real inorm(Real(1.0/sqrt(r_norm2)));
417  bool FindEvals = (Neig>0);
418  int tr; // Don't know what this does...
419  bool from_restart;
420  Matrix<DComplex> H(Nmax) ; // square matrix containing the tridiagonal
421  Vectors<T> vec(Nmax) ; // contains the vectors we use...
422  //-------- Eigenvalue eigenstd::vector finding code ------
423  vec.AddVectors(evec,A.subset()) ;
424  //QDPIO::cout<<"vec.N="<<vec.N<<std::endl ;
425  p[A.subset()] = inorm*r ; // this is not needed GramSchmidt will take care of it
426  vec.AddOrReplaceVector(p,A.subset());
427  //QDPIO::cout<<"vec.N="<<vec.N<<std::endl ;
428  normGramSchmidt(vec.vec,vec.N-1,vec.N,A.subset());
429  normGramSchmidt(vec.vec,vec.N-1,vec.N,A.subset()); //call twice: need to improve...
430  SubSpaceMatrix(H,A,vec.vec,vec.N);
431  from_restart = true ;
432  tr = H.N - 1; // this is just a flag as far as I can tell
433  //-------
434 
435  Double betaprev ;
436  beta=0.0;
437  alpha = 1.0;
438  // Algorithm from page 529 of Golub and Van Loan
439  // Modified to match the m-code from A. Stathopoulos
440  while(toBool(r_norm2>rsd_sq)){
441  /** preconditioning algorithm **
442  z[A.subset()]=r ; //preconditioning can be added here
443  r_dot_z = innerProductReal(r,z,A.subset());
444  **/
445  r_dot_z = innerProductReal(r,r,A.subset());
446  k++ ;
447  betaprev = beta; // additional line for Eigenvalue eigenstd::vector code
448  if(k==1){
449  //p[A.subset()] = z ;
450  p[A.subset()] = r ;
451  H.N++ ;
452  }
453  else{
454  beta = r_dot_z/r_dot_z_old ;
455  //p[A.subset()] = z + beta*p ;
456  p[A.subset()] = r + beta*p ;
457  //-------- Eigenvalue eigenstd::vector finding code ------
458  // fist block
459  if(FindEvals){
460  if(!((from_restart)&&(H.N == tr+1))){
461  if(!from_restart){
462  H(H.N-2,H.N-2) = 1/alpha + betaprev/alphaprev;
463  }
464  from_restart = false ;
465  H(H.N-1,H.N-2) = -sqrt(beta)/alpha;
466  H(H.N-2,H.N-1) = -sqrt(beta)/alpha;
467  }
468  H.N++ ;
469  }
470  //---------------------------------------------------
471  }
472  A(Ap,p,PLUS) ;
473  pAp = innerProductReal(p,Ap,A.subset());
474 
475  alphaprev = alpha ;// additional line for Eigenvalue eigenstd::vector code
476  alpha = r_dot_z/pAp ;
477  x[A.subset()] += alpha*p ;
478  r[A.subset()] -= alpha*Ap ;
479  r_norm2 = norm2(r,A.subset()) ;
480  r_dot_z_old = r_dot_z ;
481 
482 
483  //-------- Eigenvalue eigenstd::vector finding code ------
484  // second block
485  if(FindEvals){
486  if (vec.N==Nmax){//we already have stored the maximum number of vectors
487  // The magic begins here....
488  if(PrintLevel>0)
489  QDPIO::cout<<"MAGIC BEGINS: H.N ="<<H.N<<std::endl ;
490  H(Nmax-1,Nmax-1) = 1/alpha + beta/alphaprev;
491 
492  multi2d<DComplex> Hevecs(H.mat) ;
493  multi1d<Double> Heval ;
494  QDPLapack::zheev(V,U,Hevecs,Heval);
495  multi2d<DComplex> Hevecs_old(H.mat) ;
496 
497  multi1d<Double> Heval_old ;
498  QDPLapack::zheev(V,U,Nmax-1,Hevecs_old,Heval_old);
499  for(int i(0);i<Nmax;i++)
500 
501  tr = Neig + Neig ; // v_old = Neig optimal choice
502 
503  for(int i(Neig);i<tr;i++)
504  for(int j(0);j<Nmax;j++)
505  Hevecs(i,j) = Hevecs_old(i-Neig,j) ;
506 
507  // Orthogonalize the last Neig vecs (keep the first Neig fixed)
508  // zgeqrf(Nmax, 2*Neig, Hevecs, Nmax,
509  // TAU_CMPLX_ofsize_2Neig, Workarr, 2*Neig*Lapackblocksize, info);
510  multi1d<DComplex> TAU ;
511  QDPLapack::zgeqrf(Nmax,2*Neig,Hevecs,TAU) ;
512  char R = 'R' ; char L = 'L' ; char N ='N' ; char C = 'C' ;
513  multi2d<DComplex> Htmp = H.mat ;
514  QDPLapack::zunmqr(R,N,Nmax,Nmax,Hevecs,TAU,Htmp);
515  QDPLapack::zunmqr(L,C,Nmax,2*Neig,Hevecs,TAU,Htmp);
516  // Notice that now H is of size 2Neig x 2Neig,
517  // but still with LDA = Nmax
518 
519  QDPLapack::zheev(V,U,2*Neig,Htmp,Heval);
520  for(int i(Neig); i< 2*Neig;i++ )
521  for(int j(2*Neig); j<Nmax; j++)
522  Htmp(i,j) =0.0;
523 
524  QDPLapack::zunmqr(L,N,Nmax,2*Neig,Hevecs,TAU,Htmp);
525 
526  // Here I need the restart_X bit
527  multi1d<T> tt_vec = vec.vec;
528  for(int i(0);i<2*Neig;i++){
529  vec[i][A.subset()] = zero ;
530  for(int j(0);j<Nmax;j++)
531  vec[i][A.subset()] +=Htmp(i,j)*tt_vec[j] ; // NEED TO CHECK THE INDEXING
532  }
533 
534  H.mat = 0.0 ; // zero out H
535  for (int i=0;i<2*Neig;i++) H(i,i) = Heval[i];
536 
537  T Ar ;
538  // Need to reorganize so that we aboid this extra matvec
539  A(Ar,r,PLUS) ;
540  Ar /= sqrt(r_norm2);
541  // this has the oposite convension than the subspace matrix
542  // this is the reason the std::vector reconstruction in here does not
543  // need the conj(H) while in the Rayleigh Ritz refinement
544  // it does need it.
545  // In here the only complex matrix elements are these computined
546  // in the next few lines. This is why the inconsistency
547  // does not matter anywhere else.
548  // In the refinement step though all matrix elements are complex
549  // hence things break down.
550  // Need to fix the convension so that we do not
551  // have these inconsistencies.
552  for (int i=0;i<2*Neig;i++){
553  H(2*Neig,i) = innerProduct(vec[i], Ar, A.subset()) ;
554  H(i,2*Neig) = conj(H(2*Neig,i)) ;
555  }
556  H(2*Neig,2*Neig) = innerProduct(r, Ar, A.subset())/sqrt(r_norm2) ;
557 
558  H.N = 2*Neig + 1 ; // why this ?
559  from_restart = true ;
560  vec.N = 2*Neig ; // only keep the lowest Neig. Is this correct???
561 
562  }
563  // Here we add a std::vector in the list
564  Double inorm = 1.0/sqrt(r_norm2) ;
565  vec.NormalizeAndAddVector(r,inorm,A.subset()) ;
566  // Shouldn't be the z std::vector when a preconditioner is used?
567  }
568  //---------------------------------------------------
569 
570  if(k>MaxCG){
571  res.n_count = k;
572  res.resid = sqrt(r_norm2);
573  QDP_error_exit("too many CG iterations: count = %d", res.n_count);
574  END_CODE();
575  return res;
576  }
577  if(PrintLevel>1){
578  QDPIO::cout << "InvEigCG2: k = " << k << " res^2 = " << r_norm2 ;
579  QDPIO::cout << " r_dot_z = " << r_dot_z << std::endl;
580  }
581  }
582  res.n_count = k ;
583  res.resid = sqrt(r_norm2);
584  if(FindEvals)
585  {
586  // Evec Code ------ Before we return --------
587  // vs is the current number of vectors stored
588  // Neig is the number of eigenstd::vector we want to compute
589  normGramSchmidt(vec.vec,0,2*Neig,A.subset());
590  normGramSchmidt(vec.vec,0,2*Neig,A.subset());
591  Matrix<DComplex> Htmp(2*Neig) ;
592  SubSpaceMatrix(Htmp,A,vec.vec,2*Neig);
593 
594  //char V = 'V' ; char U = 'U' ;
595  multi1d<Double> tt_eval ;
596  QDPLapack::zheev(V,U,Htmp.mat,tt_eval);
597  evec.resize(Neig) ;
598  eval.resize(Neig);
599  for(int i(0);i<Neig;i++){
600  evec[i][A.subset()] = zero ;
601  eval[i] = tt_eval[i] ;
602  for(int j(0);j<2*Neig;j++)
603  evec[i][A.subset()] += Htmp(i,j)*vec[j] ;
604  }
605 
606  // I will do the checking of eigenstd::vector quality outside this routine
607  // -------------------------------------------
608  if(PrintLevel>3){
609  // CHECK IF vec are eigenvectors...
610  T Av ;
611  for(int i(0);i<Neig;i++){
612  A(Av,evec[i],PLUS) ;
613  DComplex rq = innerProduct(evec[i],Av,A.subset());
614  Av[A.subset()] -= eval[i]*evec[i] ;
615  Double tt = sqrt(norm2(Av,A.subset()));
616  QDPIO::cout<<"FINAL: error eigenstd::vector["<<i<<"] = "<<tt<<" " ;
617  tt = sqrt(norm2(evec[i],A.subset()));
618  QDPIO::cout<<"--- rq ="<<real(rq)<<" ";
619  QDPIO::cout<<"--- norm = "<<tt<<std::endl ;
620  }
621  }
622  }
623 
624  res.n_count = k;
625  res.resid = sqrt(r_norm2);
626  swatch.stop();
627  QDPIO::cout << "InvEigCG2: k = " << k << std::endl;
628  flopcount.report("InvEigCG2", swatch.getTimeInSeconds());
629  END_CODE();
630  return res;
631  }
632 
633 
634 
635 #if 0
636  //The old code
637  template<typename T>
639  T& x,
640  const T& b,
641  multi1d<Double>& eval,
642  multi1d<T>& evec,
643  int Neig,
644  int Nmax,
645  const Real& RsdCG, int MaxCG)
646  {
647  START_CODE();
648 
649  FlopCounter flopcount;
650  flopcount.reset();
651  StopWatch swatch;
652  swatch.reset();
653  swatch.start();
654 
656 
657  T p ;
658  T Ap;
659  T r,z ;
660 
661  Double rsd_sq = (RsdCG * RsdCG) * Real(norm2(b,A.subset()));
662  Double alphaprev, alpha,pAp;
663  Real beta ;
664  Double r_dot_z, r_dot_z_old ;
665  //Complex r_dot_z, r_dot_z_old,beta ;
666  //Complex alpha,pAp ;
667 
668  int k = 0 ;
669  A(Ap,x,PLUS) ;
670  r[A.subset()] = b - Ap ;
671  Double r_norm2 = norm2(r,A.subset()) ;
672 
673 #if 1
674  QDPIO::cout << "InvEigCG2: Nevecs(input) = " << evec.size() << std::endl;
675  QDPIO::cout << "InvEigCG2: k = " << k << " res^2 = " << r_norm2 << std::endl;
676 #endif
677  Real inorm(Real(1.0/sqrt(r_norm2)));
678  bool FindEvals = (Neig>0);
679  int tr; // Don't know what this does...
680  bool from_restart;
681  Matrix<DComplex> H(Nmax) ; // square matrix containing the tridiagonal
682  Vectors<T> vec(Nmax) ; // contains the vectors we use...
683  //-------- Eigenvalue eigenstd::vector finding code ------
684  vec.AddVectors(evec,A.subset()) ;
685  //QDPIO::cout<<"vec.N="<<vec.N<<std::endl ;
686  p[A.subset()] = inorm*r ; // this is not needed GramSchmidt will take care of it
687  vec.AddOrReplaceVector(p,A.subset());
688  //QDPIO::cout<<"vec.N="<<vec.N<<std::endl ;
689  normGramSchmidt(vec.vec,vec.N-1,vec.N,A.subset());
690  normGramSchmidt(vec.vec,vec.N-1,vec.N,A.subset()); //call twice: need to improve...
691  SubSpaceMatrix(H,A,vec.vec,vec.N);
692  from_restart = true ;
693  tr = H.N - 1; // this is just a flag as far as I can tell
694  //-------
695 
696  Double betaprev ;
697  beta=0.0;
698  alpha = 1.0;
699  // Algorithm from page 529 of Golub and Van Loan
700  // Modified to match the m-code from A. Stathopoulos
701  while(toBool(r_norm2>rsd_sq)){
702  /** preconditioning algorithm **/
703  z[A.subset()]=r ; //preconditioning can be added here
704  /**/
705  r_dot_z = innerProductReal(r,z,A.subset());
706  k++ ;
707  betaprev = beta; // additional line for Eigenvalue eigenstd::vector code
708  if(k==1){
709  p[A.subset()] = z ;
710  H.N++ ;
711  }
712  else{
713  beta = r_dot_z/r_dot_z_old ;
714  p[A.subset()] = z + beta*p ;
715  //-------- Eigenvalue eigenstd::vector finding code ------
716  // fist block
717  if(FindEvals){
718  if(!((from_restart)&&(H.N == tr+1))){
719  if(!from_restart){
720  H(H.N-2,H.N-2) = 1/alpha + betaprev/alphaprev;
721  }
722  from_restart = false ;
723  H(H.N-1,H.N-2) = -sqrt(beta)/alpha;
724  H(H.N-2,H.N-1) = -sqrt(beta)/alpha;
725  }
726  H.N++ ;
727  }
728  //---------------------------------------------------
729  }
730  A(Ap,p,PLUS) ;
731  pAp = innerProductReal(p,Ap,A.subset());
732 
733  alphaprev = alpha ;// additional line for Eigenvalue eigenstd::vector code
734  alpha = r_dot_z/pAp ;
735  x[A.subset()] += alpha*p ;
736  r[A.subset()] -= alpha*Ap ;
737  r_norm2 = norm2(r,A.subset()) ;
738  r_dot_z_old = r_dot_z ;
739 
740 
741  //-------- Eigenvalue eigenstd::vector finding code ------
742  // second block
743  if(FindEvals){
744  if (vec.N==Nmax){//we already have stored the maximum number of vectors
745  // The magic begins here....
746  QDPIO::cout<<"MAGIC BEGINS: H.N ="<<H.N<<std::endl ;
747  H(Nmax-1,Nmax-1) = 1/alpha + beta/alphaprev;
748 
749 #ifdef DEBUG
750 #ifndef QDP_IS_QDPJIT
751  {
752  std::stringstream tag ;
753  tag<<"H"<<k ;
754  OctavePrintOut(H.mat,Nmax,tag.str(),"Hmatrix.m");
755  }
756  {
757  Matrix<DComplex> tmp(Nmax) ;
758  SubSpaceMatrix(tmp,A,vec.vec,vec.N);
759  std::stringstream tag ;
760  tag<<"H"<<k<<"ex" ;
761  OctavePrintOut(tmp.mat,Nmax,tag.str(),"Hmatrix.m");
762  }
763 #endif
764 #endif
765  //exit(1);
766  multi2d<DComplex> Hevecs(H.mat) ;
767  multi1d<Double> Heval ;
768  char V = 'V' ; char U = 'U' ;
769  QDPLapack::zheev(V,U,Hevecs,Heval);
770  multi2d<DComplex> Hevecs_old(H.mat) ;
771 
772 #ifdef DEBUG
773  {
774  multi1d<T> tt_vec(vec.size());
775  for(int i(0);i<Nmax;i++){
776  tt_vec[i][A.subset()] = zero ;
777  for(int j(0);j<Nmax;j++)
778  tt_vec[i][A.subset()] +=Hevecs(i,j)*vec[j] ; // NEED TO CHECK THE INDEXINT
779  }
780  // CHECK IF vec are eigenvectors...
781 
782  T Av ;
783  for(int i(0);i<Nmax;i++){
784  A(Av,tt_vec[i],PLUS) ;
785  DComplex rq = innerProduct(tt_vec[i],Av,A.subset());
786  Av[A.subset()] -= Heval[i]*tt_vec[i] ;
787  Double tt = sqrt(norm2(Av,A.subset()));
788  QDPIO::cout<<"1 error eigenstd::vector["<<i<<"] = "<<tt<<" " ;
789  tt = sqrt(norm2(tt_vec[i],A.subset()));
790  QDPIO::cout<<" --- eval = "<<Heval[i]<<" ";
791  QDPIO::cout<<" --- rq = "<<real(rq)<<" ";
792  QDPIO::cout<<"--- norm = "<<tt<<std::endl ;
793  }
794  }
795 #endif
796  multi1d<Double> Heval_old ;
797  QDPLapack::zheev(V,U,Nmax-1,Hevecs_old,Heval_old);
798  for(int i(0);i<Nmax;i++)
799  Hevecs_old(i,Nmax-1) = Hevecs_old(Nmax-1,i) = 0.0 ;
800 #ifdef DEBUG
801 #ifndef QDP_IS_QDPJIT
802  {
803  std::stringstream tag ;
804  tag<<"Hevecs_old"<<k ;
805  OctavePrintOut(Hevecs_old,Nmax,tag.str(),"Hmatrix.m");
806  }
807 
808 #endif
809 #endif
810  tr = Neig + Neig ; // v_old = Neig optimal choice
811 
812  for(int i(Neig);i<tr;i++)
813  for(int j(0);j<Nmax;j++)
814  Hevecs(i,j) = Hevecs_old(i-Neig,j) ;
815 #ifdef DEBUG
816 #ifndef QDP_IS_QDPJIT
817  {
818  std::stringstream tag ;
819  tag<<"Hevecs"<<k ;
820  OctavePrintOut(Hevecs,Nmax,tag.str(),"Hmatrix.m");
821  }
822 #endif
823 #endif
824 
825  // Orthogonalize the last Neig vecs (keep the first Neig fixed)
826  // zgeqrf(Nmax, 2*Neig, Hevecs, Nmax,
827  // TAU_CMPLX_ofsize_2Neig, Workarr, 2*Neig*Lapackblocksize, info);
828  multi1d<DComplex> TAU ;
829  QDPLapack::zgeqrf(Nmax,2*Neig,Hevecs,TAU) ;
830  char R = 'R' ; char L = 'L' ; char N ='N' ; char C = 'C' ;
831  multi2d<DComplex> Htmp = H.mat ;
832  QDPLapack::zunmqr(R,N,Nmax,Nmax,Hevecs,TAU,Htmp);
833  QDPLapack::zunmqr(L,C,Nmax,2*Neig,Hevecs,TAU,Htmp);
834  // Notice that now H is of size 2Neig x 2Neig,
835  // but still with LDA = Nmax
836 #ifdef DEBUG
837 #ifndef QDP_IS_QDPJIT
838  {
839  std::stringstream tag ;
840  tag<<"Htmp"<<k ;
841  OctavePrintOut(Htmp,Nmax,tag.str(),"Hmatrix.m");
842  }
843 #endif
844 #endif
845  QDPLapack::zheev(V,U,2*Neig,Htmp,Heval);
846  for(int i(Neig); i< 2*Neig;i++ )
847  for(int j(2*Neig); j<Nmax; j++)
848  Htmp(i,j) =0.0;
849 #ifdef DEBUG
850 #ifndef QDP_IS_QDPJIT
851  {
852  std::stringstream tag ;
853  tag<<"evecstmp"<<k ;
854  OctavePrintOut(Htmp,Nmax,tag.str(),"Hmatrix.m");
855  }
856 #endif
857 #endif
858  QDPLapack::zunmqr(L,N,Nmax,2*Neig,Hevecs,TAU,Htmp);
859 #ifdef DEBUG
860 #ifndef QDP_IS_QDPJIT
861  {
862  std::stringstream tag ;
863  tag<<"Yevecstmp"<<k ;
864  OctavePrintOut(Htmp,Nmax,tag.str(),"Hmatrix.m");
865  }
866 #endif
867 #endif
868  // Here I need the restart_X bit
869  multi1d<T> tt_vec = vec.vec;
870  for(int i(0);i<2*Neig;i++){
871  vec[i][A.subset()] = zero ;
872  for(int j(0);j<Nmax;j++)
873  vec[i][A.subset()] +=Htmp(i,j)*tt_vec[j] ; // NEED TO CHECK THE INDEXING
874  }
875 
876 #ifdef DEBUG
877 #ifndef QDP_IS_QDPJIT
878  // CHECK IF vec are eigenvectors...
879  {
880  T Av ;
881  for(int i(0);i<2*Neig;i++){
882  A(Av,vec[i],PLUS) ;
883  DComplex rq = innerProduct(vec[i],Av,A.subset());
884  Av[A.subset()] -= Heval[i]*vec[i] ;
885  Double tt = sqrt(norm2(Av,A.subset()));
886  QDPIO::cout<<"error eigenstd::vector["<<i<<"] = "<<tt<<" " ;
887  tt = sqrt(norm2(vec[i],A.subset()));
888  QDPIO::cout<<"--- rq ="<<real(rq)<<" ";
889  QDPIO::cout<<"--- norm = "<<tt<<std::endl ;
890  }
891 
892  }
893 #endif
894 #endif
895  H.mat = 0.0 ; // zero out H
896  for (int i=0;i<2*Neig;i++) H(i,i) = Heval[i];
897 
898  T Ar ;
899  // Need to reorganize so that we aboid this extra matvec
900  A(Ar,r,PLUS) ;
901  Ar /= sqrt(r_norm2);
902  // this has the oposite convension than the subspace matrix
903  // this is the reason the std::vector reconstruction in here does not
904  // need the conj(H) while in the Rayleigh Ritz refinement
905  // it does need it.
906  // In here the only complex matrix elements are these computined
907  // in the next few lines. This is why the inconsistency
908  // does not matter anywhere else.
909  // In the refinement step though all matrix elements are complex
910  // hence things break down.
911  // Need to fix the convension so that we do not
912  // have these inconsistencies.
913  for (int i=0;i<2*Neig;i++){
914  H(2*Neig,i) = innerProduct(vec[i], Ar, A.subset()) ;
915  H(i,2*Neig) = conj(H(2*Neig,i)) ;
916  }
917  H(2*Neig,2*Neig) = innerProduct(r, Ar, A.subset())/sqrt(r_norm2) ;
918 
919  H.N = 2*Neig + 1 ; // why this ?
920  from_restart = true ;
921  vec.N = 2*Neig ; // only keep the lowest Neig. Is this correct???
922 #ifdef DEBUG
923 #ifndef QDP_IS_QDPJIT
924  {
925  std::stringstream tag ;
926  tag<<"finalH"<<k ;
927  OctavePrintOut(H.mat,Nmax,tag.str(),"Hmatrix.m");
928  }
929 #endif
930 #endif
931  }
932  // Here we add a std::vector in the list
933  Double inorm = 1.0/sqrt(r_norm2) ;
934  vec.NormalizeAndAddVector(r,inorm,A.subset()) ;
935  // Shouldn't be the z std::vector when a preconditioner is used?
936  }
937  //---------------------------------------------------
938 
939  if(k>MaxCG){
940  res.n_count = k;
941  res.resid = sqrt(r_norm2);
942  QDP_error_exit("too many CG iterations: count = %d", res.n_count);
943  END_CODE();
944  return res;
945  }
946 #if 1
947  QDPIO::cout << "InvEigCG2: k = " << k << " res^2 = " << r_norm2 ;
948  QDPIO::cout << " r_dot_z = " << r_dot_z << std::endl;
949 #endif
950  }
951  res.n_count = k ;
952  res.resid = sqrt(r_norm2);
953  if(FindEvals)
954  {
955  // Evec Code ------ Before we return --------
956  // vs is the current number of vectors stored
957  // Neig is the number of eigenstd::vector we want to compute
958  normGramSchmidt(vec.vec,0,2*Neig,A.subset());
959  normGramSchmidt(vec.vec,0,2*Neig,A.subset());
960  Matrix<DComplex> Htmp(2*Neig) ;
961  SubSpaceMatrix(Htmp,A,vec.vec,2*Neig);
962 
963  char V = 'V' ; char U = 'U' ;
964  multi1d<Double> tt_eval ;
965  QDPLapack::zheev(V,U,Htmp.mat,tt_eval);
966  evec.resize(Neig) ;
967  eval.resize(Neig);
968  for(int i(0);i<Neig;i++){
969  evec[i][A.subset()] = zero ;
970  eval[i] = tt_eval[i] ;
971  for(int j(0);j<2*Neig;j++)
972  evec[i][A.subset()] += Htmp(i,j)*vec[j] ;
973  }
974 
975  // I will do the checking of eigenstd::vector quality outside this routine
976  // -------------------------------------------
977 #ifdef DEBUG_FINAL
978  // CHECK IF vec are eigenvectors...
979  {
980  T Av ;
981  for(int i(0);i<Neig;i++)
982  {
983  A(Av,evec[i],PLUS) ;
984  DComplex rq = innerProduct(evec[i],Av,A.subset());
985  Av[A.subset()] -= eval[i]*evec[i] ;
986  Double tt = sqrt(norm2(Av,A.subset()));
987  QDPIO::cout<<"FINAL: error eigenstd::vector["<<i<<"] = "<<tt<<" " ;
988  tt = sqrt(norm2(evec[i],A.subset()));
989  QDPIO::cout<<"--- rq ="<<real(rq)<<" ";
990  QDPIO::cout<<"--- norm = "<<tt<<std::endl ;
991  }
992 
993  }
994 #endif
995  }
996 
997  res.n_count = k;
998  res.resid = sqrt(r_norm2);
999  swatch.stop();
1000  QDPIO::cout << "InvEigCG2: k = " << k << std::endl;
1001  flopcount.report("InvEigCG2", swatch.getTimeInSeconds());
1002  END_CODE();
1003  return res;
1004  }
1005 
1006 #endif
1007 
1008  // A should be Hermitian positive definite
1009  template<typename T>
1011  T& x,
1012  const T& b,
1013  const multi1d<Double>& eval,
1014  const multi1d<T>& evec,
1015  int startV, int endV,
1016  const Real& RsdCG, int MaxCG)
1017  {
1018  START_CODE();
1019 
1020  FlopCounter flopcount;
1021  flopcount.reset();
1022  StopWatch swatch;
1023  swatch.reset();
1024  swatch.start();
1025 
1026  if(endV>eval.size()){
1027  QDP_error_exit("vPrecondCG: not enought evecs eval.size()=%d",eval.size());
1028  }
1029 
1031 
1032  T p ;
1033  T Ap;
1034  T r,z ;
1035 
1036  Double rsd_sq = (RsdCG * RsdCG) * Real(norm2(b,A.subset()));
1037  Double alpha,pAp;
1038  Real beta ;
1039  Double r_dot_z, r_dot_z_old ;
1040  //Complex r_dot_z, r_dot_z_old,beta ;
1041  //Complex alpha,pAp ;
1042 
1043  int k = 0 ;
1044  A(Ap,x,PLUS) ;
1045  r[A.subset()] = b - Ap ;
1046  Double r_norm2 = norm2(r,A.subset()) ;
1047 
1048 #if 1
1049  QDPIO::cout << "vecPrecondCG: k = " << k << " res^2 = " << r_norm2 << std::endl;
1050 #endif
1051 
1052  // Algorithm from page 529 of Golub and Van Loan
1053  while(toBool(r_norm2>rsd_sq)){
1054  /** preconditioning algorithm **/
1055  z[A.subset()]=r ; // not optimal but do it for now...
1056  /**/
1057  for(int i(startV);i<endV;i++){
1058  DComplex d = innerProduct(evec[i],r,A.subset()) ;
1059  //QDPIO::cout<<"vecPrecondCG: "<< d<<" "<<(1.0/eval[i]-1.0)<<std::endl;
1060  z[A.subset()] += (1.0/eval[i]-1.0)*d*evec[i];
1061  }
1062  /**/
1063  /**/
1064  r_dot_z = innerProductReal(r,z,A.subset());
1065  k++ ;
1066  if(k==1){
1067  p[A.subset()] = z ;
1068  }
1069  else{
1070  beta = r_dot_z/r_dot_z_old ;
1071  p[A.subset()] = z + beta*p ;
1072  }
1073  //Cheb.Qsq(Ap,p) ;
1074  A(Ap,p,PLUS) ;
1075  pAp = innerProductReal(p,Ap,A.subset());
1076 
1077  alpha = r_dot_z/pAp ;
1078  x[A.subset()] += alpha*p ;
1079  r[A.subset()] -= alpha*Ap ;
1080  r_norm2 = norm2(r,A.subset()) ;
1081  r_dot_z_old = r_dot_z ;
1082 
1083  if(k>MaxCG){
1084  res.n_count = k ;
1085  QDP_error_exit("too many CG iterations: count = %d", res.n_count);
1086  return res;
1087  }
1088 #if 1
1089  QDPIO::cout << "vecPrecondCG: k = " << k << " res^2 = " << r_norm2 ;
1090  QDPIO::cout << " r_dot_z = " << r_dot_z << std::endl;
1091 #endif
1092  }
1093 
1094  res.n_count = k ;
1095  res.resid = sqrt(r_norm2);
1096  swatch.stop();
1097  QDPIO::cout << "vPreconfCG: k = " << k << std::endl;
1098  flopcount.report("vPrecondCG", swatch.getTimeInSeconds());
1099  END_CODE();
1100  return res ;
1101  }
1102 
1103  template<typename T>
1105  T& x,
1106  const T& b,
1107  const multi1d<Double>& eval,
1108  const multi1d<T>& evec,
1109  int& n_count)
1110  {
1111  int N = evec.size();
1112  InitGuess(A,x,b,eval,evec,N,n_count);
1113  }
1114 
1115  template<typename T>
1117  T& x,
1118  const T& b,
1119  const multi1d<Double>& eval,
1120  const multi1d<T>& evec,
1121  int N, // number of vectors to use
1122  int& n_count)
1123  {
1124  T Ap;
1125  T r ;
1126 
1127  StopWatch snoop;
1128  //snoop.reset();
1129  //snoop.start();
1130 
1131  A(Ap,x,PLUS) ;
1132  r[A.subset()] = b - Ap ;
1133  // Double r_norm2 = norm2(r,A.subset()) ;
1134 
1135  for(int i(0);i<N;i++){
1136  DComplex d = innerProduct(evec[i],r,A.subset());
1137  x[A.subset()] += (d/eval[i])*evec[i];
1138  //QDPIO::cout<<"InitCG: "<<d<<" "<<eval[i]<<std::endl ;
1139 
1140  }
1141 
1142  //snoop.stop();
1143  /**
1144  QDPIO::cout << "InitGuess: time = "
1145  << snoop.getTimeInSeconds()
1146  << " secs" << std::endl;
1147  **/
1148  n_count = 1 ;
1149  }
1150 
1151 
1152  //
1153  // Wrappers
1154  //
1155  // LatticeFermionF
1158  const multi1d<LatticeFermionF>& evec,
1159  int Nvecs)
1160  {
1161  SubSpaceMatrix_T<LatticeFermionF>(H, A, evec, Nvecs);
1162  }
1163 
1166  const multi1d<LatticeFermionF>& evec,
1167  const multi1d<Double>& eval,
1168  int Nvecs,int NgoodEvecs){
1169  SubSpaceMatrix_T<LatticeFermionF>(H, A, evec, eval, Nvecs, NgoodEvecs) ;
1170  }
1171 
1173  LatticeFermionF& x,
1174  const LatticeFermionF& b,
1175  multi1d<Double>& eval,
1176  multi1d<LatticeFermionF>& evec,
1177  int Neig,
1178  int Nmax,
1179  const Real& RsdCG, int MaxCG,
1180  const int PrintLevel)
1181  {
1182  return InvEigCG2_T<LatticeFermionF>(A, x, b, eval, evec, Neig, Nmax, RsdCG, MaxCG, PrintLevel);
1183  }
1184 
1186  LatticeFermionF& x,
1187  const LatticeFermionF& b,
1188  const multi1d<Double>& eval,
1189  const multi1d<LatticeFermionF>& evec,
1190  int startV, int endV,
1191  const Real& RsdCG, int MaxCG)
1192  {
1193  return vecPrecondCG_T<LatticeFermionF>(A, x, b, eval, evec, startV, endV, RsdCG, MaxCG);
1194  }
1195 
1197  LatticeFermionF& x,
1198  const LatticeFermionF& b,
1199  const multi1d<Double>& eval,
1200  const multi1d<LatticeFermionF>& evec,
1201  int& n_count)
1202  {
1203  InitGuess_T(A, x, b, eval, evec, n_count);
1204  }
1205 
1207  LatticeFermionF& x,
1208  const LatticeFermionF& b,
1209  const multi1d<Double>& eval,
1210  const multi1d<LatticeFermionF>& evec,
1211  int N, // number of vectors to use
1212  int& n_count)
1213  {
1214  InitGuess_T(A, x, b, eval, evec, N, n_count);
1215  }
1216 
1217 
1218  // LatticeFermionD
1221  const multi1d<LatticeFermionD>& evec,
1222  int Nvecs)
1223  {
1224  SubSpaceMatrix_T<LatticeFermionD>(H, A, evec, Nvecs);
1225  }
1226 
1229  const multi1d<LatticeFermionD>& evec,
1230  const multi1d<Double>& eval,
1231  int Nvecs,int NgoodEvecs){
1232  SubSpaceMatrix_T<LatticeFermionD>(H, A, evec, eval, Nvecs, NgoodEvecs) ;
1233  }
1234 
1235 
1237  LatticeFermionD& x,
1238  const LatticeFermionD& b,
1239  multi1d<Double>& eval,
1240  multi1d<LatticeFermionD>& evec,
1241  int Neig,
1242  int Nmax,
1243  const Real& RsdCG, int MaxCG,
1244  const int plvl)
1245  {
1246  return InvEigCG2_T<LatticeFermionD>(A, x, b, eval, evec, Neig, Nmax, RsdCG, MaxCG,plvl);
1247  }
1248 
1250  LatticeFermionD& x,
1251  const LatticeFermionD& b,
1252  const multi1d<Double>& eval,
1253  const multi1d<LatticeFermionD>& evec,
1254  int startV, int endV,
1255  const Real& RsdCG, int MaxCG)
1256  {
1257  return vecPrecondCG_T<LatticeFermionD>(A, x, b, eval, evec, startV, endV, RsdCG, MaxCG);
1258  }
1259 
1261  LatticeFermionD& x,
1262  const LatticeFermionD& b,
1263  const multi1d<Double>& eval,
1264  const multi1d<LatticeFermionD>& evec,
1265  int& n_count)
1266  {
1267  InitGuess_T(A, x, b, eval, evec, n_count);
1268  }
1269 
1271  LatticeFermionD& x,
1272  const LatticeFermionD& b,
1273  const multi1d<Double>& eval,
1274  const multi1d<LatticeFermionD>& evec,
1275  int N, // number of vectors to use
1276  int& n_count)
1277  {
1278  InitGuess_T(A, x, b, eval, evec, N, n_count);
1279  }
1280  } // namespace InvEigCG2Env
1281 
1282 }// End Namespace Chroma
1283 
This is a square matrix.
Definition: containers.h:328
void AddVectors(multi1d< T > &v, const Subset &s)
Definition: containers.h:204
void AddOrReplaceVector(const T &v, const Subset &s)
Definition: containers.h:193
void NormalizeAndAddVector(const T &v, const Double &inorm, const Subset &s)
Definition: containers.h:185
Linear Operator.
Definition: linearop.h:27
void normGramSchmidt(multi1d< LatticeFermionF > &vec, int f, int t, const Subset &sub)
Gram-Schmidt with normalization.
Conjugate-Gradient algorithm with eigenstd::vector acceleration.
unsigned j
Definition: ldumul_w.cc:35
int z
Definition: meslate.cc:36
int x
Definition: meslate.cc:34
SpinMatrix C()
C = Gamma(10)
Definition: barspinmat_w.cc:29
BinaryReturn< C1, C2, FnInnerProduct >::Type_t innerProduct(const QDPSubType< T1, C1 > &s1, const QDPType< T2, C2 > &s2)
SystemSolverResults_t vecPrecondCG(const LinearOperator< LatticeFermionD > &A, LatticeFermionD &x, const LatticeFermionD &b, const multi1d< Double > &eval, const multi1d< LatticeFermionD > &evec, int startV, int endV, const Real &RsdCG, int MaxCG)
Definition: inv_eigcg2.cc:1249
void InitGuess(const LinearOperator< LatticeFermionD > &A, LatticeFermionD &x, const LatticeFermionD &b, const multi1d< Double > &eval, const multi1d< LatticeFermionD > &evec, int N, int &n_count)
Definition: inv_eigcg2.cc:1270
SystemSolverResults_t new_InvEigCG2_T(const LinearOperator< T > &A, T &x, const T &b, multi1d< Double > &eval, multi1d< T > &evec, int Neig, int Nmax, const Real &RsdCG, int MaxCG, const int PrintLevel)
Definition: inv_eigcg2.cc:98
void SubSpaceMatrix(LinAlg::Matrix< DComplex > &H, const LinearOperator< LatticeFermionD > &A, const multi1d< LatticeFermionD > &evec, const multi1d< Double > &eval, int Nvecs, int NgoodEvecs)
Definition: inv_eigcg2.cc:1227
SystemSolverResults_t vecPrecondCG_T(const LinearOperator< T > &A, T &x, const T &b, const multi1d< Double > &eval, const multi1d< T > &evec, int startV, int endV, const Real &RsdCG, int MaxCG)
Definition: inv_eigcg2.cc:1010
SystemSolverResults_t InvEigCG2_T(const LinearOperator< T > &A, T &x, const T &b, multi1d< Double > &eval, multi1d< T > &evec, int Neig, int Nmax, const Real &RsdCG, int MaxCG, const int PrintLevel)
Definition: inv_eigcg2.cc:373
void SubSpaceMatrix_T(LinAlg::Matrix< DComplex > &H, const LinearOperator< T > &A, const multi1d< T > &evec, const multi1d< Double > &eval, int Nvecs, int NgoodEvecs)
Definition: inv_eigcg2.cc:62
void InitGuess_T(const LinearOperator< T > &A, T &x, const T &b, const multi1d< Double > &eval, const multi1d< T > &evec, int N, int &n_count)
Definition: inv_eigcg2.cc:1116
SystemSolverResults_t InvEigCG2(const LinearOperator< LatticeFermionD > &A, LatticeFermionD &x, const LatticeFermionD &b, multi1d< Double > &eval, multi1d< LatticeFermionD > &evec, int Neig, int Nmax, const Real &RsdCG, int MaxCG, const int plvl)
Definition: inv_eigcg2.cc:1236
static const LatticeInteger & beta(const int dim)
Definition: stag_phases_s.h:47
static const LatticeInteger & alpha(const int dim)
Definition: stag_phases_s.h:43
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > & RsdCG
Definition: pbg5p_w.cc:30
QDP_error_exit("too many BiCG iterations", n_count, rsd_sq, cp, c, re_rvr, im_rvr, re_a, im_a, re_b, im_b)
int n_count
Definition: invbicg.cc:78
LatticeFermion tmp
Definition: mespbg5p_w.cc:36
LinOpSysSolverMGProtoClover::T T
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > int MaxCG
Definition: pbg5p_w.cc:32
Real rsd_sq
Definition: invbicg.cc:121
int i
Definition: pbg5p_w.cc:55
@ PLUS
Definition: chromabase.h:45
DComplex d
Definition: invbicg.cc:99
START_CODE()
A(A, psi, r, Ncb, PLUS)
Complex b
Definition: invbicg.cc:96
Double zero
Definition: invbicg.cc:106
int k
Definition: invbicg.cc:119
std::string tag(const std::string &prefix)
Definition: octave.h:15
FloatingPoint< double > Double
Definition: gtest.h:7351
Gram-Schmidt with normalization.
Holds return info from SystemSolver call.
Definition: syssolver.h:17
multi1d< LatticeColorMatrix > U
LatticeFermion T
Definition: t_clover.cc:11