CHROMA
inv_eigcg2_array.cc
Go to the documentation of this file.
1 // -*- C++ -*-
2 /*! \file
3  * \brief Conjugate-Gradient algorithm with eigenstd::vector acceleration
4  */
5 
6 #include <qdp-lapack.h>
7 //#include "octave_debug.h"
8 //#include "octave_debug.cc"
9 
11 
14 
15 //#define DEBUG
16 #define DEBUG_FINAL
17 
18 
19 // NEEDS A LOT OF CLEAN UP
20 namespace Chroma
21 {
22  using namespace LinAlg;
23  //using namespace Octave;
24 
25  namespace InvEigCG2ArrayEnv
26  {
27  // needs a little clean up... for possible exceptions if the size of H
28  // is smaller than hind
29  // LatticeFermionF
30  template<typename T>
33  const multi2d<T>& evec,
34  int Nvecs)
35  {
36  H.N = Nvecs;
37  if(Nvecs>evec.size2()){
38  H.N = evec.size2();
39  }
40  if(H.N>H.size()){
41  QDPIO::cerr<<"OOPS! your matrix can't take this many matrix elements\n";
42  exit(1);
43  }
44  // fill H with zeros
45  H.mat = 0.0;
46 
47  multi1d<T> Ap;
48  for(int i(0);i<H.N;i++)
49  {
50  A(Ap,evec[i],PLUS);
51  for(int j(i);j<H.N;j++)
52  {
53  H(j,i) = innerProduct(evec[j], Ap, A.subset());
54  //enforce hermiticity
55  H(i,j) = conj(H(j,i));
56  if(i==j) H(i,j) = real(H(i,j));
57  }
58  }
59  }
60 
61 
62  template<typename T>
64  multi1d<T>& x,
65  const multi1d<T>& b,
66  multi1d<Double>& eval,
67  multi2d<T>& evec,
68  int Neig,
69  int Nmax,
70  const Real& RsdCG, int MaxCG)
71  {
72  START_CODE();
73 
74  FlopCounter flopcount;
75  flopcount.reset();
76  StopWatch swatch;
77  swatch.reset();
78  swatch.start();
79 
80  const int Ls = A.size();
81 
83 
84  multi1d<T> p;
85  multi1d<T> Ap;
86  multi1d<T> r;
87  //multi1d<T> z;
88 
89  Double rsd_sq = (RsdCG * RsdCG) * Real(norm2(b,A.subset()));
90  Double alphaprev, alpha,pAp;
91  Real beta;
92  Double betaprev ;
93  Double r_dot_z, r_dot_z_old;
94 
95  int k = 0;
96  A(Ap,x,PLUS);
97  for(int l=0; l < Ls; ++l)
98  r[l][A.subset()] = b[l] - Ap[l];
99 
100 
101 #if 1
102  QDPIO::cout << "InvEigCG2: Nevecs(input) = " << evec.size2() << std::endl;
103  QDPIO::cout << "InvEigCG2: k = " << k << " res^2 = " << r_dot_z << std::endl;
104 #endif
105  Matrix<DComplex> H(Nmax); // square matrix containing the tridiagonal
106  VectorArrays<T> vec(Nmax,Ls); // contains the vectors we use...
107 
108 
109  beta=0.0;
110  alpha = 1.0;
111  multi1d<T> Ap_prev;
112  multi1d<T> tt;
113 
114  // Algorithm from page 529 of Golub and Van Loan
115  // Modified to match the m-code from A. Stathopoulos
116  while(toBool(r_dot_z>rsd_sq))
117  {
118  /** preconditioning algorithm **/
119  //z[A.subset()]=r; //preconditioning can be added here
120  //r_dot_z = innerProductReal(r,z,A.subset());
121  //****//
122  r_dot_z_old = r_dot_z;
123  r_dot_z = norm2(r,A.subset());
124  Double inv_sqrt_r_dot_z = 1.0/sqrt(r_dot_z);
125  k++;
126  if(k==1){
127  //p[A.subset()] = z;
128  for(int l=0; l < Ls; ++l)
129  p[l][A.subset()] = r[l];
130  }
131  else{
132  betaprev = beta;
133  beta = r_dot_z/r_dot_z_old;
134  //p[A.subset()] = z + beta*p;
135  for(int l=0; l < Ls; ++l)
136  p[l][A.subset()] = r[l] + beta*p[l];
137  }
138  //-------- Eigenvalue eigenstd::vector finding code ------
139  if((Neig>0)&& (H.N == Nmax))
140  for(int l=0; l < Ls; ++l)
141  Ap_prev[l][A.subset()] = Ap[l];
142  //---------------------------------------------------
143  A(Ap,p,PLUS);
144 
145  //-------- Eigenvalue eigenstd::vector finding code ------
146  if(Neig>0)
147  {
148  if(k>1)
149  H(H.N-1,H.N-1) = 1/alpha + betaprev/alphaprev;
150  if(vec.N==Nmax)
151  {
152  QDPIO::cout<<"MAGIC BEGINS: H.N ="<<H.N<<std::endl;
153 #ifdef DEBUG
154 #ifndef QDP_IS_QDPJIT
155  {
156  std::stringstream tag;
157  tag<<"H"<<k;
158  OctavePrintOut(H.mat,Nmax,tag.str(),"Hmatrix.m");
159  }
160  {
161  Matrix<DComplex> tmp(Nmax);
162  SubSpaceMatrix(tmp,A,vec.vec,vec.N);
163  std::stringstream tag;
164  tag<<"H"<<k<<"ex";
165  OctavePrintOut(tmp.mat,Nmax,tag.str(),"Hmatrix.m");
166  }
167 #endif
168 #endif
169  multi2d<DComplex> Hevecs(H.mat);
170  multi1d<Double> Heval;
171  char V = 'V'; char U = 'U';
172  QDPLapack::zheev(V,U,Nmax,Hevecs,Heval);
173 #ifdef DEBUG
174 #ifndef QDP_IS_QDPJIT
175  {
176  std::stringstream tag;
177  tag<<"Hevecs"<<k;
178  OctavePrintOut(Hevecs,Nmax,tag.str(),"Hmatrix.m");
179  }
180  for(int i(0);i<Nmax;i++)
181  QDPIO::cout<<" eignvalue: "<<Heval[i]<<std::endl;
182 #endif
183 #endif
184  multi2d<DComplex> Hevecs_old(H.mat);
185  multi1d<Double> Heval_old;
186  QDPLapack::zheev(V,U,Nmax-1,Hevecs_old,Heval_old);
187  for(int i(0);i<Nmax;i++)
188  Hevecs_old(i,Nmax-1) = Hevecs_old(Nmax-1,i) = 0.0;
189 
190  //Reduce to 2*Neig vectors
191  H.N = Neig + Neig; // Thickness of restart 2*Neig
192 
193  for(int i(Neig);i<2*Neig;i++)
194  for(int j(0);j<Nmax;j++)
195  Hevecs(i,j) = Hevecs_old(i-Neig,j);
196 
197  // Orthogonalize the last Neig vecs (keep the first Neig fixed)
198  // zgeqrf(Nmax, 2*Neig, Hevecs, Nmax,
199  // TAU_CMPLX_ofsize_2Neig, Workarr, 2*Neig*Lapackblocksize, info);
200  multi1d<DComplex> TAU;
201  QDPLapack::zgeqrf(Nmax,2*Neig,Hevecs,TAU);
202  char R = 'R'; char L = 'L'; char N ='N'; char C = 'C';
203  multi2d<DComplex> Htmp = H.mat;
204  QDPLapack::zunmqr(R,N,Nmax,Nmax,Hevecs,TAU,Htmp);
205  QDPLapack::zunmqr(L,C,Nmax,2*Neig,Hevecs,TAU,Htmp);
206 
207  QDPLapack::zheev(V,U,2*Neig,Htmp,Heval);
208 #ifdef DEBUG
209 #ifndef QDP_IS_QDPJIT
210  {
211  std::stringstream tag;
212  tag<<"Htmp"<<k;
213  OctavePrintOut(Htmp,Nmax,tag.str(),"Hmatrix.m");
214  }
215 #endif
216 #endif
217  for(int i(Neig); i< 2*Neig;i++ ) // mhpws prepei na einai 0..2*Neig
218  for(int j(2*Neig); j<Nmax; j++)
219  Htmp(i,j) =0.0;
220 
221 #ifdef DEBUG
222 #ifndef QDP_IS_QDPJIT
223  {
224  std::stringstream tag;
225  tag<<"HtmpBeforeZUM"<<k;
226  OctavePrintOut(Htmp,Nmax,tag.str(),"Hmatrix.m");
227  }
228 #endif
229 #endif
230  QDPLapack::zunmqr(L,N,Nmax,2*Neig,Hevecs,TAU,Htmp);
231 #ifdef DEBUG
232 #ifndef QDP_IS_QDPJIT
233  {
234  std::stringstream tag;
235  tag<<"HtmpAfeterZUM"<<k;
236  OctavePrintOut(Htmp,Nmax,tag.str(),"Hmatrix.m");
237  }
238 #endif
239 #endif
240 
241 #ifndef USE_BLAS_FOR_LATTICEFERMIONS
242  multi2d<T> tt_vec(2*Neig,Ls);
243  for(int i(0);i<2*Neig;i++){
244  for(int l=0; l < Ls; ++l)
245  tt_vec[i][l][A.subset()] = zero;
246  for(int j(0);j<Nmax;j++)
247  for(int l=0; l < Ls; ++l)
248  tt_vec[i][l][A.subset()] +=Htmp(i,j)*vec[j][l];
249  }
250  for(int i(0);i<2*Neig;i++)
251  for(int l=0; l < Ls; ++l)
252  vec[i][l][A.subset()] = tt_vec[i][l];
253 #else
254  // Here I need the restart_X bit
255  // zgemm("N", "N", Ns*Nc*Vol/2, 2*Neig, Nmax, 1.0,
256  // vec.vec, Ns*Nc*Vol, Htmp, Nmax+1, 0.0, tt_vec, Ns*Nc*Vol);
257  // copy apo tt_vec se vec.vec
258 
259 #endif
260  vec.N = 2*Neig; // restart the vectors to keep
261 
262  H.mat = 0.0; // zero out H
263  for (int i=0;i<2*Neig;i++) H(i,i) = Heval[i];
264 
265  //A(tt,r,PLUS);
266  for(int l=0; l < Ls; ++l)
267  tt[l] = Ap[l] - beta*Ap_prev[l]; //avoid the matvec
268 
269 #ifndef USE_BLAS_FOR_LATTICEFERMIONS
270  for (int i=0;i<2*Neig;i++){
271  H(2*Neig,i)=innerProduct(vec[i],tt,A.subset())*inv_sqrt_r_dot_z;
272  H(i,2*Neig)=conj(H(2*Neig,i));
273  //H(i,2*Neig)=innerProduct(vec[i],tt,A.subset())*inv_sqrt_r_dot_z;
274  //H(2*Neig,i)=conj(H(i,2*Neig));
275  }
276 #else
277  // Optimized code for creating the H matrix row and column
278  // asume even-odd layout: Can I check if this is the case at
279  // compile time? or even at run time? This is a good test to
280  // have to avoid wrong results.
281 
282 
283 #endif
284  }//H.N==Nmax
285  else{
286  if(k>1)
287  H(H.N-1,H.N) = H(H.N,H.N-1) = -sqrt(beta)/alpha;
288  }
289  H.N++;
290  //vec.NormalizeAndAddVector(z,inv_sqrt_r_dot_z,A.subset());
291  vec.NormalizeAndAddVector(r,inv_sqrt_r_dot_z,A.subset());
292  }
293 
294  pAp = innerProductReal(p,Ap,A.subset());
295  alphaprev = alpha;// additional line for Eigenvalue eigenstd::vector code
296  alpha = r_dot_z/pAp;
297  for(int l=0; l < Ls; ++l)
298  x[l][A.subset()] += alpha*p[l];
299  for(int l=0; l < Ls; ++l)
300  r[l][A.subset()] -= alpha*Ap[l];
301 
302 
303  //---------------------------------------------------
304  if(k>MaxCG){
305  res.n_count = k;
306  res.resid = sqrt(r_dot_z);
307  QDP_error_exit("too many CG iterations: count = %d", res.n_count);
308  END_CODE();
309  return res;
310  }
311 #if 1
312  QDPIO::cout << "InvEigCG2: k = " << k ;
313  QDPIO::cout << " r_dot_z = " << r_dot_z << std::endl;
314 #endif
315  }//end CG loop
316 
317  if(Neig>0)
318  {
319  evec.resize(Neig,Ls);
320  eval.resize(Neig);
321 
322 #define USE_LAST_VECTORS
323 #ifdef USE_LAST_VECTORS
324 
325  multi2d<DComplex> Hevecs(H.mat);
326  multi1d<Double> Heval;
327  char V = 'V'; char U = 'U';
328  QDPLapack::zheev(V,U,H.N-1,Hevecs,Heval);
329 
330  for(int i(0);i<Neig;i++){
331  for(int l=0; l < Ls; ++l)
332  evec[i][l][A.subset()] = zero;
333  eval[i] = Heval[i];
334  for(int j(0);j<H.N-1;j++)
335  for(int l=0; l < Ls; ++l)
336  evec[i][l][A.subset()] +=Hevecs(i,j)*vec[j][l];
337  }
338 #else
339  for(int i(0);i<Neig;i++){
340  evec[i][A.subset()] = vec[i];
341  eval[i] = real(H(i,i));
342  }
343 #endif
344  }
345  res.n_count = k;
346  res.resid = sqrt(r_dot_z);
347  swatch.stop();
348  QDPIO::cout << "InvEigCG2: k = " << k << std::endl;
349  flopcount.report("InvEigCG2", swatch.getTimeInSeconds());
350  END_CODE();
351  return res;
352  }
353 
354 
355  template<typename T>
357  multi1d<T>& x,
358  const multi1d<T>& b,
359  multi1d<Double>& eval,
360  multi2d<T>& evec,
361  int Neig,
362  int Nmax,
363  const Real& RsdCG, int MaxCG)
364  {
365  START_CODE();
366 
367  FlopCounter flopcount;
368  flopcount.reset();
369  StopWatch swatch;
370  swatch.reset();
371  swatch.start();
372 
373  const int Ls = A.size();
374 
376 
377  multi1d<T> p;
378  multi1d<T> Ap;
379  multi1d<T> r,z;
380 
381  Double rsd_sq = (RsdCG * RsdCG) * Real(norm2(b,A.subset()));
382  Double alphaprev, alpha,pAp;
383  Real beta;
384  Double r_dot_z, r_dot_z_old;
385  //Complex r_dot_z, r_dot_z_old,beta;
386  //Complex alpha,pAp;
387 
388  int k = 0;
389  A(Ap,x,PLUS);
390  r[A.subset()] = b - Ap;
391  Double r_norm2 = norm2(r,A.subset());
392 
393 #if 1
394  QDPIO::cout << "InvEigCG2: Nevecs(input) = " << evec.size2() << std::endl;
395  QDPIO::cout << "InvEigCG2: k = " << k << " res^2 = " << r_norm2 << std::endl;
396 #endif
397  Real inorm(Real(1.0/sqrt(r_norm2)));
398  bool FindEvals = (Neig>0);
399  int tr; // Don't know what this does...
400  bool from_restart;
401  Matrix<DComplex> H(Nmax); // square matrix containing the tridiagonal
402  VectorArrays<T> vec(Nmax,Ls); // contains the vectors we use...
403  //-------- Eigenvalue eigenstd::vector finding code ------
404  vec.AddVectors(evec,A.subset());
405  //QDPIO::cout<<"vec.N="<<vec.N<<std::endl;
406  p[A.subset()] = inorm*r; // this is not needed GramSchmidt will take care of it
407  vec.AddOrReplaceVector(p,A.subset());
408  //QDPIO::cout<<"vec.N="<<vec.N<<std::endl;
409  normGramSchmidt(vec.vec,vec.N-1,vec.N,A.subset());
410  normGramSchmidt(vec.vec,vec.N-1,vec.N,A.subset()); //call twice: need to improve...
411  SubSpaceMatrix(H,A,vec.vec,vec.N);
412  from_restart = true;
413  tr = H.N - 1; // this is just a flag as far as I can tell
414  //-------
415 
416  Double betaprev;
417  beta=0.0;
418  alpha = 1.0;
419  // Algorithm from page 529 of Golub and Van Loan
420  // Modified to match the m-code from A. Stathopoulos
421  while(toBool(r_norm2>rsd_sq))
422  {
423  /** preconditioning algorithm **/
424  z[A.subset()]=r; //preconditioning can be added here
425  /**/
426  r_dot_z = innerProductReal(r,z,A.subset());
427  k++;
428  betaprev = beta; // additional line for Eigenvalue eigenstd::vector code
429  if(k==1){
430  p[A.subset()] = z;
431  H.N++;
432  }
433  else{
434  beta = r_dot_z/r_dot_z_old;
435  p[A.subset()] = z + beta*p;
436  //-------- Eigenvalue eigenstd::vector finding code ------
437  // fist block
438  if(FindEvals){
439  if(!((from_restart)&&(H.N == tr+1))){
440  if(!from_restart){
441  H(H.N-2,H.N-2) = 1/alpha + betaprev/alphaprev;
442  }
443  from_restart = false;
444  H(H.N-1,H.N-2) = -sqrt(beta)/alpha;
445  H(H.N-2,H.N-1) = -sqrt(beta)/alpha;
446  }
447  H.N++;
448  }
449  //---------------------------------------------------
450  }
451  A(Ap,p,PLUS);
452  pAp = innerProductReal(p,Ap,A.subset());
453 
454  alphaprev = alpha;// additional line for Eigenvalue eigenstd::vector code
455  alpha = r_dot_z/pAp;
456  x[A.subset()] += alpha*p;
457  r[A.subset()] -= alpha*Ap;
458  r_norm2 = norm2(r,A.subset());
459  r_dot_z_old = r_dot_z;
460 
461 
462  //-------- Eigenvalue eigenstd::vector finding code ------
463  // second block
464  if(FindEvals){
465  if (vec.N==Nmax){//we already have stored the maximum number of vectors
466  // The magic begins here....
467  QDPIO::cout<<"MAGIC BEGINS: H.N ="<<H.N<<std::endl;
468  H(Nmax-1,Nmax-1) = 1/alpha + beta/alphaprev;
469 
470 #ifdef DEBUG
471 #ifndef QDP_IS_QDPJIT
472  {
473  std::stringstream tag;
474  tag<<"H"<<k;
475  OctavePrintOut(H.mat,Nmax,tag.str(),"Hmatrix.m");
476  }
477  {
478  Matrix<DComplex> tmp(Nmax);
479  SubSpaceMatrix(tmp,A,vec.vec,vec.N);
480  std::stringstream tag;
481  tag<<"H"<<k<<"ex";
482  OctavePrintOut(tmp.mat,Nmax,tag.str(),"Hmatrix.m");
483  }
484 #endif
485 #endif
486  //exit(1);
487  multi2d<DComplex> Hevecs(H.mat);
488  multi1d<Double> Heval;
489  char V = 'V'; char U = 'U';
490  QDPLapack::zheev(V,U,Hevecs,Heval);
491  multi2d<DComplex> Hevecs_old(H.mat);
492 
493 #ifdef DEBUG
494 #ifndef QDP_IS_QDPJIT
495  {
496  multi1d<T> tt_vec(vec.size());
497  for(int i(0);i<Nmax;i++){
498  tt_vec[i][A.subset()] = zero;
499  for(int j(0);j<Nmax;j++)
500  tt_vec[i][A.subset()] +=Hevecs(i,j)*vec[j]; // NEED TO CHECK THE INDEXINT
501  }
502  // CHECK IF vec are eigenvectors...
503 
504  multi1d<T> Av;
505  for(int i(0);i<Nmax;i++){
506  A(Av,tt_vec[i],PLUS);
507  DComplex rq = innerProduct(tt_vec[i],Av,A.subset());
508  Av[A.subset()] -= Heval[i]*tt_vec[i];
509  Double tt = sqrt(norm2(Av,A.subset()));
510  QDPIO::cout<<"1 error eigenstd::vector["<<i<<"] = "<<tt<<" ";
511  tt = sqrt(norm2(tt_vec[i],A.subset()));
512  QDPIO::cout<<" --- eval = "<<Heval[i]<<" ";
513  QDPIO::cout<<" --- rq = "<<real(rq)<<" ";
514  QDPIO::cout<<"--- norm = "<<tt<<std::endl ;
515  }
516  }
517 #endif
518 #endif
519  multi1d<Double> Heval_old;
520  QDPLapack::zheev(V,U,Nmax-1,Hevecs_old,Heval_old);
521  for(int i(0);i<Nmax;i++)
522  Hevecs_old(i,Nmax-1) = Hevecs_old(Nmax-1,i) = 0.0;
523 #ifdef DEBUG
524 #ifndef QDP_IS_QDPJIT
525  {
526  std::stringstream tag;
527  tag<<"Hevecs_old"<<k;
528  OctavePrintOut(Hevecs_old,Nmax,tag.str(),"Hmatrix.m");
529  }
530 
531 #endif
532 #endif
533  tr = Neig + Neig; // v_old = Neig optimal choice
534 
535  for(int i(Neig);i<tr;i++)
536  for(int j(0);j<Nmax;j++)
537  Hevecs(i,j) = Hevecs_old(i-Neig,j);
538 #ifdef DEBUG
539 #ifndef QDP_IS_QDPJIT
540  {
541  std::stringstream tag;
542  tag<<"Hevecs"<<k;
543  OctavePrintOut(Hevecs,Nmax,tag.str(),"Hmatrix.m");
544  }
545 #endif
546 #endif
547 
548  // Orthogonalize the last Neig vecs (keep the first Neig fixed)
549  // zgeqrf(Nmax, 2*Neig, Hevecs, Nmax,
550  // TAU_CMPLX_ofsize_2Neig, Workarr, 2*Neig*Lapackblocksize, info);
551  multi1d<DComplex> TAU;
552  QDPLapack::zgeqrf(Nmax,2*Neig,Hevecs,TAU);
553  char R = 'R'; char L = 'L'; char N ='N'; char C = 'C';
554  multi2d<DComplex> Htmp = H.mat;
555  QDPLapack::zunmqr(R,N,Nmax,Nmax,Hevecs,TAU,Htmp);
556  QDPLapack::zunmqr(L,C,Nmax,2*Neig,Hevecs,TAU,Htmp);
557  // Notice that now H is of size 2Neig x 2Neig,
558  // but still with LDA = Nmax
559 #ifdef DEBUG
560 #ifndef QDP_IS_QDPJIT
561  {
562  std::stringstream tag;
563  tag<<"Htmp"<<k;
564  OctavePrintOut(Htmp,Nmax,tag.str(),"Hmatrix.m");
565  }
566 #endif
567 #endif
568  QDPLapack::zheev(V,U,2*Neig,Htmp,Heval);
569  for(int i(Neig); i< 2*Neig;i++ )
570  for(int j(2*Neig); j<Nmax; j++)
571  Htmp(i,j) =0.0;
572 #ifdef DEBUG
573 #ifndef QDP_IS_QDPJIT
574  {
575  std::stringstream tag;
576  tag<<"evecstmp"<<k;
577  OctavePrintOut(Htmp,Nmax,tag.str(),"Hmatrix.m");
578  }
579 #endif
580 #endif
581  QDPLapack::zunmqr(L,N,Nmax,2*Neig,Hevecs,TAU,Htmp);
582 #ifdef DEBUG
583 #ifndef QDP_IS_QDPJIT
584  {
585  std::stringstream tag;
586  tag<<"Yevecstmp"<<k;
587  OctavePrintOut(Htmp,Nmax,tag.str(),"Hmatrix.m");
588  }
589 #endif
590 #endif
591  // Here I need the restart_X bit
592  multi1d<T> tt_vec = vec.vec;
593  for(int i(0);i<2*Neig;i++){
594  vec[i][A.subset()] = zero;
595  for(int j(0);j<Nmax;j++)
596  vec[i][A.subset()] +=Htmp(i,j)*tt_vec[j]; // NEED TO CHECK THE INDEXING
597  }
598 
599 #ifdef DEBUG
600 #ifndef QDP_IS_QDPJIT
601  // CHECK IF vec are eigenvectors...
602  {
603  multi1d<T> Av;
604  for(int i(0);i<2*Neig;i++){
605  A(Av,vec[i],PLUS);
606  DComplex rq = innerProduct(vec[i],Av,A.subset());
607  Av[A.subset()] -= Heval[i]*vec[i];
608  Double tt = sqrt(norm2(Av,A.subset()));
609  QDPIO::cout<<"error eigenstd::vector["<<i<<"] = "<<tt<<" ";
610  tt = sqrt(norm2(vec[i],A.subset()));
611  QDPIO::cout<<"--- rq ="<<real(rq)<<" ";
612  QDPIO::cout<<"--- norm = "<<tt<<std::endl ;
613  }
614 
615  }
616 #endif
617 #endif
618  H.mat = 0.0; // zero out H
619  for (int i=0;i<2*Neig;i++) H(i,i) = Heval[i];
620 
621  multi1d<T> Ar;
622  // Need to reorganize so that we aboid this extra matvec
623  A(Ar,r,PLUS);
624  Ar /= sqrt(r_norm2);
625  // this has the oposite convension than the subspace matrix
626  // this is the reason the std::vector reconstruction in here does not
627  // need the conj(H) while in the Rayleigh Ritz refinement
628  // it does need it.
629  // In here the only complex matrix elements are these computined
630  // in the next few lines. This is why the inconsistency
631  // does not matter anywhere else.
632  // In the refinement step though all matrix elements are complex
633  // hence things break down.
634  // Need to fix the convension so that we do not
635  // have these inconsistencies.
636  for (int i=0;i<2*Neig;i++){
637  H(2*Neig,i) = innerProduct(vec[i], Ar, A.subset());
638  H(i,2*Neig) = conj(H(2*Neig,i));
639  }
640  H(2*Neig,2*Neig) = innerProduct(r, Ar, A.subset())/sqrt(r_norm2);
641 
642  H.N = 2*Neig + 1 ; // why this ?
643  from_restart = true;
644  vec.N = 2*Neig; // only keep the lowest Neig. Is this correct???
645 #ifdef DEBUG
646 #ifndef QDP_IS_QDPJIT
647  {
648  std::stringstream tag;
649  tag<<"finalH"<<k;
650  OctavePrintOut(H.mat,Nmax,tag.str(),"Hmatrix.m");
651  }
652 #endif
653 #endif
654  }
655  // Here we add a std::vector in the list
656  Double inorm = 1.0/sqrt(r_norm2);
657  vec.NormalizeAndAddVector(r,inorm,A.subset());
658  // Shouldn't be the z std::vector when a preconditioner is used?
659  }
660  //---------------------------------------------------
661 
662  if(k>MaxCG){
663  res.n_count = k;
664  res.resid = sqrt(r_norm2);
665  QDP_error_exit("too many CG iterations: count = %d", res.n_count);
666  END_CODE();
667  return res;
668  }
669 #if 1
670  QDPIO::cout << "InvEigCG2: k = " << k << " res^2 = " << r_norm2;
671  QDPIO::cout << " r_dot_z = " << r_dot_z << std::endl;
672 #endif
673  }
674  res.n_count = k;
675  res.resid = sqrt(r_norm2);
676  if(FindEvals)
677  {
678  // Evec Code ------ Before we return --------
679  // vs is the current number of vectors stored
680  // Neig is the number of eigenstd::vector we want to compute
681  normGramSchmidt(vec.vec,0,2*Neig,A.subset());
682  normGramSchmidt(vec.vec,0,2*Neig,A.subset());
683  Matrix<DComplex> Htmp(2*Neig);
684  SubSpaceMatrix(Htmp,A,vec.vec,2*Neig);
685 
686  char V = 'V'; char U = 'U';
687  multi1d<Double> tt_eval;
688  QDPLapack::zheev(V,U,Htmp.mat,tt_eval);
689  evec.resize(Neig,Ls);
690  eval.resize(Neig);
691  for(int i(0);i<Neig;i++){
692  evec[i][A.subset()] = zero;
693  eval[i] = tt_eval[i];
694  for(int j(0);j<2*Neig;j++)
695  evec[i][A.subset()] += Htmp(i,j)*vec[j];
696  }
697 
698  // I will do the checking of eigenstd::vector quality outside this routine
699  // -------------------------------------------
700 #ifdef DEBUG_FINAL
701  // CHECK IF vec are eigenvectors...
702  {
703  multi1d<T> Av;
704  for(int i(0);i<Neig;i++)
705  {
706  A(Av,evec[i],PLUS);
707  DComplex rq = innerProduct(evec[i],Av,A.subset());
708  Av[A.subset()] -= eval[i]*evec[i];
709  Double tt = sqrt(norm2(Av,A.subset()));
710  QDPIO::cout<<"FINAL: error eigenstd::vector["<<i<<"] = "<<tt<<" ";
711  tt = sqrt(norm2(evec[i],A.subset()));
712  QDPIO::cout<<"--- rq ="<<real(rq)<<" ";
713  QDPIO::cout<<"--- norm = "<<tt<<std::endl ;
714  }
715 
716  }
717 #endif
718  }
719 
720  res.n_count = k;
721  res.resid = sqrt(r_norm2);
722  swatch.stop();
723  QDPIO::cout << "InvEigCG2: k = " << k << std::endl;
724  flopcount.report("InvEigCG2", swatch.getTimeInSeconds());
725  END_CODE();
726  return res;
727  }
728 
729  // A should be Hermitian positive definite
730  template<typename T>
732  multi1d<T>& x,
733  const multi1d<T>& b,
734  const multi1d<Double>& eval,
735  const multi2d<T>& evec,
736  int startV, int endV,
737  const Real& RsdCG, int MaxCG)
738  {
739  START_CODE();
740 
741  FlopCounter flopcount;
742  flopcount.reset();
743  StopWatch swatch;
744  swatch.reset();
745  swatch.start();
746 
747  const int Ls = A.size();
748 
749  if(endV>eval.size()){
750  QDP_error_exit("vPrecondCG: not enought evecs eval.size()=%d",eval.size());
751  }
752 
754 
755  multi1d<T> p;
756  multi1d<T> Ap;
757  multi1d<T> r,z;
758 
759  Double rsd_sq = (RsdCG * RsdCG) * Real(norm2(b,A.subset()));
760  Double alpha,pAp;
761  Real beta;
762  Double r_dot_z, r_dot_z_old;
763  //Complex r_dot_z, r_dot_z_old,beta;
764  //Complex alpha,pAp;
765 
766  int k = 0;
767  A(Ap,x,PLUS);
768  for(int l=0; l < Ls; ++l)
769  r[l][A.subset()] = b[l] - Ap[l];
770  Double r_norm2 = norm2(r,A.subset());
771 
772 #if 1
773  QDPIO::cout << "vecPrecondCG: k = " << k << " res^2 = " << r_norm2 << std::endl;
774 #endif
775 
776  // Algorithm from page 529 of Golub and Van Loan
777  while(toBool(r_norm2>rsd_sq)){
778  /** preconditioning algorithm **/
779  for(int l=0; l < Ls; ++l)
780  z[l][A.subset()] = r[l]; // not optimal but do it for now...
781  /**/
782  for(int i(startV);i<endV;i++){
783  DComplex d = innerProduct(evec[i],r,A.subset());
784  //QDPIO::cout<<"vecPrecondCG: "<< d<<" "<<(1.0/eval[i]-1.0)<<std::endl;
785  for(int l=0; l < Ls; ++l)
786  z[l][A.subset()] += (1.0/eval[i]-1.0)*d*evec[i][l];
787  }
788  /**/
789  /**/
790  r_dot_z = innerProductReal(r,z,A.subset());
791  k++;
792  if(k==1){
793  for(int l=0; l < Ls; ++l)
794  p[l][A.subset()] = z[l];
795  }
796  else{
797  beta = r_dot_z/r_dot_z_old;
798  for(int l=0; l < Ls; ++l)
799  p[l][A.subset()] = z[l] + beta*p[l];
800  }
801  //Cheb.Qsq(Ap,p);
802  A(Ap,p,PLUS);
803  pAp = innerProductReal(p,Ap,A.subset());
804 
805  alpha = r_dot_z/pAp;
806  for(int l=0; l < Ls; ++l)
807  x[l][A.subset()] += alpha*p[l];
808  for(int l=0; l < Ls; ++l)
809  r[l][A.subset()] -= alpha*Ap[l];
810  r_norm2 = norm2(r,A.subset());
811  r_dot_z_old = r_dot_z;
812 
813  if(k>MaxCG){
814  res.n_count = k;
815  QDP_error_exit("too many CG iterations: count = %d", res.n_count);
816  return res;
817  }
818 #if 1
819  QDPIO::cout << "vecPrecondCG: k = " << k << " res^2 = " << r_norm2;
820  QDPIO::cout << " r_dot_z = " << r_dot_z << std::endl;
821 #endif
822  }
823 
824  res.n_count = k;
825  res.resid = sqrt(r_norm2);
826  swatch.stop();
827  QDPIO::cout << "vPreconfCG: k = " << k << std::endl;
828  flopcount.report("vPrecondCG", swatch.getTimeInSeconds());
829  END_CODE();
830  return res;
831  }
832 
833  template<typename T>
835  multi1d<T>& x,
836  const multi1d<T>& b,
837  const multi1d<Double>& eval,
838  const multi2d<T>& evec,
839  int& n_count)
840  {
841  int N = evec.size2();
842  InitGuess(A,x,b,eval,evec,N,n_count);
843  }
844 
845  template<typename T>
847  multi1d<T>& x,
848  const multi1d<T>& b,
849  const multi1d<Double>& eval,
850  const multi2d<T>& evec,
851  int N, // number of vectors to use
852  int& n_count)
853  {
854  multi1d<T> p;
855  multi1d<T> Ap;
856  multi1d<T> r;
857 
858  const int Ls = A.size();
859 
860  StopWatch snoop;
861  snoop.reset();
862  snoop.start();
863 
864  A(Ap,x,PLUS);
865  for(int l=0; l < Ls; ++l)
866  r[l][A.subset()] = b[l] - Ap[l];
867  // Double r_norm2 = norm2(r,A.subset());
868 
869  for(int i(0);i<N;i++)
870  {
871  DComplex d = innerProduct(evec[i],r,A.subset());
872  for(int l=0; l < Ls; ++l)
873  x[l][A.subset()] += (d/eval[i])*evec[i][l];
874  //QDPIO::cout<<"InitCG: "<<d<<" "<<eval[i]<<std::endl;
875 
876  }
877 
878  snoop.stop();
879  QDPIO::cout << "InitGuess: time = "
880  << snoop.getTimeInSeconds()
881  << " secs" << std::endl;
882 
883  n_count = 1;
884  }
885 
886 
887  //
888  // Wrappers
889  //
890  // LatticeFermionF
893  const multi2d<LatticeFermionF>& evec,
894  int Nvecs)
895  {
896  SubSpaceMatrix_T(H, A, evec, Nvecs);
897  }
898 
900  multi1d<LatticeFermionF>& x,
901  const multi1d<LatticeFermionF>& b,
902  multi1d<Double>& eval,
903  multi2d<LatticeFermionF>& evec,
904  int Neig,
905  int Nmax,
906  const Real& RsdCG, int MaxCG)
907  {
908  return InvEigCG2_T(A, x, b, eval, evec, Neig, Nmax, RsdCG, MaxCG);
909  }
910 
912  multi1d<LatticeFermionF>& x,
913  const multi1d<LatticeFermionF>& b,
914  const multi1d<Double>& eval,
915  const multi2d<LatticeFermionF>& evec,
916  int startV, int endV,
917  const Real& RsdCG, int MaxCG)
918  {
919  return vecPrecondCG_T(A, x, b, eval, evec, startV, endV, RsdCG, MaxCG);
920  }
921 
923  multi1d<LatticeFermionF>& x,
924  const multi1d<LatticeFermionF>& b,
925  const multi1d<Double>& eval,
926  const multi2d<LatticeFermionF>& evec,
927  int& n_count)
928  {
929  InitGuess_T(A, x, b, eval, evec, n_count);
930  }
931 
933  multi1d<LatticeFermionF>& x,
934  const multi1d<LatticeFermionF>& b,
935  const multi1d<Double>& eval,
936  const multi2d<LatticeFermionF>& evec,
937  int N, // number of vectors to use
938  int& n_count)
939  {
940  InitGuess_T(A, x, b, eval, evec, N, n_count);
941  }
942 
943 
944  // LatticeFermionD
947  const multi2d<LatticeFermionD>& evec,
948  int Nvecs)
949  {
950  SubSpaceMatrix_T(H, A, evec, Nvecs);
951  }
952 
954  multi1d<LatticeFermionD>& x,
955  const multi1d<LatticeFermionD>& b,
956  multi1d<Double>& eval,
957  multi2d<LatticeFermionD>& evec,
958  int Neig,
959  int Nmax,
960  const Real& RsdCG, int MaxCG)
961  {
962  return InvEigCG2_T(A, x, b, eval, evec, Neig, Nmax, RsdCG, MaxCG);
963  }
964 
966  multi1d<LatticeFermionD>& x,
967  const multi1d<LatticeFermionD>& b,
968  const multi1d<Double>& eval,
969  const multi2d<LatticeFermionD>& evec,
970  int startV, int endV,
971  const Real& RsdCG, int MaxCG)
972  {
973  return vecPrecondCG_T(A, x, b, eval, evec, startV, endV, RsdCG, MaxCG);
974  }
975 
977  multi1d<LatticeFermionD>& x,
978  const multi1d<LatticeFermionD>& b,
979  const multi1d<Double>& eval,
980  const multi2d<LatticeFermionD>& evec,
981  int& n_count)
982  {
983  InitGuess_T(A, x, b, eval, evec, n_count);
984  }
985 
987  multi1d<LatticeFermionD>& x,
988  const multi1d<LatticeFermionD>& b,
989  const multi1d<Double>& eval,
990  const multi2d<LatticeFermionD>& evec,
991  int N, // number of vectors to use
992  int& n_count)
993  {
994  InitGuess_T(A, x, b, eval, evec, N, n_count);
995  }
996  } // namespace InvEigCG2ArrayEnv
997 
998 }// End Namespace Chroma
999 
This is a square matrix.
Definition: containers.h:328
void NormalizeAndAddVector(const multi1d< T > &v, const Double &inorm, const Subset &s)
Definition: containers.h:383
void AddOrReplaceVector(const multi1d< T > &v, const Subset &s)
Definition: containers.h:402
void AddVectors(multi2d< T > &v, const Subset &s)
Definition: containers.h:422
Linear Operator to arrays.
Definition: linearop.h:61
void normGramSchmidt(multi1d< LatticeFermionF > &vec, int f, int t, const Subset &sub)
Gram-Schmidt with normalization.
Conjugate-Gradient algorithm with eigenstd::vector acceleration.
unsigned j
Definition: ldumul_w.cc:35
int z
Definition: meslate.cc:36
int x
Definition: meslate.cc:34
SpinMatrix C()
C = Gamma(10)
Definition: barspinmat_w.cc:29
BinaryReturn< C1, C2, FnInnerProduct >::Type_t innerProduct(const QDPSubType< T1, C1 > &s1, const QDPType< T2, C2 > &s2)
void SubSpaceMatrix_T(LinAlg::Matrix< DComplex > &H, const LinearOperatorArray< T > &A, const multi2d< T > &evec, int Nvecs)
SystemSolverResults_t vecPrecondCG(const LinearOperatorArray< LatticeFermionD > &A, multi1d< LatticeFermionD > &x, const multi1d< LatticeFermionD > &b, const multi1d< Double > &eval, const multi2d< LatticeFermionD > &evec, int startV, int endV, const Real &RsdCG, int MaxCG)
SystemSolverResults_t InvEigCG2(const LinearOperatorArray< LatticeFermionD > &A, multi1d< LatticeFermionD > &x, const multi1d< LatticeFermionD > &b, multi1d< Double > &eval, multi2d< LatticeFermionD > &evec, int Neig, int Nmax, const Real &RsdCG, int MaxCG)
SystemSolverResults_t old_InvEigCG2_T(const LinearOperatorArray< T > &A, multi1d< T > &x, const multi1d< T > &b, multi1d< Double > &eval, multi2d< T > &evec, int Neig, int Nmax, const Real &RsdCG, int MaxCG)
SystemSolverResults_t vecPrecondCG_T(const LinearOperatorArray< T > &A, multi1d< T > &x, const multi1d< T > &b, const multi1d< Double > &eval, const multi2d< T > &evec, int startV, int endV, const Real &RsdCG, int MaxCG)
void SubSpaceMatrix(LinAlg::Matrix< DComplex > &H, const LinearOperatorArray< LatticeFermionD > &A, const multi2d< LatticeFermionD > &evec, int Nvecs)
SystemSolverResults_t InvEigCG2_T(const LinearOperatorArray< T > &A, multi1d< T > &x, const multi1d< T > &b, multi1d< Double > &eval, multi2d< T > &evec, int Neig, int Nmax, const Real &RsdCG, int MaxCG)
void InitGuess(const LinearOperatorArray< LatticeFermionD > &A, multi1d< LatticeFermionD > &x, const multi1d< LatticeFermionD > &b, const multi1d< Double > &eval, const multi2d< LatticeFermionD > &evec, int N, int &n_count)
void InitGuess_T(const LinearOperatorArray< T > &A, multi1d< T > &x, const multi1d< T > &b, const multi1d< Double > &eval, const multi2d< T > &evec, int N, int &n_count)
static const LatticeInteger & beta(const int dim)
Definition: stag_phases_s.h:47
static const LatticeInteger & alpha(const int dim)
Definition: stag_phases_s.h:43
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > & RsdCG
Definition: pbg5p_w.cc:30
QDP_error_exit("too many BiCG iterations", n_count, rsd_sq, cp, c, re_rvr, im_rvr, re_a, im_a, re_b, im_b)
int n_count
Definition: invbicg.cc:78
LatticeFermion tmp
Definition: mespbg5p_w.cc:36
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > int MaxCG
Definition: pbg5p_w.cc:32
Real rsd_sq
Definition: invbicg.cc:121
int i
Definition: pbg5p_w.cc:55
@ PLUS
Definition: chromabase.h:45
DComplex d
Definition: invbicg.cc:99
START_CODE()
A(A, psi, r, Ncb, PLUS)
Complex b
Definition: invbicg.cc:96
Double zero
Definition: invbicg.cc:106
int k
Definition: invbicg.cc:119
std::string tag(const std::string &prefix)
Definition: octave.h:15
FloatingPoint< double > Double
Definition: gtest.h:7351
Gram-Schmidt with normalization.
int l
Definition: pade_trln_w.cc:111
Holds return info from SystemSolver call.
Definition: syssolver.h:17
multi1d< LatticeColorMatrix > U