6 #include <qdp-lapack.h>
22 using namespace LinAlg;
25 namespace InvEigCG2ArrayEnv
33 const multi2d<T>& evec,
37 if(Nvecs>evec.size2()){
41 QDPIO::cerr<<
"OOPS! your matrix can't take this many matrix elements\n";
48 for(
int i(0);
i<H.
N;
i++)
51 for(
int j(
i);
j<H.
N;
j++)
55 H(
i,
j) = conj(H(
j,
i));
56 if(
i==
j) H(
i,
j) = real(H(
i,
j));
66 multi1d<Double>& eval,
74 FlopCounter flopcount;
80 const int Ls =
A.size();
93 Double r_dot_z, r_dot_z_old;
97 for(
int l=0;
l < Ls; ++
l)
98 r[
l][
A.subset()] =
b[
l] - Ap[
l];
102 QDPIO::cout <<
"InvEigCG2: Nevecs(input) = " << evec.size2() << std::endl;
103 QDPIO::cout <<
"InvEigCG2: k = " <<
k <<
" res^2 = " << r_dot_z << std::endl;
116 while(toBool(r_dot_z>
rsd_sq))
122 r_dot_z_old = r_dot_z;
123 r_dot_z = norm2(
r,
A.subset());
124 Double inv_sqrt_r_dot_z = 1.0/sqrt(r_dot_z);
128 for(
int l=0;
l < Ls; ++
l)
129 p[
l][
A.subset()] =
r[
l];
133 beta = r_dot_z/r_dot_z_old;
135 for(
int l=0;
l < Ls; ++
l)
139 if((Neig>0)&& (H.
N == Nmax))
140 for(
int l=0;
l < Ls; ++
l)
141 Ap_prev[
l][
A.subset()] = Ap[
l];
149 H(H.
N-1,H.
N-1) = 1/
alpha + betaprev/alphaprev;
152 QDPIO::cout<<
"MAGIC BEGINS: H.N ="<<H.
N<<std::endl;
154 #ifndef QDP_IS_QDPJIT
156 std::stringstream
tag;
158 OctavePrintOut(H.
mat,Nmax,
tag.str(),
"Hmatrix.m");
163 std::stringstream
tag;
165 OctavePrintOut(
tmp.mat,Nmax,
tag.str(),
"Hmatrix.m");
169 multi2d<DComplex> Hevecs(H.
mat);
170 multi1d<Double> Heval;
171 char V =
'V';
char U =
'U';
172 QDPLapack::zheev(V,
U,Nmax,Hevecs,Heval);
174 #ifndef QDP_IS_QDPJIT
176 std::stringstream
tag;
178 OctavePrintOut(Hevecs,Nmax,
tag.str(),
"Hmatrix.m");
180 for(
int i(0);
i<Nmax;
i++)
181 QDPIO::cout<<
" eignvalue: "<<Heval[
i]<<std::endl;
184 multi2d<DComplex> Hevecs_old(H.
mat);
185 multi1d<Double> Heval_old;
186 QDPLapack::zheev(V,
U,Nmax-1,Hevecs_old,Heval_old);
187 for(
int i(0);
i<Nmax;
i++)
188 Hevecs_old(
i,Nmax-1) = Hevecs_old(Nmax-1,
i) = 0.0;
193 for(
int i(Neig);
i<2*Neig;
i++)
194 for(
int j(0);
j<Nmax;
j++)
195 Hevecs(
i,
j) = Hevecs_old(
i-Neig,
j);
200 multi1d<DComplex> TAU;
201 QDPLapack::zgeqrf(Nmax,2*Neig,Hevecs,TAU);
202 char R =
'R';
char L =
'L';
char N =
'N';
char C =
'C';
203 multi2d<DComplex> Htmp = H.
mat;
204 QDPLapack::zunmqr(R,N,Nmax,Nmax,Hevecs,TAU,Htmp);
205 QDPLapack::zunmqr(L,
C,Nmax,2*Neig,Hevecs,TAU,Htmp);
207 QDPLapack::zheev(V,
U,2*Neig,Htmp,Heval);
209 #ifndef QDP_IS_QDPJIT
211 std::stringstream
tag;
213 OctavePrintOut(Htmp,Nmax,
tag.str(),
"Hmatrix.m");
217 for(
int i(Neig);
i< 2*Neig;
i++ )
218 for(
int j(2*Neig);
j<Nmax;
j++)
222 #ifndef QDP_IS_QDPJIT
224 std::stringstream
tag;
225 tag<<
"HtmpBeforeZUM"<<
k;
226 OctavePrintOut(Htmp,Nmax,
tag.str(),
"Hmatrix.m");
230 QDPLapack::zunmqr(L,N,Nmax,2*Neig,Hevecs,TAU,Htmp);
232 #ifndef QDP_IS_QDPJIT
234 std::stringstream
tag;
235 tag<<
"HtmpAfeterZUM"<<
k;
236 OctavePrintOut(Htmp,Nmax,
tag.str(),
"Hmatrix.m");
241 #ifndef USE_BLAS_FOR_LATTICEFERMIONS
242 multi2d<T> tt_vec(2*Neig,Ls);
243 for(
int i(0);
i<2*Neig;
i++){
244 for(
int l=0;
l < Ls; ++
l)
245 tt_vec[
i][
l][
A.subset()] =
zero;
246 for(
int j(0);
j<Nmax;
j++)
247 for(
int l=0;
l < Ls; ++
l)
248 tt_vec[
i][
l][
A.subset()] +=Htmp(
i,
j)*vec[
j][
l];
250 for(
int i(0);
i<2*Neig;
i++)
251 for(
int l=0;
l < Ls; ++
l)
252 vec[
i][
l][
A.subset()] = tt_vec[
i][
l];
263 for (
int i=0;
i<2*Neig;
i++) H(
i,
i) = Heval[
i];
266 for(
int l=0;
l < Ls; ++
l)
267 tt[
l] = Ap[
l] -
beta*Ap_prev[
l];
269 #ifndef USE_BLAS_FOR_LATTICEFERMIONS
270 for (
int i=0;
i<2*Neig;
i++){
272 H(
i,2*Neig)=conj(H(2*Neig,
i));
294 pAp = innerProductReal(
p,Ap,
A.subset());
297 for(
int l=0;
l < Ls; ++
l)
299 for(
int l=0;
l < Ls; ++
l)
306 res.
resid = sqrt(r_dot_z);
312 QDPIO::cout <<
"InvEigCG2: k = " <<
k ;
313 QDPIO::cout <<
" r_dot_z = " << r_dot_z << std::endl;
319 evec.resize(Neig,Ls);
322 #define USE_LAST_VECTORS
323 #ifdef USE_LAST_VECTORS
325 multi2d<DComplex> Hevecs(H.
mat);
326 multi1d<Double> Heval;
327 char V =
'V';
char U =
'U';
328 QDPLapack::zheev(V,
U,H.
N-1,Hevecs,Heval);
330 for(
int i(0);
i<Neig;
i++){
331 for(
int l=0;
l < Ls; ++
l)
334 for(
int j(0);
j<H.
N-1;
j++)
335 for(
int l=0;
l < Ls; ++
l)
336 evec[
i][
l][
A.subset()] +=Hevecs(
i,
j)*vec[
j][
l];
339 for(
int i(0);
i<Neig;
i++){
340 evec[
i][
A.subset()] = vec[
i];
341 eval[
i] = real(H(
i,
i));
346 res.
resid = sqrt(r_dot_z);
348 QDPIO::cout <<
"InvEigCG2: k = " <<
k << std::endl;
349 flopcount.report(
"InvEigCG2", swatch.getTimeInSeconds());
359 multi1d<Double>& eval,
367 FlopCounter flopcount;
373 const int Ls =
A.size();
384 Double r_dot_z, r_dot_z_old;
390 r[
A.subset()] =
b - Ap;
391 Double r_norm2 = norm2(
r,
A.subset());
394 QDPIO::cout <<
"InvEigCG2: Nevecs(input) = " << evec.size2() << std::endl;
395 QDPIO::cout <<
"InvEigCG2: k = " <<
k <<
" res^2 = " << r_norm2 << std::endl;
397 Real inorm(Real(1.0/sqrt(r_norm2)));
398 bool FindEvals = (Neig>0);
406 p[
A.subset()] = inorm*
r;
421 while(toBool(r_norm2>
rsd_sq))
426 r_dot_z = innerProductReal(
r,
z,
A.subset());
434 beta = r_dot_z/r_dot_z_old;
439 if(!((from_restart)&&(H.
N == tr+1))){
441 H(H.
N-2,H.
N-2) = 1/
alpha + betaprev/alphaprev;
443 from_restart =
false;
452 pAp = innerProductReal(
p,Ap,
A.subset());
458 r_norm2 = norm2(
r,
A.subset());
459 r_dot_z_old = r_dot_z;
467 QDPIO::cout<<
"MAGIC BEGINS: H.N ="<<H.
N<<std::endl;
468 H(Nmax-1,Nmax-1) = 1/
alpha +
beta/alphaprev;
471 #ifndef QDP_IS_QDPJIT
473 std::stringstream
tag;
475 OctavePrintOut(H.
mat,Nmax,
tag.str(),
"Hmatrix.m");
480 std::stringstream
tag;
482 OctavePrintOut(
tmp.mat,Nmax,
tag.str(),
"Hmatrix.m");
487 multi2d<DComplex> Hevecs(H.
mat);
488 multi1d<Double> Heval;
489 char V =
'V';
char U =
'U';
490 QDPLapack::zheev(V,
U,Hevecs,Heval);
491 multi2d<DComplex> Hevecs_old(H.
mat);
494 #ifndef QDP_IS_QDPJIT
496 multi1d<T> tt_vec(vec.
size());
497 for(
int i(0);
i<Nmax;
i++){
498 tt_vec[
i][
A.subset()] =
zero;
499 for(
int j(0);
j<Nmax;
j++)
500 tt_vec[
i][
A.subset()] +=Hevecs(
i,
j)*vec[
j];
505 for(
int i(0);
i<Nmax;
i++){
508 Av[
A.subset()] -= Heval[
i]*tt_vec[
i];
509 Double tt = sqrt(norm2(Av,
A.subset()));
510 QDPIO::cout<<
"1 error eigenstd::vector["<<
i<<
"] = "<<tt<<
" ";
511 tt = sqrt(norm2(tt_vec[
i],
A.subset()));
512 QDPIO::cout<<
" --- eval = "<<Heval[
i]<<
" ";
513 QDPIO::cout<<
" --- rq = "<<real(rq)<<
" ";
514 QDPIO::cout<<
"--- norm = "<<tt<<std::endl ;
519 multi1d<Double> Heval_old;
520 QDPLapack::zheev(V,
U,Nmax-1,Hevecs_old,Heval_old);
521 for(
int i(0);
i<Nmax;
i++)
522 Hevecs_old(
i,Nmax-1) = Hevecs_old(Nmax-1,
i) = 0.0;
524 #ifndef QDP_IS_QDPJIT
526 std::stringstream
tag;
527 tag<<
"Hevecs_old"<<
k;
528 OctavePrintOut(Hevecs_old,Nmax,
tag.str(),
"Hmatrix.m");
535 for(
int i(Neig);
i<tr;
i++)
536 for(
int j(0);
j<Nmax;
j++)
537 Hevecs(
i,
j) = Hevecs_old(
i-Neig,
j);
539 #ifndef QDP_IS_QDPJIT
541 std::stringstream
tag;
543 OctavePrintOut(Hevecs,Nmax,
tag.str(),
"Hmatrix.m");
551 multi1d<DComplex> TAU;
552 QDPLapack::zgeqrf(Nmax,2*Neig,Hevecs,TAU);
553 char R =
'R';
char L =
'L';
char N =
'N';
char C =
'C';
554 multi2d<DComplex> Htmp = H.
mat;
555 QDPLapack::zunmqr(R,N,Nmax,Nmax,Hevecs,TAU,Htmp);
556 QDPLapack::zunmqr(L,
C,Nmax,2*Neig,Hevecs,TAU,Htmp);
560 #ifndef QDP_IS_QDPJIT
562 std::stringstream
tag;
564 OctavePrintOut(Htmp,Nmax,
tag.str(),
"Hmatrix.m");
568 QDPLapack::zheev(V,
U,2*Neig,Htmp,Heval);
569 for(
int i(Neig);
i< 2*Neig;
i++ )
570 for(
int j(2*Neig);
j<Nmax;
j++)
573 #ifndef QDP_IS_QDPJIT
575 std::stringstream
tag;
577 OctavePrintOut(Htmp,Nmax,
tag.str(),
"Hmatrix.m");
581 QDPLapack::zunmqr(L,N,Nmax,2*Neig,Hevecs,TAU,Htmp);
583 #ifndef QDP_IS_QDPJIT
585 std::stringstream
tag;
587 OctavePrintOut(Htmp,Nmax,
tag.str(),
"Hmatrix.m");
592 multi1d<T> tt_vec = vec.
vec;
593 for(
int i(0);
i<2*Neig;
i++){
594 vec[
i][
A.subset()] =
zero;
595 for(
int j(0);
j<Nmax;
j++)
596 vec[
i][
A.subset()] +=Htmp(
i,
j)*tt_vec[
j];
600 #ifndef QDP_IS_QDPJIT
604 for(
int i(0);
i<2*Neig;
i++){
607 Av[
A.subset()] -= Heval[
i]*vec[
i];
608 Double tt = sqrt(norm2(Av,
A.subset()));
609 QDPIO::cout<<
"error eigenstd::vector["<<
i<<
"] = "<<tt<<
" ";
610 tt = sqrt(norm2(vec[
i],
A.subset()));
611 QDPIO::cout<<
"--- rq ="<<real(rq)<<
" ";
612 QDPIO::cout<<
"--- norm = "<<tt<<std::endl ;
619 for (
int i=0;
i<2*Neig;
i++) H(
i,
i) = Heval[
i];
636 for (
int i=0;
i<2*Neig;
i++){
638 H(
i,2*Neig) = conj(H(2*Neig,
i));
646 #ifndef QDP_IS_QDPJIT
648 std::stringstream
tag;
650 OctavePrintOut(H.
mat,Nmax,
tag.str(),
"Hmatrix.m");
656 Double inorm = 1.0/sqrt(r_norm2);
664 res.
resid = sqrt(r_norm2);
670 QDPIO::cout <<
"InvEigCG2: k = " <<
k <<
" res^2 = " << r_norm2;
671 QDPIO::cout <<
" r_dot_z = " << r_dot_z << std::endl;
675 res.
resid = sqrt(r_norm2);
686 char V =
'V';
char U =
'U';
687 multi1d<Double> tt_eval;
688 QDPLapack::zheev(V,
U,Htmp.
mat,tt_eval);
689 evec.resize(Neig,Ls);
691 for(
int i(0);
i<Neig;
i++){
692 evec[
i][
A.subset()] =
zero;
693 eval[
i] = tt_eval[
i];
694 for(
int j(0);
j<2*Neig;
j++)
695 evec[
i][
A.subset()] += Htmp(
i,
j)*vec[
j];
704 for(
int i(0);
i<Neig;
i++)
708 Av[
A.subset()] -= eval[
i]*evec[
i];
709 Double tt = sqrt(norm2(Av,
A.subset()));
710 QDPIO::cout<<
"FINAL: error eigenstd::vector["<<
i<<
"] = "<<tt<<
" ";
711 tt = sqrt(norm2(evec[
i],
A.subset()));
712 QDPIO::cout<<
"--- rq ="<<real(rq)<<
" ";
713 QDPIO::cout<<
"--- norm = "<<tt<<std::endl ;
721 res.
resid = sqrt(r_norm2);
723 QDPIO::cout <<
"InvEigCG2: k = " <<
k << std::endl;
724 flopcount.report(
"InvEigCG2", swatch.getTimeInSeconds());
734 const multi1d<Double>& eval,
735 const multi2d<T>& evec,
736 int startV,
int endV,
741 FlopCounter flopcount;
747 const int Ls =
A.size();
749 if(endV>eval.size()){
750 QDP_error_exit(
"vPrecondCG: not enought evecs eval.size()=%d",eval.size());
762 Double r_dot_z, r_dot_z_old;
768 for(
int l=0;
l < Ls; ++
l)
769 r[
l][
A.subset()] =
b[
l] - Ap[
l];
770 Double r_norm2 = norm2(
r,
A.subset());
773 QDPIO::cout <<
"vecPrecondCG: k = " <<
k <<
" res^2 = " << r_norm2 << std::endl;
777 while(toBool(r_norm2>
rsd_sq)){
779 for(
int l=0;
l < Ls; ++
l)
780 z[
l][
A.subset()] =
r[
l];
782 for(
int i(startV);
i<endV;
i++){
785 for(
int l=0;
l < Ls; ++
l)
786 z[
l][
A.subset()] += (1.0/eval[
i]-1.0)*
d*evec[
i][
l];
790 r_dot_z = innerProductReal(
r,
z,
A.subset());
793 for(
int l=0;
l < Ls; ++
l)
794 p[
l][
A.subset()] =
z[
l];
797 beta = r_dot_z/r_dot_z_old;
798 for(
int l=0;
l < Ls; ++
l)
803 pAp = innerProductReal(
p,Ap,
A.subset());
806 for(
int l=0;
l < Ls; ++
l)
808 for(
int l=0;
l < Ls; ++
l)
810 r_norm2 = norm2(
r,
A.subset());
811 r_dot_z_old = r_dot_z;
819 QDPIO::cout <<
"vecPrecondCG: k = " <<
k <<
" res^2 = " << r_norm2;
820 QDPIO::cout <<
" r_dot_z = " << r_dot_z << std::endl;
825 res.
resid = sqrt(r_norm2);
827 QDPIO::cout <<
"vPreconfCG: k = " <<
k << std::endl;
828 flopcount.report(
"vPrecondCG", swatch.getTimeInSeconds());
837 const multi1d<Double>& eval,
838 const multi2d<T>& evec,
841 int N = evec.size2();
849 const multi1d<Double>& eval,
850 const multi2d<T>& evec,
858 const int Ls =
A.size();
865 for(
int l=0;
l < Ls; ++
l)
866 r[
l][
A.subset()] =
b[
l] - Ap[
l];
869 for(
int i(0);
i<N;
i++)
872 for(
int l=0;
l < Ls; ++
l)
873 x[
l][
A.subset()] += (
d/eval[
i])*evec[
i][
l];
879 QDPIO::cout <<
"InitGuess: time = "
880 << snoop.getTimeInSeconds()
881 <<
" secs" << std::endl;
893 const multi2d<LatticeFermionF>& evec,
900 multi1d<LatticeFermionF>&
x,
901 const multi1d<LatticeFermionF>&
b,
902 multi1d<Double>& eval,
903 multi2d<LatticeFermionF>& evec,
912 multi1d<LatticeFermionF>&
x,
913 const multi1d<LatticeFermionF>&
b,
914 const multi1d<Double>& eval,
915 const multi2d<LatticeFermionF>& evec,
916 int startV,
int endV,
923 multi1d<LatticeFermionF>&
x,
924 const multi1d<LatticeFermionF>&
b,
925 const multi1d<Double>& eval,
926 const multi2d<LatticeFermionF>& evec,
933 multi1d<LatticeFermionF>&
x,
934 const multi1d<LatticeFermionF>&
b,
935 const multi1d<Double>& eval,
936 const multi2d<LatticeFermionF>& evec,
947 const multi2d<LatticeFermionD>& evec,
954 multi1d<LatticeFermionD>&
x,
955 const multi1d<LatticeFermionD>&
b,
956 multi1d<Double>& eval,
957 multi2d<LatticeFermionD>& evec,
966 multi1d<LatticeFermionD>&
x,
967 const multi1d<LatticeFermionD>&
b,
968 const multi1d<Double>& eval,
969 const multi2d<LatticeFermionD>& evec,
970 int startV,
int endV,
977 multi1d<LatticeFermionD>&
x,
978 const multi1d<LatticeFermionD>&
b,
979 const multi1d<Double>& eval,
980 const multi2d<LatticeFermionD>& evec,
987 multi1d<LatticeFermionD>&
x,
988 const multi1d<LatticeFermionD>&
b,
989 const multi1d<Double>& eval,
990 const multi2d<LatticeFermionD>& evec,
void NormalizeAndAddVector(const multi1d< T > &v, const Double &inorm, const Subset &s)
void AddOrReplaceVector(const multi1d< T > &v, const Subset &s)
void AddVectors(multi2d< T > &v, const Subset &s)
Linear Operator to arrays.
void normGramSchmidt(multi1d< LatticeFermionF > &vec, int f, int t, const Subset &sub)
Gram-Schmidt with normalization.
Conjugate-Gradient algorithm with eigenstd::vector acceleration.
SpinMatrix C()
C = Gamma(10)
BinaryReturn< C1, C2, FnInnerProduct >::Type_t innerProduct(const QDPSubType< T1, C1 > &s1, const QDPType< T2, C2 > &s2)
void SubSpaceMatrix_T(LinAlg::Matrix< DComplex > &H, const LinearOperatorArray< T > &A, const multi2d< T > &evec, int Nvecs)
SystemSolverResults_t vecPrecondCG(const LinearOperatorArray< LatticeFermionD > &A, multi1d< LatticeFermionD > &x, const multi1d< LatticeFermionD > &b, const multi1d< Double > &eval, const multi2d< LatticeFermionD > &evec, int startV, int endV, const Real &RsdCG, int MaxCG)
SystemSolverResults_t InvEigCG2(const LinearOperatorArray< LatticeFermionD > &A, multi1d< LatticeFermionD > &x, const multi1d< LatticeFermionD > &b, multi1d< Double > &eval, multi2d< LatticeFermionD > &evec, int Neig, int Nmax, const Real &RsdCG, int MaxCG)
SystemSolverResults_t old_InvEigCG2_T(const LinearOperatorArray< T > &A, multi1d< T > &x, const multi1d< T > &b, multi1d< Double > &eval, multi2d< T > &evec, int Neig, int Nmax, const Real &RsdCG, int MaxCG)
SystemSolverResults_t vecPrecondCG_T(const LinearOperatorArray< T > &A, multi1d< T > &x, const multi1d< T > &b, const multi1d< Double > &eval, const multi2d< T > &evec, int startV, int endV, const Real &RsdCG, int MaxCG)
void SubSpaceMatrix(LinAlg::Matrix< DComplex > &H, const LinearOperatorArray< LatticeFermionD > &A, const multi2d< LatticeFermionD > &evec, int Nvecs)
SystemSolverResults_t InvEigCG2_T(const LinearOperatorArray< T > &A, multi1d< T > &x, const multi1d< T > &b, multi1d< Double > &eval, multi2d< T > &evec, int Neig, int Nmax, const Real &RsdCG, int MaxCG)
void InitGuess(const LinearOperatorArray< LatticeFermionD > &A, multi1d< LatticeFermionD > &x, const multi1d< LatticeFermionD > &b, const multi1d< Double > &eval, const multi2d< LatticeFermionD > &evec, int N, int &n_count)
void InitGuess_T(const LinearOperatorArray< T > &A, multi1d< T > &x, const multi1d< T > &b, const multi1d< Double > &eval, const multi2d< T > &evec, int N, int &n_count)
static const LatticeInteger & beta(const int dim)
static const LatticeInteger & alpha(const int dim)
Asqtad Staggered-Dirac operator.
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > & RsdCG
QDP_error_exit("too many BiCG iterations", n_count, rsd_sq, cp, c, re_rvr, im_rvr, re_a, im_a, re_b, im_b)
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > int MaxCG
std::string tag(const std::string &prefix)
FloatingPoint< double > Double
Gram-Schmidt with normalization.
Holds return info from SystemSolver call.
multi1d< LatticeColorMatrix > U