3 #include "qdp-lapack.h"
11 "<?xml version='1.0'?> \
14 <FermAct>CLOVER</FermAct> \
15 <Kappa>0.115</Kappa> \
16 <clovCoeff>1.17</clovCoeff> \
17 <clovCoeffR>0.91</clovCoeffR> \
18 <clovCoeffT>1.07</clovCoeffT> \
20 <anisoP>true</anisoP> \
26 <Name>STOUT_FERM_STATE</Name> \
28 <n_smear>2</n_smear> \
29 <orthog_dir>3</orthog_dir> \
31 <FermBC>SIMPLE_FERMBC</FermBC> \
32 <boundary>1 1 1 -1</boundary> \
37 <invType>FGMRESDR_INVERTER</invType> \
38 <RsdTarget>1.0e-7</RsdTarget> \
39 <NKrylov>5</NKrylov> \
41 <MaxIter>130</MaxIter> \
43 <invType>MR_INVERTER</invType> \
55 using T = LatticeFermion;
56 using Q = multi1d<LatticeColorMatrix>;
57 using P = multi1d<LatticeColorMatrix>;
69 XMLReader xml_in(input);
90 multi1d<LatticeColorMatrix>
u;
108 ASSERT_EQ(
p.PrecondParams.path,
"/PrecondParams");
115 XMLReader xml_in(input);
120 ASSERT_EQ(
p.PrecondParams.path,
"/PrecondParams");
127 XMLReader xml_in(input);
135 XMLReader xml_in(input);
144 multi2d<DComplex> H(cols,rows);
146 for(
int col=0; col < n_krylov; ++col) {
147 for(
int row=0; row < n_krylov+1; ++row) {
148 H(col,row) = DComplex(0);
153 for(
int row=0; row < dim+1; ++row) {
155 for(
int col=0; col < dim; ++col) {
156 QDPIO::cout << H(col, row) <<
" ";
158 QDPIO::cout <<
" ] " << std::endl;
165 XMLReader xml_in(input);
170 const Subset&
s = sol.
subset();
178 Real rsd_target(1.0e-6);
179 rsd_target*=sqrt(norm2(rhs,
s));
182 multi2d<DComplex> H(n_krylov,n_krylov+1);
183 multi2d<DComplex> R(n_krylov,n_krylov+1);
184 multi1d<T> V(n_krylov+1);
185 multi1d<T> Z(n_krylov+1);
186 multi1d< Handle<Givens> > givens_rots(n_krylov+1);
187 multi1d<DComplex> g(n_krylov+1);
188 multi2d<DComplex> Qk_Hk;
189 multi1d<DComplex> Qk_Hk_taus;
191 for(
int col=0; col < n_krylov; ++col) {
192 for(
int row=0; row < n_krylov+1; ++row) {
193 H(col,row) = DComplex(0);
194 R(col,row) = DComplex(0);
200 for(
int j=0;
j < g.size(); ++
j) {
206 V[0][
s] = beta_inv * rhs;
224 for(
int row=0; row < dim+1; ++row) {
225 for(
int col = 0; col < row-1; ++col) {
229 double re = toDouble( real( H(col,row) ) );
230 double im = toDouble( imag( H(col,row) ) );
240 for(
int j=0;
j < dim; ++
j) {
244 for(
int j=0;
j < dim; ++
j) {
245 for(
int i=
j+1;
i < dim; ++
i) {
253 multi2d<DComplex> H(1,2);
255 for(
int row=0; row < 2; ++row) {
256 H(0,row) = DComplex(0);
275 multi2d<DComplex> H(1,2);
277 for(
int row=0; row < 2; ++row) {
278 H(0,row) = DComplex(0);
302 multi2d<DComplex> H(1,2);
304 for(
int row=0; row < 2; ++row) {
305 H(0,row) = DComplex(0);
323 EXPECT_DOUBLE_EQ( toDouble(real(sqrt(norm2(f)))), toDouble( real(H(0,0)) ) );
324 EXPECT_DOUBLE_EQ( toDouble(imag(sqrt(norm2(f)))), toDouble( imag(H(0,0)) ) );
329 multi2d<DComplex> H(2,2);
331 for(
int row=0; row < 2; ++row) {
332 H(0,row) = DComplex(0);
380 XMLReader xml_in(input);
385 const Subset&
s = sol.
subset();
392 double rsd_target_in=GetParam();
393 Real rsd_target(rsd_target_in);
394 rsd_target *= sqrt(norm2(rhs,
s));
400 multi2d<DComplex> H(n_krylov, n_krylov+1);
401 multi2d<DComplex> R(n_krylov, n_krylov+1);
402 for(
int col = 0; col < n_krylov; ++col) {
403 for(
int row = 0; row < n_krylov+1; ++row) {
405 H(col,row) = DComplex(0);
406 R(col,row) = DComplex(0);
409 multi1d<T> V(n_krylov+1);
410 multi1d<T> Z(n_krylov+1);
411 multi1d< Handle<Givens> > givens_rots(n_krylov+1);
412 multi1d<DComplex> g(n_krylov+1);
413 multi2d<DComplex> Qk;
414 multi1d<DComplex> Qk_tau;
419 for(
int j=0;
j < g.size(); ++
j) {
425 V[0][
s] = beta_inv * rhs;
446 for(
int row=0; row < dim+1; ++row) {
448 for(
int col = 0; col < row-1; ++col) {
449 double re = toDouble( real( H(col,row) ) );
450 double im = toDouble( imag( H(col,row) ) );
460 for(
int j=0;
j < dim+1; ++
j) {
464 for(
int j=0;
j < dim+1; ++
j) {
465 for(
int i=
j+1;
i < dim+1; ++
i) {
470 multi1d< Handle<Givens> > GivensRots(dim+1);
472 multi1d< DComplex > resvec(dim+1);
473 for(
int row=0; row < dim+1; ++row) {
474 resvec[row] = DComplex(0);
479 for(
int j=0;
j < dim; ++
j) {
484 for(
int i=0;
i <
j; ++
i) {
485 (*GivensRots[
i])(
j, H);
489 (*GivensRots[
j])(
j,H);
490 (*GivensRots[
j])(resvec);
494 for(
int row=1; row < dim+1; ++row) {
495 for(
int col=0; col < row; ++col) {
504 for(
int row=0; row < dim+1; ++row) {
505 for(
int col=row; col < dim; ++col) {
513 for(
int col=0; col < dim+1; ++col) {
514 QDPIO::cout <<
"col = " << col <<
" g=" <<g[col] <<
" resvec" << resvec[col] << std::endl;
530 XMLReader xml_in(input);
537 multi2d<DComplex> R(n_cols,n_rows);
538 multi1d<DComplex>
b(n_rows);
539 multi1d<DComplex>
x(n_rows);
540 for(
int row=0; row < n_rows; ++row) {
541 for(
int col=0; col < n_cols; ++col) {
542 R(col,row)=DComplex(0);
549 for(
int row=0; row < n_rows-1; ++row) {
550 for(
int col=row; col < n_cols; ++col) {
558 for(
int row=0; row < n_cols; ++row) {
559 DComplex prod=DComplex(0);
560 for(
int col=row; col < n_cols; ++col) {
561 prod += R(col,row)*
x[col];
572 XMLReader xml_in(input);
576 float rsd_target_in = GetParam();
577 p.RsdTarget = Real(rsd_target_in);
583 const Subset&
s = linop->subset();
590 LatticeFermion
x =
zero;
597 LatticeFermion
r=
zero;
601 Double resid_rel = sqrt( norm2(
r,
s)/norm2(rhs,
s) );
602 ASSERT_LE( toDouble(resid_rel), toDouble(
p.RsdTarget) );
610 XMLReader xml_in(input);
615 float rsd_target_in = GetParam();
616 p.RsdTarget = Real(rsd_target_in);
622 const Subset&
s = linop->subset();
629 LatticeFermion
x =
zero;
636 LatticeFermion
r=
zero;
640 Double resid_rel = sqrt( norm2(
r,
s)/norm2(rhs,
s) );
641 ASSERT_LE( toDouble(resid_rel), toDouble(
p.RsdTarget) );
656 XMLReader xml_in(input);
661 const Subset&
s = sol.
subset();
668 Real rsd_target(1.0e-9);
669 rsd_target *= sqrt(norm2(rhs,
s));
675 multi2d<DComplex> H(n_krylov,n_krylov+1);
676 multi2d<DComplex> R(n_krylov,n_krylov+1);
677 for(
int row = 0; row < n_krylov+1; ++row) {
678 for(
int col = 0; col < n_krylov; ++col) {
679 H(col,row) = DComplex(0);
680 R(col,row) = DComplex(0);
683 multi1d<T> V(n_krylov+1);
684 multi1d<T> Z(n_krylov+1);
685 multi1d< Handle<Givens> > givens_rots(n_krylov+1);
686 multi1d<DComplex> g(n_krylov+1);
687 multi2d<DComplex> Qk;
688 multi1d<DComplex> Qk_tau;
692 for(
int j=0;
j < g.size(); ++
j) {
698 V[0][
s] = beta_inv * rhs;
715 multi1d<DComplex> f_m(n_krylov);
716 multi1d<DComplex> h_m(n_krylov);
717 multi2d<DComplex> h1(n_krylov, n_krylov);
728 for(
int col=0; col < n_krylov; ++col) {
729 for(
int row =0; row < n_krylov; ++row) {
730 h1(col,row) = H(col,row);
734 for(
int col=0; col < n_krylov; ++col) {
735 h_m[col] = conj(H(col, n_krylov));
739 multi1d<int> ipiv(n_krylov);
743 QDPIO::cout <<
"CALLING ZGETRF to factor H" << std::endl;
744 QDPLapack::zgetrf(n_krylov,n_krylov,h1,n_krylov,ipiv,
info);
748 QDPIO::cout <<
"CALLING ZGETRS to solve for f" << std::endl;
751 QDPLapack::zgetrs(trans, n_krylov,1,h1,n_krylov,ipiv,f_m,n_krylov,
info);
755 multi1d<DComplex> diff(n_krylov);
757 for(
int row=0; row < n_krylov; ++row) {
764 QDPLapack::zgemv(trans,n_krylov, n_krylov, H, n_krylov+1, f_m, diff);
765 for(
int row=0; row < n_krylov; ++row) {
766 diff[row] -= h_m[row];
767 EXPECT_LT( toDouble(real(diff[row])), 1.0e-14 );
768 EXPECT_LT( toDouble(imag(diff[row])), 1.0e-14 );
771 for(
int row=0; row < n_krylov;++row) {
772 QDPIO::cout <<
"diff["<<row<<
"] = " << diff[row] << std::endl;
779 multi2d<DComplex> H_tilde(n_krylov,n_krylov);
780 for(
int row=0; row < n_krylov;++row) {
781 for(
int col=0; col< n_krylov; ++col) {
782 H_tilde(col,row) = H(col,row) + f_m[row]*conj(h_m[col]);
787 multi1d<DComplex> evals(n_krylov);
788 multi2d<DComplex> evecs(n_krylov, n_krylov);
790 QDPIO::cout <<
"CALLING ZGEEV" << std::endl;
791 QDPLapack::zgeev(n_krylov, H_tilde, evals, evecs);
792 QDPIO::cout <<
"Checking evecs" << std::endl;
795 for(
int row=0; row < n_krylov;++row) {
796 for(
int col=0; col< n_krylov; ++col) {
797 H_tilde(col,row) = H(col,row) + f_m[row]*conj(h_m[col]);
803 for(
int evec=0; evec < n_krylov; ++evec) {
806 multi1d<DComplex> my_evec(n_krylov);
807 for(
int row=0; row<n_krylov; ++row) {
808 my_evec[row] = evecs(evec,row);
813 QDPLapack::zgemv(trans, n_krylov, n_krylov, H_tilde, n_krylov, my_evec, diff);
816 for(
int row=0; row < n_krylov; ++row) {
817 diff[row] -= evals[evec]*evecs(evec,row);
820 EXPECT_LT( toDouble(real(diff[row])), 1.0e-14 );
821 EXPECT_LT( toDouble(imag(diff[row])), 1.0e-14 );
823 QDPIO::cout <<
"eigenvec: " << evec <<
" diff[" << row <<
"]=" << diff[row] <<
" lambda=" << evals[evec] << std::endl;
831 XMLReader xml_in(input);
836 const Subset&
s = sol.
subset();
843 Real rsd_target(1.0e-9);
844 rsd_target *= sqrt(norm2(rhs,
s));
850 multi2d<DComplex> H(n_krylov,n_krylov+1);
851 multi2d<DComplex> R(n_krylov,n_krylov+1);
852 for(
int row = 0; row < n_krylov+1; ++row) {
853 for(
int col = 0; col < n_krylov; ++col) {
854 H(col,row) = DComplex(0);
855 R(col,row) = DComplex(0);
858 multi1d<T> V(n_krylov+1);
859 multi1d<T> Z(n_krylov+1);
860 multi1d< Handle<Givens> > givens_rots(n_krylov+1);
861 multi1d<DComplex> g(n_krylov+1);
862 multi2d<DComplex> Qk;
863 multi1d<DComplex> Qk_tau;
867 for(
int j=0;
j < g.size(); ++
j) {
873 V[0][
s] = beta_inv * rhs;
890 multi1d<DComplex> f_m(n_krylov);
891 multi2d<DComplex> evecs(n_krylov,n_krylov);
892 multi1d<DComplex> evals(n_krylov);
893 multi1d<int> order_array(n_krylov);
903 DComplex prev_eval=evals[order_array[0]];
904 QDPIO::cout <<
"evals[0] = " << prev_eval <<
" || evals[0] || = " << norm2(prev_eval) << std::endl;
905 for(
int i=1;
i < n_krylov;
i++) {
906 DComplex curr_eval=evals[order_array[
i]];
907 QDPIO::cout <<
"evals["<<
i<<
"] = " << curr_eval <<
" ||evals["<<
i<<
"] || = " << norm2(curr_eval) <<std::endl;
909 ASSERT_LE( toDouble(norm2(prev_eval)), toDouble(norm2(curr_eval)));
910 prev_eval = curr_eval;
914 multi1d<DComplex> h_m(n_krylov);
925 for(
int col=0; col < n_krylov; ++col) {
926 h_m[col] = conj(H(col, n_krylov));
932 multi2d<DComplex> H_tilde(n_krylov,n_krylov);
933 for(
int row=0; row < n_krylov;++row) {
934 for(
int col=0; col< n_krylov; ++col) {
935 H_tilde(col,row) = H(col,row) + f_m[row]*conj(h_m[col]);
940 multi1d<DComplex> diff(n_krylov);
941 for(
int evec=0; evec < n_krylov; ++evec) {
944 multi1d<DComplex> my_evec(n_krylov);
945 for(
int row=0; row<n_krylov; ++row) {
946 my_evec[row] = evecs(evec,row);
951 QDPLapack::zgemv(trans, n_krylov, n_krylov, H_tilde, n_krylov, my_evec, diff);
954 for(
int row=0; row < n_krylov; ++row) {
955 diff[row] -= evals[evec]*evecs(evec,row);
958 EXPECT_LT( toDouble(real(diff[row])), 1.0e-14 );
959 EXPECT_LT( toDouble(imag(diff[row])), 1.0e-14 );
961 QDPIO::cout <<
"eigenvec: " << evec <<
" diff[" << row <<
"]=" << diff[row] <<
" lambda=" << evals[evec] << std::endl;
970 XMLReader xml_in(input);
975 const Subset&
s = sol.
subset();
982 Real rsd_target(1.0e-9);
983 rsd_target *= sqrt(norm2(rhs,
s));
985 int n_krylov=
p.NKrylov;
986 int n_deflate=
p.NDefl;
987 int total_dim = n_krylov+n_deflate;
990 multi2d<DComplex> H(total_dim, total_dim + 1);
991 multi2d<DComplex> R(total_dim, total_dim + 1);
993 for(
int col = 0; col < total_dim; ++col) {
994 for(
int row = 0; row < total_dim+1; ++row) {
995 H(col,row) = DComplex(0);
996 R(col,row) = DComplex(0);
1000 multi1d<T> V(total_dim+1);
1001 multi1d<T> Z(total_dim+1);
1002 multi1d< Handle<Givens> > givens_rots(total_dim+1);
1003 multi1d<DComplex> g(total_dim + 1);
1004 multi1d<DComplex>
c(total_dim + 1);
1005 multi2d<DComplex> Qk;
1006 multi1d<DComplex> Qk_tau;
1010 for(
int j=0;
j < g.size(); ++
j) {
1019 V[0][
s] = beta_inv * rhs;
1036 multi1d<DComplex>
eta(n_krylov);
1041 multi1d<DComplex> f_m(n_krylov);
1042 multi2d<DComplex> evecs(n_krylov,n_krylov);
1043 multi1d<DComplex> evals(n_krylov);
1044 multi1d<int> order_array(n_krylov);
1060 QDPIO::cout <<
"Setting up G_k and G_{k+1}" << std::endl << std::flush;
1061 multi2d<DComplex> Qkplus1(n_deflate+1, n_krylov+1);
1064 for(
int col=0; col < n_deflate; ++col) {
1065 for(
int row=0; row < n_krylov; ++row) {
1068 Qkplus1(col,row) = evecs( order_array[col], row);
1070 Qkplus1(col,n_krylov) =
zero;
1073 for(
int row=0; row < n_krylov+1; ++row) {
1074 Qkplus1(n_deflate,row) =
c[row];
1075 for(
int col=0; col < n_krylov; ++col) {
1076 Qkplus1(n_deflate,row) -= H(col,row)*
eta(col);
1083 multi1d<DComplex> tau_kplus1;
1084 QDPIO::cout <<
"QR Decomposing Gk+1" << std::endl;
1085 QDPLapack::zgeqrf(n_krylov+1, n_deflate+1, Qkplus1, tau_kplus1);
1092 multi2d<DComplex> H_copy(n_krylov, n_krylov+1);
1093 multi2d<DComplex> H_copy2(n_krylov, n_krylov+1);
1094 for(
int col=0; col < n_krylov; ++col) {
1095 for(
int row=0; row < n_krylov+1; ++row) {
1096 H_copy(col,row) = H(col,row);
1097 H_copy2(col,row) = H(col,row);
1105 QDPIO::cout <<
"Post multiplying with Q_k using ZUNMQR 1" << std::endl;
1113 QDPLapack::zunmqr2(side,trans, n_krylov+1, n_krylov, n_deflate, Qkplus1, tau_kplus1, H_copy);
1116 QDPIO::cout <<
"Pre multiply with Q^{H}_{k+1} usign ZUNMQR2" << std::endl;
1123 QDPLapack::zunmqr2(side,trans, n_krylov+1, n_krylov, n_deflate+1, Qkplus1, tau_kplus1, H_copy);
1130 QDPLapack::zungqr(n_krylov+1,n_deflate+1,n_deflate+1, Qkplus1, tau_kplus1);
1139 multi2d<DComplex> HKQK(n_deflate, n_krylov+1);
1144 DComplex cbeta =
zero;
1154 QDPLapack::zgemm(transa, transb,
1165 multi2d<DComplex> QK1HKQK(n_deflate, n_deflate+1);
1179 QDPLapack::zgemm(transa, transb,
1190 QDPIO::cout <<
"QK1HKQK - H_copy" << std::endl;
1192 for(
int row = 0; row < n_deflate+1; ++row) {
1193 for(
int col=0; col < n_deflate; ++col) {
1194 QK1HKQK(col,row) -= H_copy(col,row);
1195 QDPIO::cout << QK1HKQK(col,row) <<
"\t" ;
1196 EXPECT_LT( toDouble(sqrt(norm2(QK1HKQK(col,row)))), 1.0e-15 );
1198 QDPIO::cout << std::endl;
1202 multi1d<LatticeFermion> new_V(n_deflate+1);
1203 for(
int i=0;
i < n_deflate+1; ++
i) {
1205 for(
int j=0;
j < n_krylov+1; ++
j) {
1206 new_V[
i] += V[
j]*Qkplus1(
i,
j);
1211 for(
int i=0;
i < n_deflate+1; ++
i) {
1212 for(
int j=0;
j < n_deflate+1; ++
j ) {
1214 QDPIO::cout <<
"< V[" <<
i <<
"],V["<<
j<<
"] > = " << iprod ;
1219 Double diff_i = fabs(imag(iprod));
1220 QDPIO::cout <<
" ABS DIFF=" << fabs(diff_r) <<
" ABS DIFF IM=" << diff_i << std::endl;
1226 Double diff_r = fabs(real(iprod));
1227 Double diff_i = fabs(imag(iprod));
1228 QDPIO::cout <<
" ABS DIFF RE=" << diff_r <<
" ABS DIFF IM=" << diff_i << std::endl;
1236 multi1d<LatticeFermion> new_Z(n_deflate);
1237 for(
int i=0;
i < n_deflate; ++
i) {
1239 for(
int j=0;
j < n_krylov; ++
j) {
1240 new_Z[
i] += Z[
j]*Qkplus1(
i,
j);
Primary include file for CHROMA library code.
Base class for quadratic matter actions (e.g., fermions)
Class for counted reference semantics.
Solve a M*psi=chi linear system by FGMRESDR.
const Subset & subset() const
Return the subset on which the operator acts.
void LeastSquaresSolve(const multi2d< DComplex > &R, const multi1d< DComplex > &rhs, multi1d< DComplex > &eta, int dim) const
void FlexibleArnoldi(int n_krylov, int n_deflate, const Real &rsd_target, multi1d< T > &V, multi1d< T > &Z, multi2d< DComplex > &H, multi2d< DComplex > &R, multi1d< Handle< Givens > > &givens_rots, multi1d< DComplex > &g, multi2d< DComplex > &Qk, multi1d< DComplex > &Qk_tau, int &ndim_cycle) const
void GetEigenvectors(int total_dim, const multi2d< DComplex > &H, multi1d< DComplex > &f_m, multi2d< DComplex > &evecs, multi1d< DComplex > &evals, multi1d< int > &order_array) const
Handle< FermAct4D< T, P, Q > > S_f
multi1d< LatticeColorMatrix > P
Handle< FermState< T, P, Q > > state
multi1d< LatticeColorMatrix > Q
multi1d< LatticeColorMatrix > u
Handle< LinearOperator< T > > linop
Fermion action factories.
INSTANTIATE_TEST_CASE_P(FGMRESDRTests, FGMRESDRTestsFloatParams, testing::Values(1.0e-3, 1.0e-9))
const std::string xml_for_param
TEST_P(FGMRESDRTestsFloatParams, arnoldi5GivensRot)
TEST_F(FGMRESDRTests, canConstructDefault)
#define ASSERT_EQ(val1, val2)
#define EXPECT_DOUBLE_EQ(expected, actual)
#define ASSERT_NEAR(val1, val2, abs_error)
#define ASSERT_LE(val1, val2)
#define EXPECT_LT(val1, val2)
BinaryReturn< C1, C2, FnInnerProduct >::Type_t innerProduct(const QDPSubType< T1, C1 > &s1, const QDPType< T2, C2 > &s2)
static const LatticeInteger & beta(const int dim)
multi1d< LatticeColorMatrix > G
Asqtad Staggered-Dirac operator.
static multi1d< LatticeColorMatrix > u
void reunit(LatticeColorMatrixF3 &xa)
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > state
const WilsonTypeFermAct< multi1d< LatticeFermion > > & S_f
multi1d< LatticeFermion > s(Ncb)
FloatingPoint< double > Double
internal::ValueArray1< T1 > Values(T1 v1)
Params for FGMRESDR inverter.
Solve an FGMRESR-DR system.
Solve a M*psi=chi linear system by FGMRES_DR.