8 #include "qdp-lapack.h"
18 namespace LinOpSysSolverFGMRESDREnv
23 Handle<
FermState< LatticeFermion, multi1d<LatticeColorMatrix>, multi1d<LatticeColorMatrix> > >
state,
77 template<
typename T >
80 const Real& rsd_target,
89 multi2d<DComplex>& Qk,
90 multi1d<DComplex>& Qk_tau,
97 const int total_dim=n_krylov+n_deflate;
101 for(
int j=n_deflate;
j < total_dim; ++
j) {
108 for(
int i=0;
i <=
j ; ++
i ) {
110 w[
s] -= H(
j,
i)* V[
i];
114 H(
j,
j+1) = DComplex(wnorm);
118 if ( toBool( fabs( wnorm ) <
Double(1.0e-14) ) ) {
123 QDPIO::cout <<
"Converged at iter = " <<
j-n_deflate+1 << std::endl;
134 if( n_deflate > 0 ) {
145 multi1d<DComplex> R_col(
j+2);
146 for(
int i=0;
i <=
j+1;
i++) {
153 QDPLapack::zunmqrv(side,trans, n_deflate+1, n_deflate+1, Qk, Qk_tau, R_col );
155 for(
int i=0;
i <=
j+1;
i++) {
161 for(
int i=0;
i <=
j+1; ++
i) {
167 for(
int i=0;
i <
j; ++
i) {
168 (*givens_rots[
i])(
j,R);
174 (*givens_rots[
j])(
j,R);
175 (*givens_rots[
j])(g);
177 Double accum_resid = fabs(real(g[
j+1]));
181 for(
int rows=0; rows <=
j+1; ++rows) {
182 QDPIO::cout <<
" g["<<rows<<
"]="<<g[rows]<< std::endl;
187 QDPIO::cout <<
"Iter " <<
j-n_deflate+1 <<
" || r || = " << accum_resid <<
" Target=" <<rsd_target << std::endl;
190 if ( toBool( accum_resid <= rsd_target ) ) {
191 QDPIO::cout <<
"Flexible Arnoldi Cycle Converged at iter = " <<
j-n_deflate << std::endl;
202 H_.resize(total_dim, total_dim+1);
203 R_.resize(total_dim, total_dim+1);
207 V_.resize(total_dim+1);
208 Z_.resize(total_dim+1);
210 g_.resize(total_dim+1);
211 c_.resize(total_dim+1);
212 eta_.resize(total_dim);
215 for(
int col =0; col < total_dim; col++) {
216 for(
int row = 0; row < total_dim+1; row++) {
222 for(
int row = 0; row < total_dim+1; row++) {
229 for(
int row = 0; row < total_dim; row++) {
258 const multi1d<DComplex>& Q_rhs,
259 multi1d<DComplex>&
eta,
265 eta[n_cols-1] = Q_rhs[n_cols-1]/R(n_cols-1,n_cols-1);
266 for(
int row = n_cols-2; row >= 0; --row) {
267 eta[row] = Q_rhs[row];
268 for(
int col=row+1; col < n_cols; ++col) {
269 eta[row] -= R(col,row)*
eta[col];
271 eta[row] /= R(row,row);
280 const multi1d<DComplex>& rhs,
281 multi1d<DComplex>&
eta,
285 multi2d<DComplex> R(n_cols,n_cols+1);
286 for(
int col=0; col < n_cols; ++col) {
287 for(
int row=0; row < n_cols+1; ++row) {
288 R(col,row) = H(col,row);
292 multi1d<DComplex> Q_rhs(n_cols+1);
293 for(
int row=0; row < n_cols+1; ++row) {
294 Q_rhs[row] = rhs[row];
297 multi1d<DComplex> R_taus;
298 QDPLapack::zgeqrf(n_cols+1, n_cols, R, R_taus);
301 QDPLapack::zunmqrv(side,
310 eta[n_cols-1] = Q_rhs[n_cols-1]/R(n_cols-1,n_cols-1);
311 for(
int row = n_cols-2; row >= 0; --row) {
312 eta[row] = Q_rhs[row];
313 for(
int col=row+1; col < n_cols; ++col) {
314 eta[row] -= R(col,row)*
eta[col];
316 eta[row] /= R(row,row);
344 const multi2d<DComplex>& H,
345 multi1d<DComplex>& f_m,
346 multi2d<DComplex>& evecs,
347 multi1d<DComplex>& evals,
348 multi1d<int>& order_array)
const
352 evals.resize(total_dim);
353 evecs.resize(total_dim,total_dim);
354 order_array.resize(total_dim);
355 f_m.resize(total_dim);
357 multi2d<DComplex> H_tilde(total_dim,total_dim);
358 multi1d<DComplex> h_m(total_dim);
359 for(
int col=0; col < total_dim; ++col) {
360 for(
int row=0; row < total_dim; ++row) {
361 H_tilde(col,row)=H(col,row);
364 for(
int col=0; col < total_dim; ++col) {
368 h_m[col] = conj(H(col,total_dim));
372 multi1d<int> ipiv(total_dim);
374 QDPLapack::zgetrf(total_dim,total_dim,H_tilde,total_dim,ipiv,
info);
376 QDPIO::cout <<
"ZGETRF reported failure: info=" <<
info << std::endl;
380 QDPLapack::zgetrs(trans,total_dim,1,H_tilde,total_dim,ipiv,f_m,total_dim,
info);
382 QDPIO::cout <<
"ZGETRS reported failure: info=" <<
info << std::endl;
388 for(
int col=0; col < total_dim; ++col) {
389 for(
int row=0; row < total_dim; ++row) {
390 H_tilde(col,row) = H(col,row) + f_m[row]*conj(h_m[col]);
394 QDPLapack::zgeev(total_dim, H_tilde, evals, evecs);
400 std::vector<int> std_order_array(total_dim);
401 for(
int i=0;
i < total_dim; ++
i) {
402 std_order_array[
i]=
i;
408 sort(std_order_array.begin(), std_order_array.end(),
411 [&evals](
int i,
int j)->bool {
412 return toBool( norm2(evals[i]) < norm2(evals[j]));
415 for(
int i=0;
i < total_dim; ++
i) {
416 order_array[
i]=std_order_array[
i];
431 const Real& rsd_target,
437 multi1d<DComplex> &g,
438 multi2d<DComplex> &Qk,
439 multi1d<DComplex> &Qk_tau,
440 int& ndim_cycle)
const
442 FlexibleArnoldiT<>(n_krylov,
447 V,Z,H,R,givens_rots,g, Qk, Qk_tau, ndim_cycle);
464 const Subset&
s =
A_->subset();
484 bool finished = toBool(
r_norm <= target ) ;
499 if (n_cycles == 1 || n_deflate == 0 ) {
509 for(
int j=0;
j <
g_.size(); ++
j) {
521 V_[0][
s] = beta_inv *
r;
527 QDPIO::cout <<
"AUGMENTING SUBSPACE: n_deflate=" << n_deflate <<
" prev_dim=" << prev_dim << std::endl;
530 multi1d<DComplex> f_m(prev_dim);
531 multi2d<DComplex> evecs(prev_dim,prev_dim);
532 multi1d<DComplex> evals(prev_dim);
533 multi1d<int> order_array(prev_dim);
535 (*this).GetEigenvectors(prev_dim,
554 multi2d<DComplex> Qkplus1(n_deflate+1,prev_dim+1);
557 for(
int col=0; col < n_deflate; ++col) {
558 for(
int row=0; row < prev_dim; ++row) {
559 Qkplus1(col,row) = evecs( order_array[col], row);
561 Qkplus1(col,prev_dim) =
zero;
567 for(
int row = 0; row < prev_dim+1; ++row) {
568 Qkplus1(n_deflate,row) =
c_[row];
569 for(
int col=0; col < prev_dim; ++col) {
570 Qkplus1(n_deflate,row) -=
H_(col,row)*
eta_(col);
575 multi1d<DComplex> tau_kplus1;
576 QDPIO::cout <<
"QR Decomposing Gk+1" << std::endl;
577 QDPLapack::zgeqrf(prev_dim+1, n_deflate+1, Qkplus1, tau_kplus1);
580 multi2d<DComplex> H_copy(prev_dim, prev_dim+1);
581 for(
int col=0; col < prev_dim; ++col) {
582 for(
int row=0; row < prev_dim + 1; ++row) {
583 H_copy(col,row) =
H_(col,row);
589 QDPIO::cout <<
"Post multiplying with Q_k using ZUNMQR 1" << std::endl;
594 QDPLapack::zunmqr2(side,trans, prev_dim+1, prev_dim, n_deflate, Qkplus1, tau_kplus1, H_copy);
596 QDPIO::cout <<
"Pre multiply with Q^{H}_{k+1} usign ZUNMQR2" << std::endl;
602 QDPLapack::zunmqr2(side,trans, prev_dim+1, prev_dim, n_deflate+1, Qkplus1, tau_kplus1, H_copy);
607 QDPLapack::zungqr(prev_dim+1,n_deflate+1,n_deflate+1, Qkplus1, tau_kplus1);
610 multi1d<LatticeFermion> new_V(n_deflate+1);
611 for(
int i=0;
i < n_deflate+1; ++
i) {
613 for(
int j=0;
j < prev_dim+1; ++
j) {
614 new_V[
i][
s] +=
V_[
j]*Qkplus1(
i,
j);
618 multi1d<LatticeFermion> new_Z(n_deflate);
619 for(
int i=0;
i < n_deflate; ++
i) {
621 for(
int j=0;
j < prev_dim; ++
j) {
622 new_Z[
i][
s] +=
Z_[
j]*Qkplus1(
i,
j);
627 int total_dim = n_krylov+n_deflate;
629 for(
int col=0; col < total_dim; ++col) {
630 for(
int row=0; row < total_dim; ++row) {
637 for(
int row=0; row < total_dim; ++row) {
643 for(
int cols=0; cols < n_deflate; ++cols) {
644 Z_[cols][
s] = new_Z[cols];
646 for(
int cols=n_deflate; cols < total_dim; ++cols) {
651 for(
int cols=0; cols < n_deflate+1; ++cols) {
652 V_[cols][
s] = new_V[cols];
655 for(
int cols=n_deflate+1; cols < total_dim+1; ++cols) {
661 for(
int row=0; row < n_deflate+1; ++row) {
665 for(
int row=n_deflate+1; row < total_dim+1; ++row) {
670 Hk_QR_.resize(n_deflate,n_deflate+1);
673 for(
int col=0; col < n_deflate; ++col) {
674 for(
int row=0; row < n_deflate+1; ++row) {
675 H_(col,row) = H_copy(col,row);
682 for(
int col=0; col < n_deflate; col++) {
683 for(
int row=col; row < n_deflate; row++) {
694 for(
int row=0; row < total_dim+1; ++row) {
695 QDPIO::cout <<
" g[" << row <<
"]=" <<
g_[row] << std::endl;
720 int iters_this_cycle = dim - n_deflate;
722 QDPIO::cout <<
"Arnoldi cycle finished: dim=" << dim << std::endl;
729 LatticeFermion dx =
zero;
730 for(
int j=0;
j < dim; ++
j) {
738 QDPIO::cout <<
"FGMRESDR: Cycle finished with " << iters_this_cycle <<
" iterations" << std::endl;
747 QDPIO::cout <<
"FGMRESDR: || r || = " <<
r_norm <<
" target = " << target << std::endl;
750 iters_total += iters_this_cycle;
761 QDPIO::cout <<
"FGMRESDR: Done. Cycles=" << n_cycles <<
", Iters=" << iters_total <<
" || r ||/|| b ||=" <<
r_norm / norm_rhs <<
" Target=" <<
invParam_.
RsdTarget << std::endl;
Primary include file for CHROMA library code.
Support class for fermion actions and linear operators.
Class for counted reference semantics.
Solve a M*psi=chi linear system by FGMRESDR.
multi2d< DComplex > Hk_QR_
void LeastSquaresSolve(const multi2d< DComplex > &R, const multi1d< DComplex > &rhs, multi1d< DComplex > &eta, int dim) const
multi1d< Handle< Givens > > givens_rots_
void FlexibleArnoldi(int n_krylov, int n_deflate, const Real &rsd_target, multi1d< T > &V, multi1d< T > &Z, multi2d< DComplex > &H, multi2d< DComplex > &R, multi1d< Handle< Givens > > &givens_rots, multi1d< DComplex > &g, multi2d< DComplex > &Qk, multi1d< DComplex > &Qk_tau, int &ndim_cycle) const
Handle< LinOpSystemSolver< T > > preconditioner_
void GetEigenvectors(int total_dim, const multi2d< DComplex > &H, multi1d< DComplex > &f_m, multi2d< DComplex > &evecs, multi1d< DComplex > &evals, multi1d< int > &order_array) const
multi1d< DComplex > Hk_QR_taus_
SystemSolverResults_t operator()(T &psi, const T &chi) const
Solver the linear system.
Handle< LinearOperator< T > > A_
void InitMatrices()
Initialize the internal matrices.
SysSolverFGMRESDRParams invParam_
virtual const Subset & subset() const =0
Return the subset on which the operator acts.
BinaryReturn< C1, C2, FnInnerProduct >::Type_t innerProduct(const QDPSubType< T1, C1 > &s1, const QDPType< T2, C2 > &s2)
LinOpSystemSolver< LatticeFermion > * createFerm(XMLReader &xml_in, const std::string &path, Handle< FermState< LatticeFermion, multi1d< LatticeColorMatrix >, multi1d< LatticeColorMatrix > > > state, Handle< LinearOperator< LatticeFermion > > A)
Callback function.
static bool registered
Local registration flag.
bool registerAll()
Register all the factories.
const std::string name("FGMRESDR_INVERTER")
Name to be used.
Asqtad Staggered-Dirac operator.
LinOpSysSolverMGProtoClover::T T
multi1d< LatticeFermion > chi(Ncb)
void FlexibleArnoldiT(int n_krylov, int n_deflate, const Real &rsd_target, const LinearOperator< T > &A, const LinOpSystemSolver< T > &M, multi1d< T > &V, multi1d< T > &Z, multi2d< DComplex > &H, multi2d< DComplex > &R, multi1d< Handle< Givens > > &givens_rots, multi1d< DComplex > &g, multi2d< DComplex > &Qk, multi1d< DComplex > &Qk_tau, int &ndim_cycle)
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > state
multi1d< LatticeFermion > s(Ncb)
FloatingPoint< double > Double
Params for FGMRESDR inverter.
Holds return info from SystemSolver call.
Register linop system solvers that solve M*psi=chi.
Factory for solving M*psi=chi where M is not hermitian or pos. def.
Solve a M*psi=chi linear system by FGMRES_DR.