6 #ifndef __syssolver_linop_quda_nef_h__
7 #define __syssolver_linop_quda_nef_h__
36 namespace LinOpSysSolverQUDANEFEnv
49 class LinOpSysSolverQUDANEF :
public LinOpSystemSolverArray<LatticeFermion>
52 typedef LatticeFermion
T;
53 typedef LatticeColorMatrix
U;
54 typedef multi1d<LatticeColorMatrix>
Q;
56 typedef LatticeFermionF
TF;
57 typedef LatticeColorMatrixF
UF;
58 typedef multi1d<LatticeColorMatrixF>
QF;
60 typedef LatticeFermionF
TD;
61 typedef LatticeColorMatrixF
UD;
62 typedef multi1d<LatticeColorMatrixF>
QD;
64 typedef WordType<T>::Type_t REALT;
70 LinOpSysSolverQUDANEF(Handle< LinearOperatorArray<T> > A_,
71 Handle< FermState<T,Q,Q> > state_,
72 const SysSolverQUDANEFParams& invParam_) :
73 A(A_), invParam(invParam_)
77 QDPIO::cout <<
"LinOpSysSolverQUDANEF:" << std::endl;
82 int s =
sizeof( WordType<T>::Type_t );
85 cpu_prec = QUDA_SINGLE_PRECISION;
88 cpu_prec = QUDA_DOUBLE_PRECISION;
93 switch( invParam.cudaPrecision ) {
95 gpu_prec = QUDA_HALF_PRECISION;
98 gpu_prec = QUDA_SINGLE_PRECISION;
101 gpu_prec = QUDA_DOUBLE_PRECISION;
110 switch( invParam.cudaSloppyPrecision ) {
112 gpu_half_prec = QUDA_HALF_PRECISION;
115 gpu_half_prec = QUDA_SINGLE_PRECISION;
118 gpu_half_prec = QUDA_DOUBLE_PRECISION;
121 gpu_half_prec = gpu_prec;
126 q_gauge_param = newQudaGaugeParam();
127 quda_inv_param = newQudaInvertParam();
130 const multi1d<int>& latdims = Layout::subgridLattSize();
132 q_gauge_param.X[0] = latdims[0];
133 q_gauge_param.X[1] = latdims[1];
134 q_gauge_param.X[2] = latdims[2];
135 q_gauge_param.X[3] = latdims[3];
138 q_gauge_param.type = QUDA_WILSON_LINKS;
139 #ifndef BUILD_QUDA_DEVIFACE_GAUGE
140 q_gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER;
142 QDPIO::cout <<
"MDAGM Using QDP-JIT gauge order" << std::endl;
143 q_gauge_param.location = QUDA_CUDA_FIELD_LOCATION;
144 q_gauge_param.gauge_order = QUDA_QDPJIT_GAUGE_ORDER;
151 if( invParam.AntiPeriodicT ) {
152 q_gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
155 q_gauge_param.t_boundary = QUDA_PERIODIC_T;
159 q_gauge_param.cpu_prec = cpu_prec;
160 q_gauge_param.cuda_prec = gpu_prec;
163 switch( invParam.cudaReconstruct ) {
165 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_NO;
168 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_8;
171 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
174 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
178 q_gauge_param.cuda_prec_sloppy = gpu_half_prec;
180 switch( invParam.cudaSloppyReconstruct ) {
182 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_NO;
185 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_8;
188 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
191 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
203 links_single[
mu] = (state_->getLinks())[
mu];
207 if( invParam.axialGaugeP ) {
208 QDPIO::cout <<
"Fixing Temporal Gauge" << std::endl;
211 links_single[
mu] = GFixMat*(state_->getLinks())[
mu]*adj(shift(GFixMat,
FORWARD,
mu));
213 q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_YES;
217 q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;
221 q_gauge_param.anisotropy = 1.0;
225 quda_inv_param.dslash_type = QUDA_MOBIUS_DWF_DSLASH;
227 switch( invParam.solverType ) {
229 quda_inv_param.inv_type = QUDA_CG_INVERTER;
230 solver_string =
"CG";
233 QDPIO::cerr <<
"Solver BICGSTAB not supported for MDWF" << std::endl;
237 QDPIO::cerr <<
"Solver GCR not supported for MDWF" << std::endl;
241 QDPIO::cerr <<
"Unknown Solver type" << std::endl;
247 quda_inv_param.mass = toDouble(invParam.NEFParams.Mass);
248 quda_inv_param.m5=toDouble(-invParam.NEFParams.OverMass);
249 quda_inv_param.Ls=invParam.NEFParams.N5;
251 if ( invParam.NEFParams.N5 >= QUDA_MAX_DWF_LS ) {
252 QDPIO::cerr <<
"LS can be at most " << QUDA_MAX_DWF_LS << std::endl;
257 QDPIO::cout <<
"Ls from matrix: " <<
A->size() << std::endl;
258 QDPIO::cout <<
"Ls from params: " << invParam.NEFParams.N5 << std::endl;
259 QDPIO::cout <<
"Ls from quda: " << quda_inv_param.Ls << std::endl;
260 for(
int s = 0;
s < quda_inv_param.Ls;
s++){
261 quda_inv_param.b_5[
s] = toDouble(invParam.NEFParams.b5[
s]);
262 quda_inv_param.c_5[
s] = toDouble(invParam.NEFParams.c5[
s]);
263 QDPIO::cout <<
" b5[" <<
s<<
"] = " << quda_inv_param.b_5[
s] <<
" c5[" <<
s <<
"] = " << quda_inv_param.c_5[
s] << std::endl;
266 quda_inv_param.tol = toDouble(invParam.RsdTarget);
267 quda_inv_param.maxiter = invParam.MaxIter;
268 quda_inv_param.reliable_delta = toDouble(invParam.Delta);
269 quda_inv_param.pipeline = invParam.Pipeline;
272 quda_inv_param.solution_type = QUDA_MATPC_SOLUTION;
275 switch( invParam.solverType ) {
277 if( invParam.cgnrP ) {
278 QDPIO::cout <<
"Doing CGNR solve" << std::endl;
279 quda_inv_param.solve_type = QUDA_NORMOP_PC_SOLVE;
282 QDPIO::cout <<
"Doing CGNE solve" << std::endl;
283 quda_inv_param.solve_type = QUDA_NORMERR_PC_SOLVE;
289 if( invParam.cgnrP ) {
290 QDPIO::cout <<
"Doing CGNR solve" << std::endl;
291 quda_inv_param.solve_type = QUDA_NORMOP_PC_SOLVE;
294 QDPIO::cout <<
"Doing CGNE solve" << std::endl;
295 quda_inv_param.solve_type = QUDA_NORMERR_PC_SOLVE;
303 QDPIO::cout <<
"Using Symmetric Linop: A_oo - D_oe A^{-1}_ee D_eo" << std::endl;
304 quda_inv_param.matpc_type = QUDA_MATPC_ODD_ODD_ASYMMETRIC;
305 quda_inv_param.dagger = QUDA_DAG_NO;
306 quda_inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION;
307 quda_inv_param.cpu_prec = cpu_prec;
308 quda_inv_param.cuda_prec = gpu_prec;
309 quda_inv_param.cuda_prec_sloppy = gpu_half_prec;
310 quda_inv_param.preserve_source = QUDA_PRESERVE_SOURCE_NO;
311 quda_inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
313 #ifndef BUILD_QUDA_DEVIFACE_SPINOR
314 quda_inv_param.dirac_order = QUDA_DIRAC_ORDER;
316 QDPIO::cout <<
"MDAGM Using QDP-JIT spinor order" << std::endl;
317 quda_inv_param.dirac_order = QUDA_QDPJIT_DIRAC_ORDER;
318 quda_inv_param.input_location = QUDA_CUDA_FIELD_LOCATION;
319 quda_inv_param.output_location = QUDA_CUDA_FIELD_LOCATION;
323 if( invParam.tuneDslashP ) {
324 QDPIO::cout <<
"Enabling Dslash Autotuning" << std::endl;
325 quda_inv_param.tune = QUDA_TUNE_YES;
328 QDPIO::cout <<
"Disabling Dslash Autotuning" << std::endl;
329 quda_inv_param.tune = QUDA_TUNE_NO;
334 multi1d<int> face_size(4);
335 face_size[0] = latdims[1]*latdims[2]*latdims[3]/2;
336 face_size[1] = latdims[0]*latdims[2]*latdims[3]/2;
337 face_size[2] = latdims[0]*latdims[1]*latdims[3]/2;
338 face_size[3] = latdims[0]*latdims[1]*latdims[2]/2;
340 int max_face = face_size[0];
341 for(
int i=1;
i < 4;
i++) {
342 if ( face_size[
i] > max_face ) {
343 max_face = face_size[
i];
347 q_gauge_param.ga_pad = max_face;
349 quda_inv_param.sp_pad = 0;
350 quda_inv_param.cl_pad = 0;
352 QDPIO::cout <<
"Setting Precondition stuff to defaults for not using" << std::endl;
353 quda_inv_param.inv_type_precondition= QUDA_INVALID_INVERTER;
354 quda_inv_param.tol_precondition = 1.0e-1;
355 quda_inv_param.maxiter_precondition = 1000;
356 quda_inv_param.verbosity_precondition = QUDA_SILENT;
357 quda_inv_param.gcrNkrylov = 1;
360 if( invParam.verboseP ) {
361 quda_inv_param.verbosity = QUDA_VERBOSE;
364 quda_inv_param.verbosity = QUDA_SUMMARIZE;
370 #ifndef BUILD_QUDA_DEVIFACE_GAUGE
372 gauge[
mu] = (
void *)&(links_single[
mu].elem(all.start()).elem().elem(0,0).real());
375 GetMemoryPtrGauge(gauge,links_single);
380 loadGaugeQuda((
void *)gauge, &q_gauge_param);
387 ~LinOpSysSolverQUDANEF()
389 QDPIO::cout <<
"Destructing" << std::endl;
394 const Subset& subset()
const {
return A->subset();}
397 int size()
const {
return invParam.NEFParams.N5;}
405 SystemSolverResults_t
operator() (multi1d<T>&
psi,
const multi1d<T>&
chi)
const
407 SystemSolverResults_t res;
416 Real invTwoKappaB = invParam.NEFParams.b5[0]*(
Nd - invParam.NEFParams.OverMass) + Real(1);
417 Real twoKappaB = Real(1)/invTwoKappaB;
422 const multi1d<int>& latdims = Layout::subgridLattSize();
423 int halfsize=latdims[0]*latdims[1]*latdims[2]*latdims[3]/2;
424 int fermsize=halfsize*Nc*Ns*2;
427 multi1d<T> in1( this->size() );
428 multi1d<T> out1(this->size() );
431 multi1d<T> in2( this->size() );
432 multi1d<T> out2(this->size() );
435 for(
int s=0;
s < this->size();
s++ ) {
441 for(
int d=0;
d < 2;
d++) {
442 for(
int s=0;
s < this->size();
s++ ) {
449 QDPIO::cout <<
"DOing Mat" << std::endl;
450 (*A)(out2, in2,
PLUS);
453 QDPIO::cout <<
"Doing MatDag" << std::endl;
454 (*A)(out2, in2,
MINUS);
458 REAL* spinorIn =
new REAL[quda_inv_param.Ls*fermsize];
459 REAL* spinorOut =
new REAL[quda_inv_param.Ls*fermsize];
460 memset((spinorIn), 0, fermsize*quda_inv_param.Ls*
sizeof(REAL));
461 memset((spinorOut), 0, fermsize*quda_inv_param.Ls*
sizeof(REAL));
463 for(
unsigned int s=0;
s<quda_inv_param.Ls;
s++){
464 memcpy((&spinorIn[fermsize*
s]),&(in1[
s].elem(rb[1].start()).elem(0).elem(0).real()),fermsize*
sizeof(REAL));
469 quda_inv_param.dagger = QUDA_DAG_NO;
470 MatQuda((
void *)spinorOut, (
void *)spinorIn, (QudaInvertParam*)&quda_inv_param);
473 quda_inv_param.dagger = QUDA_DAG_YES;
474 MatQuda((
void *)spinorOut, (
void *)spinorIn, (QudaInvertParam*)&quda_inv_param);
477 for(
unsigned int s=0;
s<quda_inv_param.Ls;
s++){
479 memcpy((&out1[
s].elem(rb[1].start()).elem(0).elem(0).real()),(&spinorOut[fermsize*
s]),fermsize*
sizeof(REAL));
483 for(
int s=0;
s < this->size();
s++) {
484 out1[
s] *= invTwoKappaB;
486 QDPIO::cout <<
"s=" <<
s <<
" diff=" << norm2(out2[
s]-out1[
s]) << std::endl;
498 QDPIO::cout <<
"Norm of chi = " << norm2(
chi,rb[1]) << std::endl;
499 QDPIO::cout <<
"TwoKappa = " << twoKappaB <<
" invTwoKappa_b = " << invTwoKappaB << std::endl;
501 if ( invParam.axialGaugeP ) {
502 multi1d<T> g_chi( this->size());
503 multi1d<T> g_psi( this->size());
506 QDPIO::cout <<
"Gauge Fixing source and initial guess" << std::endl;
507 for(
int s=0;
s<invParam.NEFParams.N5;
s++){
508 g_chi[
s][ rb[1] ] = GFixMat *
chi[
s];
509 g_psi[
s][ rb[1] ] = GFixMat *
psi[
s];
511 QDPIO::cout <<
"Solving" << std::endl;
512 res = qudaInvert(g_chi,g_psi);
513 for(
int s=0;
s < this->size();
s++) {
514 g_psi[
s][rb[1]] *= twoKappaB;
517 QDPIO::cout <<
"Untransforming solution." << std::endl;
518 for(
int s=0;
s<invParam.NEFParams.N5;
s++){
519 psi[
s][ rb[1] ] = adj(GFixMat)*g_psi[
s];
524 QDPIO::cout <<
"Calling qudaInvert" << std::endl;
525 res = qudaInvert(
chi,
psi);
526 for(
int s=0;
s < this->size();
s++) {
527 psi[
s][rb[1]] *= twoKappaB;
538 multi1d<T>
r(
A->size());
539 multi1d<T> Ax(
A->size());
545 for(
int s=0;
s <
A->size();
s++) {
548 b_norm += norm2(
chi[
s], rb[1]);
555 QDPIO::cout <<
"QUDA_"<< solver_string <<
"_NEF_SOLVER: " << res.n_count <<
" iterations. Max. Rsd = " << res.resid <<
" Max. Relative Rsd = " << rel_resid << std::endl;
559 if ( ! invParam.SilentFailP ) {
560 if ( toBool( rel_resid > invParam.RsdToleranceFactor*invParam.RsdTarget) ) {
561 QDPIO::cerr <<
"ERROR: QUDA Solver residuum is outside tolerance: QUDA resid="<< rel_resid <<
" Desired =" << invParam.RsdTarget <<
" Max Tolerated = " << invParam.RsdToleranceFactor*invParam.RsdTarget << std::endl;
575 LinOpSysSolverQUDANEF() {}
582 QudaPrecision_s cpu_prec;
583 QudaPrecision_s gpu_prec;
584 QudaPrecision_s gpu_half_prec;
586 Handle< LinearOperatorArray<T> >
A;
587 const SysSolverQUDANEFParams invParam;
588 QudaGaugeParam q_gauge_param;
589 mutable QudaInvertParam quda_inv_param;
591 SystemSolverResults_t qudaInvert(
const multi1d<T>& chi_s, multi1d<T>& psi_s)
const;
4D Even Odd preconditioned NEF domain-wall fermion linear operator
void temporalGauge(multi1d< LatticeColorMatrix > &ug, LatticeColorMatrix &g, int decay_dir)
Temporal gauge fixing.
Class for counted reference semantics.
bool registerAll()
Register all the factories.
multi1d< Hadron2PtContraction_t > operator()(const multi1d< LatticeColorMatrix > &u)
Asqtad Staggered-Dirac operator.
LinOpSysSolverMGProtoClover::Q Q
LinOpSysSolverMGProtoClover::T T
multi1d< LatticeFermion > chi(Ncb)
multi1d< LatticeFermion > s(Ncb)
FloatingPoint< double > Double
Periodic ferm state and a creator.
Reunitarize in place a color matrix to SU(N)
Support class for fermion actions and linear operators.
multi1d< LatticeColorMatrix > U
multi1d< LatticeColorMatrix > Q
multi1d< LatticeColorMatrixF > QF
multi1d< LatticeColorMatrixD > QD