CHROMA
mre_extrap_predictor.h
Go to the documentation of this file.
1 // -*- C++ -*-
2 /*! \file
3  * \brief Minimal residual predictor
4  *
5  * Predictors for HMC
6  */
7 
8 #ifndef __mre_extrap_predictor_h__
9 #define __mre_extrap_predictor_h__
10 
11 #include "chromabase.h"
12 #include "handle.h"
17 #include "meas/eig/gramschm.h"
18 
19 namespace Chroma
20 {
21 
22  /*! @ingroup predictor */
23  namespace MinimalResidualExtrapolation4DChronoPredictorEnv
24  {
25  extern const std::string name;
26  bool registerAll();
27  }
28 
29  //! Minimal residual predictor
30  /*! @ingroup predictor */
31  template<typename T>
34  {
35  private:
38 
41 
42  void
44  T& psi,
45  const T& chi,
46  const CircularBuffer<T>& chrono_buf,
47  const CircularBuffer<T>& chrono_bufM,
48  const Subset& s
49  ) const
50  {
51  START_CODE();
52 
53  int Nvec = chrono_buf.size();
54 
55 
56  // Now I need to form G_n m = v_[n]^{dag} A v[m]
57  multi2d<DComplex> G(Nvec,Nvec);
58 
59  for(int m = 0 ; m < Nvec; m++) {
60  for(int n = 0; n < Nvec; n++) {
61  G(n,m) = innerProduct(chrono_buf[n], chrono_bufM[m], s);
62  }
63  }
64 
65  // Now I need to form b_n = v[n]^{dag} chi
66  multi1d<DComplex> b(Nvec);
67 
68  for(int n = 0; n < Nvec; n++) {
69  b[n] = innerProduct(chrono_buf[n], chi, s);
70  }
71 
72  // Solve G_nm a_m = b_n:
73 
74  // First LU decompose G in place
75  multi1d<DComplex> a(Nvec);
76 
77  LUSolve(a, G, b);
78 
79  // Check solution
80  multi1d<DComplex> Ga(Nvec);
81 
82  for(int i=0; i < Nvec; i++) {
83  Ga[i] = G(i,0)*a[0];
84  for(int j=1; j < Nvec; j++) {
85  Ga[i] += G(i,j)*a[j];
86  }
87  }
88 
89  multi1d<DComplex> r(Nvec);
90  for(int i=0; i < Nvec; i++) {
91  r[i] = b[i]-Ga[i];
92  }
93 
94  // Create teh lnear combination
95  psi[s] = Complex(a[0])*chrono_buf[0];
96  for(int n=1; n < Nvec; n++) {
97  psi[s] += Complex(a[n])*chrono_buf[n];
98  }
99 
100 
101 
102  END_CODE();
103  }
104 
105 
106  void
108  T& psi,
109  const LinearOperator<T>& M,
110  const T& chi,
111  const Handle<CircularBuffer<T> >& chrono_buf,
112  enum PlusMinus isign)
113  {
114  START_CODE();
115 
116  const Subset& s= M.subset();
117 
118 
119 #if 0
120  {
121  T r;
122  r[s] = chi;
123  T tmp;
124  M(tmp, psi, isign);
125  r[s] -= tmp;
126  Double norm_r = sqrt(norm2(r,s));
127  Double norm_chi = sqrt(norm2(chi,s));
128  QDPIO::cout << "MRE Predictor: before prediction || r || / || b || =" << norm_r/norm_chi << std::endl;
129  }
130 #endif
131 
132  int Nvec = chrono_buf->size();
133 
134 
135  // Construct an orthonormal basis from the
136  // vectors in the buffer. Stick to notation of paper and call these
137  // v
138  multi1d<T> v(Nvec);
139 
140  for(int i=0; i < Nvec; i++) {
141  // Zero out the non subsetted part
142  v[i] = zero;
143 
144  // Grab the relevant std::vector from the chronobuf
145  T tmpvec;
146  chrono_buf->get(i, tmpvec);
147 
148  if( i == 0 ) {
149  // First std::vector we just take
150  v[i][s] = tmpvec;
151  }
152  else {
153  // i-th std::vector. Orthogonalise against i-1 previous
154  // std::vector, but i is an index running from 0. So I need
155  // to pass i+1-1=i as the number of vectors to orthog against
156  //
157  // This is a very dumb GramSchmidt process and possibly
158  // unstable, however apparently (according to the paper)
159  // this is OK.
160  GramSchm(tmpvec, v, i, s);
161  v[i][s] = tmpvec;
162  }
163  // QDPIO::cout << "Norm v[i] = " << norm2(v[i],s) << std::endl;
164  // Normalise v[i]
165  Double norm = sqrt(norm2(v[i], s));
166  v[i][s] /= norm;
167 
168  }
169 
170  // Now I need to form G_n m = v_[n]^{dag} A v[m]
171  multi2d<DComplex> G(Nvec,Nvec);
172 
173  for(int m = 0 ; m < Nvec; m++) {
174  T tmpvec;
175  M(tmpvec, v[m], isign);
176 
177  for(int n = 0; n < Nvec; n++) {
178  G(n,m) = innerProduct(v[n], tmpvec, s);
179  }
180  }
181 
182  // Now I need to form b_n = v[n]^{dag} chi
183  multi1d<DComplex> b(Nvec);
184 
185  for(int n = 0; n < Nvec; n++) {
186  b[n] = innerProduct(v[n], chi, s);
187  }
188 
189  // Solve G_nm a_m = b_n:
190 
191  // First LU decompose G in place
192  multi1d<DComplex> a(Nvec);
193 
194  LUSolve(a, G, b);
195 
196  // Check solution
197  multi1d<DComplex> Ga(Nvec);
198 
199  for(int i=0; i < Nvec; i++) {
200  Ga[i] = G(i,0)*a[0];
201  for(int j=1; j < Nvec; j++) {
202  Ga[i] += G(i,j)*a[j];
203  }
204  }
205 
206  multi1d<DComplex> r(Nvec);
207  for(int i=0; i < Nvec; i++) {
208  r[i] = b[i]-Ga[i];
209  }
210 
211 #if 0
212  QDPIO::cout << "Constraint Eq Solution Check" << std::endl;
213  for(int i=0; i < Nvec; i++) {
214  QDPIO::cout << " r[ " << i << "] = " << r[i] << std::endl;
215  }
216 #endif
217 
218  // Create teh lnear combination
219  psi[s] = Complex(a[0])*v[0];
220  for(int n=1; n < Nvec; n++) {
221  psi[s] += Complex(a[n])*v[n];
222  }
223 
224 
225 #if 0
226  {
227  T r;
228  r[s] = chi;
229  T tmp;
230  M(tmp, psi, isign);
231  r[s] -= tmp;
232  Double norm_r = sqrt(norm2(r,s));
233  Double norm_chi = sqrt(norm2(chi,s));
234  QDPIO::cout << "MRE Predictor: after prediction || r || / || b || =" << norm_r/norm_chi << std::endl;
235  }
236 #endif
237 
238  END_CODE();
239  }
240 
241 
242 
243  public:
244 
246  chrono_bufX(new CircularBuffer<T>(max_chrono)),
247  chrono_bufY(new CircularBuffer<T>(max_chrono)),
248  chrono_bufMX(new CircularBuffer<T>(max_chrono)),
249  chrono_bufMY(new CircularBuffer<T>(max_chrono)){}
250 
251 
252  void reset(void) {
253  chrono_bufX->reset();
254  chrono_bufY->reset();
255  chrono_bufMX->reset();
256  chrono_bufMY->reset();
257  }
258 
259  // Destructor is automagic
261 
262  // Predict X new way.
263  // M is not needed anymore
264  void predictX(T& X, const T& chi, const Subset& s) const override
265  {
266  START_CODE();
267  StopWatch swatch;
268  swatch.reset();
269  swatch.start();
270 
271  int Nvec = chrono_bufX->size();
272  switch(Nvec) {
273  case 0:
274  {
275  QDPIO::cout << "MRE Predictor: Zero vectors stored. Giving you zero guess" << std::endl;
276  X= zero;
277  }
278  break;
279  case 1:
280  {
281  QDPIO::cout << "MRE Predictor: Only 1 std::vector stored. Giving you last solution " << std::endl;
282  chrono_bufX->get(0,X);
283  }
284  break;
285  default:
286  {
287  QDPIO::cout << "MRE Predictor: Finding X extrapolation with "<< Nvec << " vectors" << std::endl;
288 
289  // Expect M is either MdagM if we use chi
290  // or M if we minimize against Y
292  }
293  break;
294  }
295 
296  swatch.stop();
297  QDPIO::cout << "MRE_PREDICT_X_TIME = " << swatch.getTimeInSeconds() << " s" << std::endl;
298 
299  END_CODE();
300  }
301 
302  void predictY(T& Y, const T& chi, const Subset& s) const override
303  {
304  START_CODE();
305  StopWatch swatch;
306  swatch.reset();
307  swatch.start();
308 
309  int Nvec = chrono_bufY->size();
310  switch(Nvec) {
311  case 0:
312  {
313  QDPIO::cout << "MRE Predictor: Zero vectors stored. Giving you zero guess" << std::endl;
314  Y= zero;
315  }
316  break;
317  case 1:
318  {
319  QDPIO::cout << "MRE Predictor: Only 1 std::vector stored. Giving you last solution " << std::endl;
320  chrono_bufY->get(0,Y);
321  }
322  break;
323  default:
324  {
325  QDPIO::cout << "MRE Predictor: Finding Y extrapolation with "<< Nvec << " vectors" << std::endl;
326 
327  // Expect M is either MdagM if we use chi
328  // or M if we minimize against Y
330  }
331  break;
332  }
333 
334  swatch.stop();
335  QDPIO::cout << "MRE_PREDICT_Y_TIME = " << swatch.getTimeInSeconds() << " s" << std::endl;
336 
337  END_CODE();
338  }
339 
340  void predictX(T& X,
341  const LinearOperator<T>& M,
342  const T& chi)
343  {
344  START_CODE();
345  StopWatch swatch;
346  swatch.reset();
347  swatch.start();
348 
349  int Nvec = chrono_bufX->size();
350  switch(Nvec) {
351  case 0:
352  {
353  QDPIO::cout << "MRE Predictor: Zero vectors stored. Giving you zero guess" << std::endl;
354  X= zero;
355  }
356  break;
357  case 1:
358  {
359  QDPIO::cout << "MRE Predictor: Only 1 std::vector stored. Giving you last solution " << std::endl;
360  chrono_bufX->get(0,X);
361  }
362  break;
363  default:
364  {
365  QDPIO::cout << "MRE Predictor: Finding X extrapolation with "<< Nvec << " vectors" << std::endl;
366 
367  // Expect M is either MdagM if we use chi
368  // or M if we minimize against Y
370  }
371  break;
372  }
373 
374  swatch.stop();
375  QDPIO::cout << "MRE_PREDICT_X_TIME = " << swatch.getTimeInSeconds() << " s" << std::endl;
376 
377  END_CODE();
378  }
379 
380  void predictY(T& Y,
381  const LinearOperator<T>& M,
382  const T& chi)
383  {
384  START_CODE();
385  StopWatch swatch;
386  swatch.reset();
387  swatch.start();
388 
389  int Nvec = chrono_bufY->size();
390  switch(Nvec) {
391  case 0:
392  {
393  QDPIO::cout << "MRE Predictor: Zero vectors stored. Giving you zero guess" << std::endl;
394  Y= zero;
395  }
396  break;
397  case 1:
398  {
399  QDPIO::cout << "MRE Predictor: Only 1 std::vector stored. Giving you last solution " << std::endl;
400  chrono_bufY->get(0,Y);
401  }
402  break;
403  default:
404  {
405  QDPIO::cout << "MRE Predictor: Finding Y extrapolation with "<< Nvec << " vectors" << std::endl;
406 
407  // Expect M is either MdagM if we use chi
408  // or M if we minimize against Y
410  }
411  break;
412  }
413 
414  swatch.stop();
415  QDPIO::cout << "MRE_PREDICT_Y_TIME = " << swatch.getTimeInSeconds() << " s" << std::endl;
416 
417  END_CODE();
418  }
419 
420  void checkOrthoNormal( const CircularBuffer<T>& buffer, const Subset& s) const
421  {
422  for(int i=0; i < buffer.size(); ++i) {
423  for(int j=0; j < buffer.size(); ++j ) {
424  DComplex ip = innerProduct(buffer[i], buffer[j], s);
425  if( i==j ) {
426  if( toBool( fabs(Double(1)-real(ip)) > Double(1.0e-12) ) ) {
427  QDPIO::cout << "Lack of normalization: < v["<<i<<"] | v[" << j <<"] > = " << ip << std::endl;
428  QDP_abort(1);
429  }
430  if( toBool( fabs(imag(ip)) > Double(1.0e-12) ) ) {
431  QDPIO::cout << "Lack of normalization: < v["<<i<<"] | v[" << j <<"] > = " << ip << std::endl;
432 
433  QDP_abort(1);
434  }
435  }
436  else {
437  if( toBool( fabs(real(ip)) > Double(1.0e-12) ) ) {
438  QDPIO::cout << "Lack of orthogonality: < v["<<i<<"] | v[" << j <<"] > = " << ip << std::endl;
439  QDP_abort(1);
440  }
441  if( toBool( fabs(imag(ip)) > Double(1.0e-12) ) ) {
442  QDPIO::cout << "Lack of orthogonality: < v["<<i<<"] | v[" << j <<"] > = " << ip << std::endl;
443  QDP_abort(1);
444  }
445  }
446  }
447  }
448  }
449 
450  // Orthonormalize x against previous buffer vectors
451  // Implication is that X will be pushed soon.
452  // So if the buffer is full N elements only orthogonalize against the last N-1
453  void orthonormPrevious(const CircularBuffer<T>& buffer, T& x, const Subset& s) const
454  {
455  // Normalize as we will make it orthogonal to other normalized vectors
456  // This is for stability and the correct thing to do for an empty buffer
457  // or a buffer with 1 entry (which will get pushed down)
458  Double nx= Double(1)/sqrt(norm2(x,s));
459  x[s] *= nx;
460 
461  // If buffer doesn't yet contain the maximum number
462  // of vectors, the last vector will not be kicked out
463  // so orthog against all existing vectors
464 
465  // Gram Schmidt against previous
466  for(int i=0; i < buffer.size(); ++i) {
467  DComplex iprod = innerProduct(buffer[i],x,s);
468  x[s] -= iprod*buffer[i];
469  }
470 
471  // 2nd Gram Schmidt against previous
472  for(int i=0; i < buffer.size(); ++i) {
473  DComplex iprod = innerProduct(buffer[i],x,s);
474  x[s] -= iprod*buffer[i];
475  }
476 
477  // RE-normalize
478  nx= Double(1)/sqrt(norm2(x,s));
479  x[s] *= nx;
480  }
481 
482 
483  void newXVector(const T& X_in, const LinearOperator<T>& M) override
484  {
485  START_CODE();
486  StopWatch swatch;
487  swatch.reset();
488  swatch.start();
489 
490  QDPIO::cout << "MREPredictor: registering new X solution. " << std::endl;
491  T X = X_in;
492 
493  // Orthonormalize X against current vectors
495 
496  // Push it into the buffer
497  chrono_bufX->push(X);
498 
500 
501  // Apply M
502  T tmpvec=zero;
503 
504  M(tmpvec,X,PLUS);
505  chrono_bufMX->push(tmpvec);
506 
507  QDPIO::cout << "MREPredictor: number of X vectors stored is = " << chrono_bufX->size() << " and MX vectors = " << chrono_bufMX->size() << std::endl;
508  swatch.stop();
509  QDPIO::cout << "MRE Predictor: X_VEC_REGISTRATION_TIME = " << swatch.getTimeInSeconds() << " sec. \n";
510 
511  END_CODE();
512  }
513 
514 
515 
516 
517  void newXVector(const T& X)
518  {
519  START_CODE();
520 
521  QDPIO::cout << "MREPredictor: registering new X solution. " << std::endl;
522 
523  chrono_bufX->push(X);
524 
525 
526  QDPIO::cout << "MREPredictor: number of X vectors stored is = " << chrono_bufX->size() << std::endl;
527 
528  END_CODE();
529  }
530 
531 
532  void newYVector(const T& Y)
533  {
534  START_CODE();
535 
536  QDPIO::cout << "MREPredictor: registering new Y solution. " << std::endl;
537  chrono_bufY->push(Y);
538  QDPIO::cout << "MREPredictor: number of Y vectors stored is = " << chrono_bufY->size() << std::endl;
539 
540  END_CODE();
541  }
542 
543  void newYVector(const T& Y_in, const LinearOperator<T>& M) override
544  {
545  START_CODE();
546  StopWatch swatch;
547  swatch.reset();
548  swatch.start();
549 
550  QDPIO::cout << "MREPredictor: registering new Y solution. " << std::endl;
551  T Y=Y_in;
552 
553  // Orthonormalize Y against current vectors
555 
556  // Push it into the buffer
557  chrono_bufY->push(Y);
558 
560 
561  // Apply M
562  T tmpvec=zero;
563  M(tmpvec,Y,MINUS);
564  chrono_bufMY->push(tmpvec);
565 
566  QDPIO::cout << "MREPredictor: number of Y vectors stored is = " << chrono_bufY->size() << " and MY vectors = " << chrono_bufMY->size() << std::endl;
567  swatch.stop();
568  QDPIO::cout << "MRE Predictor: Y_VEC_REGISTRATION_TIME = " << swatch.getTimeInSeconds() << " sec. \n";
569 
570  END_CODE();
571  }
572 
573  void replaceXHead(const T& v)
574  {
575  chrono_bufX->replaceHead(v);
576  }
577 
578  void replaceYHead(const T& v)
579  {
580  chrono_bufY->replaceHead(v);
581  }
582 
583 
584 
585 
586  };
587 
588 
589 
590  /*! @ingroup predictor */
591  namespace MinimalResidualExtrapolation5DChronoPredictorEnv
592  {
593  extern const std::string name;
594  bool registerAll();
595  }
596 
597 
598  //! Minimal residual predictor
599  /*! @ingroup predictor */
601  public AbsChronologicalPredictor5D<LatticeFermion> {
602 
603  private:
605  const int N5;
606 
607  void find_extrap_solution(multi1d<LatticeFermion>& psi,
609  const multi1d<LatticeFermion>& chi);
610 
611  public:
612  MinimalResidualExtrapolation5DChronoPredictor(const int N5_, const unsigned int max_chrono) : chrono_buf( new CircularBufferArray<LatticeFermion>(max_chrono, N5_) ), N5(N5_) {}
613 
614 
616 
617  // Copying
619 
620  // Zero out psi -- it is a zero guess after all
621  void operator()(multi1d<LatticeFermion>& psi,
623  const multi1d<LatticeFermion>& chi);
624 
625  // No internal state so reset is a Nop
626  void reset(void) {
627  chrono_buf->reset();
628 
629  }
630 
631  // Ignore new std::vector
632  // Ignore new std::vector
633  void newVector(const multi1d<LatticeFermion>& psi)
634  {
635  START_CODE();
636 
637  QDPIO::cout << "MRE Predictor: registering new solution. " << std::endl;
638  chrono_buf->push(psi);
639  QDPIO::cout << "MRE Predictor: number of vectors stored is = " << chrono_buf->size() << std::endl;
640 
641  END_CODE();
642  }
643 
644  };
645 
646 } // End Namespace Chroma
647 
648 #endif
Primary include file for CHROMA library code.
Chronological predictor for HMC.
Monomial factories.
Circular buffers.
Abstract interface for a Chronological Solution predictor in 5D.
Abstract interface for a Chronological Solution predictor.
Circular buffer of arrays.
Circular Buffer.
int size(void) const
Get the current number of items stored.
Class for counted reference semantics.
Definition: handle.h:33
Linear Operator to arrays.
Definition: linearop.h:61
Linear Operator.
Definition: linearop.h:27
virtual const Subset & subset() const =0
Return the subset on which the operator acts.
void orthonormPrevious(const CircularBuffer< T > &buffer, T &x, const Subset &s) const
void newXVector(const T &X_in, const LinearOperator< T > &M) override
void checkOrthoNormal(const CircularBuffer< T > &buffer, const Subset &s) const
void newYVector(const T &Y_in, const LinearOperator< T > &M) override
void predictY(T &Y, const T &chi, const Subset &s) const override
void predictX(T &X, const T &chi, const Subset &s) const override
void predictY(T &Y, const LinearOperator< T > &M, const T &chi)
void predictX(T &X, const LinearOperator< T > &M, const T &chi)
void find_extrap_solutionM(T &psi, const T &chi, const CircularBuffer< T > &chrono_buf, const CircularBuffer< T > &chrono_bufM, const Subset &s) const
void find_extrap_solution(T &psi, const LinearOperator< T > &M, const T &chi, const Handle< CircularBuffer< T > > &chrono_buf, enum PlusMinus isign)
MinimalResidualExtrapolation5DChronoPredictor(const MinimalResidualExtrapolation5DChronoPredictor &p)
MinimalResidualExtrapolation5DChronoPredictor(const int N5_, const unsigned int max_chrono)
void operator()(multi1d< LatticeFermion > &psi, const LinearOperatorArray< LatticeFermion > &A, const multi1d< LatticeFermion > &chi)
void find_extrap_solution(multi1d< LatticeFermion > &psi, const LinearOperatorArray< LatticeFermion > &A, const multi1d< LatticeFermion > &chi)
void newVector(const multi1d< LatticeFermion > &psi)
Handle< CircularBufferArray< LatticeFermion > > chrono_buf
Gramm-Schmidt orthogonolization.
void GramSchm(multi1d< LatticeFermion > &psi, const int Npsi, const multi1d< LatticeFermion > &vec, const int Nvec, const Subset &sub)
Gramm-Schmidt orthogonolization.
Definition: gramschm.cc:127
void LUSolve(multi1d< DComplex > &a, const multi2d< DComplex > &M, const multi1d< DComplex > &b)
Solve M a = b by LU decomposition with partial pivoting.
Definition: lu_solve.cc:8
Class for counted reference semantics.
unsigned j
Definition: ldumul_w.cc:35
unsigned n
Definition: ldumul_w.cc:36
LU solver.
static int m[4]
Definition: make_seeds.cc:16
int x
Definition: meslate.cc:34
BinaryReturn< C1, C2, FnInnerProduct >::Type_t innerProduct(const QDPSubType< T1, C1 > &s1, const QDPType< T2, C2 > &s2)
multi1d< LatticeColorMatrix > G
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
LatticeFermion tmp
Definition: mespbg5p_w.cc:36
LinOpSysSolverMGProtoClover::T T
int i
Definition: pbg5p_w.cc:55
@ MINUS
Definition: chromabase.h:45
@ PLUS
Definition: chromabase.h:45
multi1d< LatticeFermion > chi(Ncb)
Complex a
Definition: invbicg.cc:95
LatticeFermion psi
Definition: mespbg5p_w.cc:35
START_CODE()
A(A, psi, r, Ncb, PLUS)
Complex b
Definition: invbicg.cc:96
Double zero
Definition: invbicg.cc:106
multi1d< LatticeFermion > s(Ncb)
FloatingPoint< double > Double
Definition: gtest.h:7351
::std::string string
Definition: gtest.h:1979
int norm
Definition: qtopcor.cc:35