/* Compute initial*2^n mod (2^63-165) the Slow Way, starting from several different initial values, demonstrating instruction-level parallelism (ILP). Copyright 2019 Ken Takusagawa This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . To compile: g++ -Wall -O3 -march=native -mtune=native slowexponentiation.cpp */ #include #include using std::cout; using std::endl; #define BITWIDTH 128 // stop after 1 iteration, for time measurement. //#define WHENTOSTOP major<1 // go forever #define WHENTOSTOP // interval between periodic output // 30 tends to be on the order of seconds #define TIMERSHIFT 32 const unsigned long timer = static_cast(1) << TIMERSHIFT; #if BITWIDTH==128 #define ILP 5 // ILP=5 for Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz (ILP=5 is faster(!) than ILP=4 and ILP=6) // also Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz (but things were monotonic) #endif #if BITWIDTH==64 #define ILP 8 // ILP=7 for Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz // ILP=8 for Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz #endif #if BITWIDTH==64 typedef uint64_t U; #endif #if BITWIDTH==128 typedef unsigned __int128 U; #endif const U one = 1; #if BITWIDTH==64 //165 259 387 549 669 891 915 1011 1069 1179 1221 1299 const unsigned long offset= 165; #endif #if BITWIDTH==128 // 309 507 957 1141 1485 1741 2437 2509 const unsigned long offset= 309; #endif const U p = (one << (BITWIDTH-1)) - offset; #if BITWIDTH==64 #define RENDER(x) (x) #endif #if BITWIDTH==128 #define RENDER(x) static_cast((x)>>64) << " *2^64+ " << static_cast((x)) #endif // This output is designed for Pari/GP // XXX this will not skip certain starting values. #define OUTPUT(x) cout << "lift( " << RENDER(n##x) << " - " << (2*x+1) << "*Mod(2,2^" << BITWIDTH-1 << "-" << offset << ")^(" << major << "*2^" << TIMERSHIFT << "))" << endl int main(int argc,char**argv){ cout << "$Id: slowexponentiation.cpp,v 1.21 2019/12/13 16:34:38 kenta Exp $" << endl; cout << "iterations per output" << timer << endl; cout << "BITWIDTH " << BITWIDTH << " ; sizeof = " << sizeof(U) << " ; ILP " << ILP << " ; TIMERSHIFT " << TIMERSHIFT << endl; cout << "modulus " << RENDER(p) << " = 2^(BITWIDTH-1) - " << static_cast( (one <<(BITWIDTH-1))-p) << endl; //cout << "timer " << timer << endl; //We put things in variables instead of an array to encourage the compiler to use registers. /* For 2^127-309, avoid start numbers which are multiples of 309. Starting at 309 is the same as starting at 1, shifted by 127. Starting at 3*309 is the same as starting at 3, shifted by 127. */ U n0=1; #if ILP!=1 U n1=3; #if ILP!=2 U n2=5; #if ILP!=3 U n3=7; #if ILP!=4 U n4=9; #if ILP!=5 U n5=11; #if ILP!=6 U n6=13; #if ILP!=7 U n7=15; #if ILP!=8 U n8=17; #if ILP!=9 U n9=19; #if ILP!=10 U n10=21; #if ILP!=11 U n11=23; #if ILP!=12 U n12=25; #if ILP!=13 U n13=27; #if ILP!=13 U n14=29; #if ILP!=15 U n15=31; #if ILP!=16 U n16=33; #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif unsigned long major; for(major=0;WHENTOSTOP;++major){ cout << "major= " << major << " ; time= " << time(0) << endl; OUTPUT(0); #if ILP!=1 OUTPUT(1); #if ILP!=2 OUTPUT(2); #if ILP!=3 OUTPUT(3); #if ILP!=4 OUTPUT(4); #if ILP!=5 OUTPUT(5); #if ILP!=6 OUTPUT(6); #if ILP!=7 OUTPUT(7); #if ILP!=8 OUTPUT(8); #if ILP!=9 OUTPUT(9); #if ILP!=10 OUTPUT(10); #if ILP!=11 OUTPUT(11); #if ILP!=12 OUTPUT(12); #if ILP!=13 OUTPUT(13); #if ILP!=14 OUTPUT(14); #if ILP!=15 OUTPUT(15); #if ILP!=16 OUTPUT(16); #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif cout << endl; for(unsigned long i=0; i=p) n##x-=p gencode(0); #if ILP!=1 gencode(1); #if ILP!=2 gencode(2); #if ILP!=3 gencode(3); #if ILP!=4 gencode(4); #if ILP!=5 gencode(5); #if ILP!=6 gencode(6); #if ILP!=7 gencode(7); #if ILP!=8 gencode(8); #if ILP!=9 gencode(9); #if ILP!=10 gencode(10); #if ILP!=11 gencode(11); #if ILP!=12 gencode(12); #if ILP!=13 gencode(13); #if ILP!=14 gencode(14); #if ILP!=15 gencode(15); #if ILP!=16 gencode(16); #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif } } cout << "end ; time= " << time(0) << endl; OUTPUT(0); #if ILP!=1 OUTPUT(1); #if ILP!=2 OUTPUT(2); #if ILP!=3 OUTPUT(3); #if ILP!=4 OUTPUT(4); #if ILP!=5 OUTPUT(5); #if ILP!=6 OUTPUT(6); #if ILP!=7 OUTPUT(7); #if ILP!=8 OUTPUT(8); #if ILP!=9 OUTPUT(9); #if ILP!=10 OUTPUT(10); #if ILP!=11 OUTPUT(11); #if ILP!=12 OUTPUT(12); #if ILP!=13 OUTPUT(13); #if ILP!=14 OUTPUT(14); #if ILP!=15 OUTPUT(15); #if ILP!=16 OUTPUT(16); #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif }