/*
Compute initial*2^n mod (2^63-165) the Slow Way, starting from several
different initial values, demonstrating instruction-level parallelism
(ILP).
Copyright 2019 Ken Takusagawa
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or (at
your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
To compile:
g++ -Wall -O3 -march=native -mtune=native slowexponentiation.cpp
*/
#include
#include
using std::cout;
using std::endl;
#define BITWIDTH 128
// stop after 1 iteration, for time measurement.
//#define WHENTOSTOP major<1
// go forever
#define WHENTOSTOP
// interval between periodic output
// 30 tends to be on the order of seconds
#define TIMERSHIFT 32
const unsigned long timer = static_cast(1) << TIMERSHIFT;
#if BITWIDTH==128
#define ILP 5
// ILP=5 for Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz (ILP=5 is faster(!) than ILP=4 and ILP=6)
// also Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz (but things were monotonic)
#endif
#if BITWIDTH==64
#define ILP 8
// ILP=7 for Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz
// ILP=8 for Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz
#endif
#if BITWIDTH==64
typedef uint64_t U;
#endif
#if BITWIDTH==128
typedef unsigned __int128 U;
#endif
const U one = 1;
#if BITWIDTH==64
//165 259 387 549 669 891 915 1011 1069 1179 1221 1299
const unsigned long offset= 165;
#endif
#if BITWIDTH==128
// 309 507 957 1141 1485 1741 2437 2509
const unsigned long offset= 309;
#endif
const U p = (one << (BITWIDTH-1)) - offset;
#if BITWIDTH==64
#define RENDER(x) (x)
#endif
#if BITWIDTH==128
#define RENDER(x) static_cast((x)>>64) << " *2^64+ " << static_cast((x))
#endif
// This output is designed for Pari/GP
// XXX this will not skip certain starting values.
#define OUTPUT(x) cout << "lift( " << RENDER(n##x) << " - " << (2*x+1) << "*Mod(2,2^" << BITWIDTH-1 << "-" << offset << ")^(" << major << "*2^" << TIMERSHIFT << "))" << endl
int main(int argc,char**argv){
cout << "$Id: slowexponentiation.cpp,v 1.21 2019/12/13 16:34:38 kenta Exp $" << endl;
cout << "iterations per output" << timer << endl;
cout << "BITWIDTH " << BITWIDTH << " ; sizeof = " << sizeof(U) << " ; ILP " << ILP << " ; TIMERSHIFT " << TIMERSHIFT << endl;
cout << "modulus " << RENDER(p) << " = 2^(BITWIDTH-1) - " << static_cast( (one <<(BITWIDTH-1))-p) << endl;
//cout << "timer " << timer << endl;
//We put things in variables instead of an array to encourage the compiler to use registers.
/* For 2^127-309, avoid start numbers which are multiples of 309.
Starting at 309 is the same as starting at 1, shifted by 127.
Starting at 3*309 is the same as starting at 3, shifted by 127.
*/
U n0=1;
#if ILP!=1
U n1=3;
#if ILP!=2
U n2=5;
#if ILP!=3
U n3=7;
#if ILP!=4
U n4=9;
#if ILP!=5
U n5=11;
#if ILP!=6
U n6=13;
#if ILP!=7
U n7=15;
#if ILP!=8
U n8=17;
#if ILP!=9
U n9=19;
#if ILP!=10
U n10=21;
#if ILP!=11
U n11=23;
#if ILP!=12
U n12=25;
#if ILP!=13
U n13=27;
#if ILP!=13
U n14=29;
#if ILP!=15
U n15=31;
#if ILP!=16
U n16=33;
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
unsigned long major;
for(major=0;WHENTOSTOP;++major){
cout << "major= " << major << " ; time= " << time(0) << endl;
OUTPUT(0);
#if ILP!=1
OUTPUT(1);
#if ILP!=2
OUTPUT(2);
#if ILP!=3
OUTPUT(3);
#if ILP!=4
OUTPUT(4);
#if ILP!=5
OUTPUT(5);
#if ILP!=6
OUTPUT(6);
#if ILP!=7
OUTPUT(7);
#if ILP!=8
OUTPUT(8);
#if ILP!=9
OUTPUT(9);
#if ILP!=10
OUTPUT(10);
#if ILP!=11
OUTPUT(11);
#if ILP!=12
OUTPUT(12);
#if ILP!=13
OUTPUT(13);
#if ILP!=14
OUTPUT(14);
#if ILP!=15
OUTPUT(15);
#if ILP!=16
OUTPUT(16);
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
cout << endl;
for(unsigned long i=0;
i=p) n##x-=p
gencode(0);
#if ILP!=1
gencode(1);
#if ILP!=2
gencode(2);
#if ILP!=3
gencode(3);
#if ILP!=4
gencode(4);
#if ILP!=5
gencode(5);
#if ILP!=6
gencode(6);
#if ILP!=7
gencode(7);
#if ILP!=8
gencode(8);
#if ILP!=9
gencode(9);
#if ILP!=10
gencode(10);
#if ILP!=11
gencode(11);
#if ILP!=12
gencode(12);
#if ILP!=13
gencode(13);
#if ILP!=14
gencode(14);
#if ILP!=15
gencode(15);
#if ILP!=16
gencode(16);
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
}
}
cout << "end ; time= " << time(0) << endl;
OUTPUT(0);
#if ILP!=1
OUTPUT(1);
#if ILP!=2
OUTPUT(2);
#if ILP!=3
OUTPUT(3);
#if ILP!=4
OUTPUT(4);
#if ILP!=5
OUTPUT(5);
#if ILP!=6
OUTPUT(6);
#if ILP!=7
OUTPUT(7);
#if ILP!=8
OUTPUT(8);
#if ILP!=9
OUTPUT(9);
#if ILP!=10
OUTPUT(10);
#if ILP!=11
OUTPUT(11);
#if ILP!=12
OUTPUT(12);
#if ILP!=13
OUTPUT(13);
#if ILP!=14
OUTPUT(14);
#if ILP!=15
OUTPUT(15);
#if ILP!=16
OUTPUT(16);
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
}