Implementation notes: x86, katana, crypto_dh/nist521gs

Computer: katana
Architecture: x86
CPU ID: GenuineIntel-000006f6-bfebfbff
SUPERCOP version: 20170105
Operation: crypto_dh
Primitive: nist521gs

Compiler output

Implementation: crypto_dh/nist521gs/ref
Compiler: gcc -funroll-loops -m32 -O2 -fomit-frame-pointer
dh.c: dh.c:13:9: error: '__int128' is not supported on this target
dh.c: typedef __int128 type128;
dh.c: ^
dh.c: dh.c:13:18: warning: type defaults to 'int' in declaration of 'type128' [-Wimplicit-int]
dh.c: typedef __int128 type128;
dh.c: ^
dh.c: dh.c: In function 'gsqr':
dh.c: dh.c:278:109: warning: right shift count >= width of type [-Wshift-count-overflow]
dh.c: t2=4*((type128)x[1]*x[8]+(type128)x[2]*x[7]+(type128)x[3]*x[6]+(type128)x[4]*x[5])+(type128)x[0]*x[0]+2*(t1>>58);
dh.c: ^
dh.c: dh.c:280:111: warning: right shift count >= width of type [-Wshift-count-overflow]
dh.c: t1=4*((type128)x[2]*x[8]+(type128)x[3]*x[7]+(type128)x[4]*x[6])+2*((type128)x[0]*x[1]+(type128)x[5]*x[5])+(t2>>58);
dh.c: ^
dh.c: dh.c:282:109: warning: right shift count >= width of type [-Wshift-count-overflow]
dh.c: t2=4*((type128)x[3]*x[8]+(type128)x[4]*x[7]+(type128)x[5]*x[6])+2*(type128)x[0]*x[2]+(type128)x[1]*x[1]+(t1>>58);
dh.c: ^
dh.c: dh.c:284:111: warning: right shift count >= width of type [-Wshift-count-overflow]
dh.c: t1=4*((type128)x[4]*x[8]+(type128)x[5]*x[7])+2*((type128)x[0]*x[3]+(type128)x[1]*x[2]+(type128)x[6]*x[6])+(t2>>58);
dh.c: ^
dh.c: dh.c:286:111: warning: right shift count >= width of type [-Wshift-count-overflow]
dh.c: t2=4*((type128)x[5]*x[8]+(type128)x[6]*x[7])+2*((type128)x[0]*x[4]+(type128)x[1]*x[3])+(type128)x[2]*x[2]+(t1>>58);
dh.c: ^
dh.c: dh.c:288:109: warning: right shift count >= width of type [-Wshift-count-overflow]
dh.c: t1=4*(type128)x[6]*x[8]+2*((type128)x[0]*x[5]+(type128)x[1]*x[4]+(type128)x[2]*x[3]+(type128)x[7]*x[7])+(t2>>58);
dh.c: ^
dh.c: ...

Number of similar (compiler,implementation) pairs: 166, namely:
Compiler Implementations
gcc -funroll-loops -m32 -O2 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -O3 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -O -fomit-frame-pointer ref
gcc -funroll-loops -m32 -Os -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=athlon -O2 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=athlon -O3 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=athlon -O -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=athlon -Os -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=barcelona -O2 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=barcelona -O3 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=barcelona -O -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=barcelona -Os -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=i386 -O2 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=i386 -O3 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=i386 -O -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=i386 -Os -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=i486 -O2 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=i486 -O3 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=i486 -O -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=i486 -Os -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=k6-2 -O2 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=k6-2 -O3 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=k6-2 -O -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=k6-2 -Os -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=k6-3 -O2 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=k6-3 -O3 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=k6-3 -O -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=k6-3 -Os -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=k6 -O2 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=k6 -O3 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=k6 -O -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=k6 -Os -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=k8 -O2 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=k8 -O3 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=k8 -O -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=k8 -Os -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=nocona -O2 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=nocona -O3 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=nocona -O -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=nocona -Os -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=pentium-m -O2 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=pentium-m -O3 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=pentium-m -O -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=pentium-m -Os -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=pentium-mmx -O2 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=pentium-mmx -O3 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=pentium-mmx -O -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=pentium-mmx -Os -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=pentium2 -O2 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=pentium2 -O3 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=pentium2 -O -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=pentium2 -Os -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=pentium3 -O2 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=pentium3 -O3 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=pentium3 -O -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=pentium3 -Os -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=pentium4 -O2 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=pentium4 -O3 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=pentium4 -O -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=pentium4 -Os -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=pentium -O2 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=pentium -O3 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=pentium -O -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=pentium -Os -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=pentiumpro -O2 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=pentiumpro -O3 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=pentiumpro -O -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=pentiumpro -Os -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=prescott -O2 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=prescott -O3 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=prescott -O -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=prescott -Os -fomit-frame-pointer ref
gcc -m32 -O2 -fomit-frame-pointer ref
gcc -m32 -O3 -fomit-frame-pointer ref
gcc -m32 -O -fomit-frame-pointer ref
gcc -m32 -Os -fomit-frame-pointer ref
gcc -m32 -march=athlon -O2 -fomit-frame-pointer ref
gcc -m32 -march=athlon -O3 -fomit-frame-pointer ref
gcc -m32 -march=athlon -O -fomit-frame-pointer ref
gcc -m32 -march=athlon -Os -fomit-frame-pointer ref
gcc -m32 -march=core-avx-i -O2 -fomit-frame-pointer ref
gcc -m32 -march=core-avx-i -O -fomit-frame-pointer ref
gcc -m32 -march=core-avx-i -Os -fomit-frame-pointer ref
gcc -m32 -march=core-avx2 -O2 -fomit-frame-pointer ref
gcc -m32 -march=core-avx2 -O -fomit-frame-pointer ref
gcc -m32 -march=core-avx2 -Os -fomit-frame-pointer ref
gcc -m32 -march=core2 -O2 -fomit-frame-pointer ref
gcc -m32 -march=core2 -O3 -fomit-frame-pointer ref
gcc -m32 -march=core2 -O -fomit-frame-pointer ref
gcc -m32 -march=core2 -Os -fomit-frame-pointer ref
gcc -m32 -march=core2 -msse4.1 -O2 -fomit-frame-pointer ref
gcc -m32 -march=core2 -msse4.1 -O -fomit-frame-pointer ref
gcc -m32 -march=core2 -msse4.1 -Os -fomit-frame-pointer ref
gcc -m32 -march=core2 -msse4 -O2 -fomit-frame-pointer ref
gcc -m32 -march=core2 -msse4 -O -fomit-frame-pointer ref
gcc -m32 -march=core2 -msse4 -Os -fomit-frame-pointer ref
gcc -m32 -march=corei7-avx -O2 -fomit-frame-pointer ref
gcc -m32 -march=corei7-avx -O -fomit-frame-pointer ref
gcc -m32 -march=corei7-avx -Os -fomit-frame-pointer ref
gcc -m32 -march=corei7 -O2 -fomit-frame-pointer ref
gcc -m32 -march=corei7 -O -fomit-frame-pointer ref
gcc -m32 -march=corei7 -Os -fomit-frame-pointer ref
gcc -m32 -march=i386 -O2 -fomit-frame-pointer ref
gcc -m32 -march=i386 -O3 -fomit-frame-pointer ref
gcc -m32 -march=i386 -O -fomit-frame-pointer ref
gcc -m32 -march=i386 -Os -fomit-frame-pointer ref
gcc -m32 -march=i486 -O2 -fomit-frame-pointer ref
gcc -m32 -march=i486 -O3 -fomit-frame-pointer ref
gcc -m32 -march=i486 -O -fomit-frame-pointer ref
gcc -m32 -march=i486 -Os -fomit-frame-pointer ref
gcc -m32 -march=k6-2 -O2 -fomit-frame-pointer ref
gcc -m32 -march=k6-2 -O3 -fomit-frame-pointer ref
gcc -m32 -march=k6-2 -O -fomit-frame-pointer ref
gcc -m32 -march=k6-2 -Os -fomit-frame-pointer ref
gcc -m32 -march=k6-3 -O2 -fomit-frame-pointer ref
gcc -m32 -march=k6-3 -O3 -fomit-frame-pointer ref
gcc -m32 -march=k6-3 -O -fomit-frame-pointer ref
gcc -m32 -march=k6-3 -Os -fomit-frame-pointer ref
gcc -m32 -march=k6 -O2 -fomit-frame-pointer ref
gcc -m32 -march=k6 -O3 -fomit-frame-pointer ref
gcc -m32 -march=k6 -O -fomit-frame-pointer ref
gcc -m32 -march=k6 -Os -fomit-frame-pointer ref
gcc -m32 -march=k8 -O2 -fomit-frame-pointer ref
gcc -m32 -march=k8 -O3 -fomit-frame-pointer ref
gcc -m32 -march=k8 -O -fomit-frame-pointer ref
gcc -m32 -march=k8 -Os -fomit-frame-pointer ref
gcc -m32 -march=native -mtune=native -O2 -fomit-frame-pointer ref
gcc -m32 -march=native -mtune=native -O3 -fomit-frame-pointer ref
gcc -m32 -march=native -mtune=native -O -fomit-frame-pointer ref
gcc -m32 -march=native -mtune=native -Os -fomit-frame-pointer ref
gcc -m32 -march=nocona -O2 -fomit-frame-pointer ref
gcc -m32 -march=nocona -O3 -fomit-frame-pointer ref
gcc -m32 -march=nocona -O -fomit-frame-pointer ref
gcc -m32 -march=nocona -Os -fomit-frame-pointer ref
gcc -m32 -march=pentium-m -O2 -fomit-frame-pointer ref
gcc -m32 -march=pentium-m -O3 -fomit-frame-pointer ref
gcc -m32 -march=pentium-m -O -fomit-frame-pointer ref
gcc -m32 -march=pentium-m -Os -fomit-frame-pointer ref
gcc -m32 -march=pentium-mmx -O2 -fomit-frame-pointer ref
gcc -m32 -march=pentium-mmx -O3 -fomit-frame-pointer ref
gcc -m32 -march=pentium-mmx -O -fomit-frame-pointer ref
gcc -m32 -march=pentium-mmx -Os -fomit-frame-pointer ref
gcc -m32 -march=pentium2 -O2 -fomit-frame-pointer ref
gcc -m32 -march=pentium2 -O3 -fomit-frame-pointer ref
gcc -m32 -march=pentium2 -O -fomit-frame-pointer ref
gcc -m32 -march=pentium2 -Os -fomit-frame-pointer ref
gcc -m32 -march=pentium3 -O2 -fomit-frame-pointer ref
gcc -m32 -march=pentium3 -O3 -fomit-frame-pointer ref
gcc -m32 -march=pentium3 -O -fomit-frame-pointer ref
gcc -m32 -march=pentium3 -Os -fomit-frame-pointer ref
gcc -m32 -march=pentium4 -O2 -fomit-frame-pointer ref
gcc -m32 -march=pentium4 -O3 -fomit-frame-pointer ref
gcc -m32 -march=pentium4 -O -fomit-frame-pointer ref
gcc -m32 -march=pentium4 -Os -fomit-frame-pointer ref
gcc -m32 -march=pentium -O2 -fomit-frame-pointer ref
gcc -m32 -march=pentium -O3 -fomit-frame-pointer ref
gcc -m32 -march=pentium -O -fomit-frame-pointer ref
gcc -m32 -march=pentium -Os -fomit-frame-pointer ref
gcc -m32 -march=pentiumpro -O2 -fomit-frame-pointer ref
gcc -m32 -march=pentiumpro -O3 -fomit-frame-pointer ref
gcc -m32 -march=pentiumpro -O -fomit-frame-pointer ref
gcc -m32 -march=pentiumpro -Os -fomit-frame-pointer ref
gcc -m32 -march=prescott -O2 -fomit-frame-pointer ref
gcc -m32 -march=prescott -O3 -fomit-frame-pointer ref
gcc -m32 -march=prescott -O -fomit-frame-pointer ref
gcc -m32 -march=prescott -Os -fomit-frame-pointer ref

Compiler output

Implementation: crypto_dh/nist521gs/ref
Compiler: gcc -m32 -march=barcelona -O2 -fomit-frame-pointer
dh.c: dh.c:13:9: error: '__int128' is not supported on this target
dh.c: typedef __int128 type128;
dh.c: ^
dh.c: dh.c:13:18: warning: type defaults to 'int' in declaration of 'type128' [-Wimplicit-int]
dh.c: typedef __int128 type128;
dh.c: ^
dh.c: dh.c: In function 'gsqr':
dh.c: dh.c:278:109: warning: right shift count >= width of type [-Wshift-count-overflow]
dh.c: t2=4*((type128)x[1]*x[8]+(type128)x[2]*x[7]+(type128)x[3]*x[6]+(type128)x[4]*x[5])+(type128)x[0]*x[0]+2*(t1>>58);
dh.c: ^
dh.c: dh.c:280:111: warning: right shift count >= width of type [-Wshift-count-overflow]
dh.c: t1=4*((type128)x[2]*x[8]+(type128)x[3]*x[7]+(type128)x[4]*x[6])+2*((type128)x[0]*x[1]+(type128)x[5]*x[5])+(t2>>58);
dh.c: ^
dh.c: dh.c:282:109: warning: right shift count >= width of type [-Wshift-count-overflow]
dh.c: t2=4*((type128)x[3]*x[8]+(type128)x[4]*x[7]+(type128)x[5]*x[6])+2*(type128)x[0]*x[2]+(type128)x[1]*x[1]+(t1>>58);
dh.c: ^
dh.c: dh.c:284:111: warning: right shift count >= width of type [-Wshift-count-overflow]
dh.c: t1=4*((type128)x[4]*x[8]+(type128)x[5]*x[7])+2*((type128)x[0]*x[3]+(type128)x[1]*x[2]+(type128)x[6]*x[6])+(t2>>58);
dh.c: ^
dh.c: dh.c:286:111: warning: right shift count >= width of type [-Wshift-count-overflow]
dh.c: t2=4*((type128)x[5]*x[8]+(type128)x[6]*x[7])+2*((type128)x[0]*x[4]+(type128)x[1]*x[3])+(type128)x[2]*x[2]+(t1>>58);
dh.c: ^
dh.c: dh.c:288:109: warning: right shift count >= width of type [-Wshift-count-overflow]
dh.c: t1=4*(type128)x[6]*x[8]+2*((type128)x[0]*x[5]+(type128)x[1]*x[4]+(type128)x[2]*x[3]+(type128)x[7]*x[7])+(t2>>58);
dh.c: ^
dh.c: ...
dh.c: dh.c:13:9: error: '__int128' is not supported on this target
dh.c: typedef __int128 type128;
dh.c: ^
dh.c: dh.c:13:18: warning: type defaults to 'int' in declaration of 'type128' [-Wimplicit-int]
dh.c: typedef __int128 type128;
dh.c: ^
dh.c: dh.c: In function 'gsqr':
dh.c: dh.c:278:109: warning: right shift count >= width of type [-Wshift-count-overflow]
dh.c: t2=4*((type128)x[1]*x[8]+(type128)x[2]*x[7]+(type128)x[3]*x[6]+(type128)x[4]*x[5])+(type128)x[0]*x[0]+2*(t1>>58);
dh.c: ^
dh.c: dh.c:280:111: warning: right shift count >= width of type [-Wshift-count-overflow]
dh.c: t1=4*((type128)x[2]*x[8]+(type128)x[3]*x[7]+(type128)x[4]*x[6])+2*((type128)x[0]*x[1]+(type128)x[5]*x[5])+(t2>>58);
dh.c: ^
dh.c: dh.c:282:109: warning: right shift count >= width of type [-Wshift-count-overflow]
dh.c: t2=4*((type128)x[3]*x[8]+(type128)x[4]*x[7]+(type128)x[5]*x[6])+2*(type128)x[0]*x[2]+(type128)x[1]*x[1]+(t1>>58);
dh.c: ^
dh.c: dh.c:284:111: warning: right shift count >= width of type [-Wshift-count-overflow]
dh.c: t1=4*((type128)x[4]*x[8]+(type128)x[5]*x[7])+2*((type128)x[0]*x[3]+(type128)x[1]*x[2]+(type128)x[6]*x[6])+(t2>>58);
dh.c: ^
dh.c: dh.c:286:111: warning: right shift count >= width of type [-Wshift-count-overflow]
dh.c: t2=4*((type128)x[5]*x[8]+(type128)x[6]*x[7])+2*((type128)x[0]*x[4]+(type128)x[1]*x[3])+(type128)x[2]*x[2]+(t1>>58);
dh.c: ^
dh.c: dh.c:288:109: warning: right shift count >= width of type [-Wshift-count-overflow]
dh.c: t1=4*(type128)x[6]*x[8]+2*((type128)x[0]*x[5]+(type128)x[1]*x[4]+(type128)x[2]*x[3]+(type128)x[7]*x[7])+(t2>>58);
dh.c: ^
dh.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler Implementations
gcc -m32 -march=barcelona -O2 -fomit-frame-pointer ref
gcc -m32 -march=barcelona -O3 -fomit-frame-pointer ref
gcc -m32 -march=barcelona -O -fomit-frame-pointer ref
gcc -m32 -march=barcelona -Os -fomit-frame-pointer ref