Implementation notes: amd64, genji262, crypto_hash/mgrostl256

Computer: genji262
Architecture: amd64
CPU ID: AuthenticAMD-00800f12-178bfbff
SUPERCOP version: 20191017
Operation: crypto_hash
Primitive: mgrostl256
Time | Object size | Test size | Implementation | Compiler | Benchmark date | SUPERCOP version
33682209916 16384 0224125 17280 744opt-64icc_-march=corei7_-mtune=corei7_-O3_-fomit-frame-pointer2019102420191017
33968213452 16384 0228925 17280 744opt-64icc_-march=core-avx-i_-mtune=core-avx-i_-O3_-fomit-frame-pointer2019102420191017
33990213684 16384 0229093 17280 744opt-64icc_-march=broadwell_-mtune=broadwell_-O3_-fomit-frame-pointer2019102420191017
33990213684 16384 0229093 17280 744opt-64icc_-march=haswell_-mtune=haswell_-O3_-fomit-frame-pointer2019102420191017
33990213452 16384 0228925 17280 744opt-64icc_-march=ivybridge_-mtune=ivybridge_-O3_-fomit-frame-pointer2019102420191017
33990213452 16384 0228925 17280 744opt-64icc_-march=sandybridge_-mtune=sandybridge_-O3_-fomit-frame-pointer2019102420191017
34012213684 16384 0229093 17280 744opt-64icc_-march=skylake_-mtune=skylake_-O3_-fomit-frame-pointer2019102420191017
34078213452 16384 0228925 17280 744opt-64icc_-march=corei7-avx_-mtune=corei7-avx_-O3_-fomit-frame-pointer2019102420191017
38830222236 16384 0236389 17280 744opt-64icc_-march=corei7_-mtune=corei7_-O2_-fomit-frame-pointer2019102420191017
38852224420 16384 0239861 17280 744opt-64icc_-march=sandybridge_-mtune=sandybridge_-O2_-fomit-frame-pointer2019102420191017
38918224420 16384 0239861 17280 744opt-64icc_-march=corei7-avx_-mtune=corei7-avx_-O2_-fomit-frame-pointer2019102420191017
39138224420 16384 0239861 17280 744opt-64icc_-march=core-avx-i_-mtune=core-avx-i_-O2_-fomit-frame-pointer2019102420191017
39270224420 16384 0239861 17280 744opt-64icc_-march=ivybridge_-mtune=ivybridge_-O2_-fomit-frame-pointer2019102420191017
39336222204 16384 0238013 17280 744opt-64icc_-march=core-avx2_-mtune=core-avx2_-O2_-fomit-frame-pointer2019102420191017
39380222204 16384 0238013 17280 744opt-64icc_-march=broadwell_-mtune=broadwell_-O2_-fomit-frame-pointer2019102420191017
39512222204 16384 0238013 17280 744opt-64icc_-march=skylake_-mtune=skylake_-O2_-fomit-frame-pointer2019102420191017
40744222204 16384 0238013 17280 744opt-64icc_-march=haswell_-mtune=haswell_-O2_-fomit-frame-pointer2019102420191017
46706213684 16384 0229093 17280 744opt-64icc_-march=core-avx2_-mtune=core-avx2_-O3_-fomit-frame-pointer2019102420191017
6144671968 0 046669 856 744opt-32icc_-march=corei7_-mtune=corei7_-O3_-fomit-frame-pointer2019102420191017
6146868504 0 046341 856 744opt-32icc_-march=core-avx2_-mtune=core-avx2_-O3_-fomit-frame-pointer2019102420191017
6146868504 0 046341 856 744opt-32icc_-march=skylake_-mtune=skylake_-O3_-fomit-frame-pointer2019102420191017
6151273240 0 048197 856 744opt-32icc_-march=core-avx-i_-mtune=core-avx-i_-O3_-fomit-frame-pointer2019102420191017
6151268504 0 046341 856 744opt-32icc_-march=haswell_-mtune=haswell_-O3_-fomit-frame-pointer2019102420191017
6153468504 0 046341 856 744opt-32icc_-march=broadwell_-mtune=broadwell_-O3_-fomit-frame-pointer2019102420191017
6162273240 0 048197 856 744opt-32icc_-march=ivybridge_-mtune=ivybridge_-O3_-fomit-frame-pointer2019102420191017
6164473240 0 048197 856 744opt-32icc_-march=sandybridge_-mtune=sandybridge_-O3_-fomit-frame-pointer2019102420191017
6171072360 0 048077 856 744opt-32icc_-march=corei7-avx_-mtune=corei7-avx_-O2_-fomit-frame-pointer2019102420191017
6171072360 0 048077 856 744opt-32icc_-march=sandybridge_-mtune=sandybridge_-O2_-fomit-frame-pointer2019102420191017
6173272360 0 048077 856 744opt-32icc_-march=ivybridge_-mtune=ivybridge_-O2_-fomit-frame-pointer2019102420191017
6175472360 0 048077 856 744opt-32icc_-march=core-avx-i_-mtune=core-avx-i_-O2_-fomit-frame-pointer2019102420191017
6177673240 0 048197 856 744opt-32icc_-march=corei7-avx_-mtune=corei7-avx_-O3_-fomit-frame-pointer2019102420191017
6215071280 0 046565 856 744opt-32icc_-march=corei7_-mtune=corei7_-O2_-fomit-frame-pointer2019102420191017
6219467848 0 046669 856 744opt-32icc_-march=core-avx2_-mtune=core-avx2_-O2_-fomit-frame-pointer2019102420191017
6221667848 0 046669 856 744opt-32icc_-march=broadwell_-mtune=broadwell_-O2_-fomit-frame-pointer2019102420191017
6221667848 0 046669 856 744opt-32icc_-march=haswell_-mtune=haswell_-O2_-fomit-frame-pointer2019102420191017
6230467848 0 046669 856 744opt-32icc_-march=skylake_-mtune=skylake_-O2_-fomit-frame-pointer2019102420191017
6454850097 0 037097 816 776opt-32gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2019102420191017
6562646304 0 033460 808 776opt-32gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2019102420191017
6666045838 0 033076 808 776opt-32gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2019102420191017
7277651627 0 038445 816 736opt-32clang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2019102420191017
7321650338 0 037997 816 752opt-32clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2019102420191017
7370050178 0 037325 816 752opt-32clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2019102420191017
7370050178 0 037325 816 752opt-32clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2019102420191017
7447045421 0 032145 800 736opt-32clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2019102420191017
7554842536 0 030564 792 776opt-32gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2019102420191017
131841610239 384 022052 1192 752refclang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2019102420191017
132675411983 384 024388 1192 752refclang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2019102420191017
133408010239 384 022052 1192 752refclang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2019102420191017
133834816375 384 028796 1192 736refclang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2019102420191017
13708644081 384 013424 1176 736refclang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2019102420191017
287333223688 384 039118 1248 744reficc_-march=skylake_-mtune=skylake_-O3_-fomit-frame-pointer2019102420191017
287397023688 384 039118 1248 744reficc_-march=broadwell_-mtune=broadwell_-O3_-fomit-frame-pointer2019102420191017
287863423688 384 039118 1248 744reficc_-march=core-avx2_-mtune=core-avx2_-O3_-fomit-frame-pointer2019102420191017
288021819096 384 033259 1248 744reficc_-march=corei7_-mtune=corei7_-O2_-fomit-frame-pointer2019102420191017
288794023688 384 039118 1248 744reficc_-march=haswell_-mtune=haswell_-O3_-fomit-frame-pointer2019102420191017
289157021808 384 037302 1248 744reficc_-march=sandybridge_-mtune=sandybridge_-O3_-fomit-frame-pointer2019102420191017
289194421808 384 037302 1248 744reficc_-march=core-avx-i_-mtune=core-avx-i_-O3_-fomit-frame-pointer2019102420191017
289740021808 384 037302 1248 744reficc_-march=ivybridge_-mtune=ivybridge_-O3_-fomit-frame-pointer2019102420191017
289832421808 384 037302 1248 744reficc_-march=corei7-avx_-mtune=corei7-avx_-O3_-fomit-frame-pointer2019102420191017
292166619448 384 033667 1248 744reficc_-march=corei7_-mtune=corei7_-O3_-fomit-frame-pointer2019102420191017
293343621280 384 036742 1248 744reficc_-march=core-avx-i_-mtune=core-avx-i_-O2_-fomit-frame-pointer2019102420191017
293405221280 384 036742 1248 744reficc_-march=corei7-avx_-mtune=corei7-avx_-O2_-fomit-frame-pointer2019102420191017
293598821280 384 036742 1248 744reficc_-march=ivybridge_-mtune=ivybridge_-O2_-fomit-frame-pointer2019102420191017
294201623224 384 039054 1248 744reficc_-march=broadwell_-mtune=broadwell_-O2_-fomit-frame-pointer2019102420191017
294278623224 384 039054 1248 744reficc_-march=core-avx2_-mtune=core-avx2_-O2_-fomit-frame-pointer2019102420191017
295306023224 384 039054 1248 744reficc_-march=skylake_-mtune=skylake_-O2_-fomit-frame-pointer2019102420191017
296934023224 384 039054 1248 744reficc_-march=haswell_-mtune=haswell_-O2_-fomit-frame-pointer2019102420191017
298529021280 384 036742 1248 744reficc_-march=sandybridge_-mtune=sandybridge_-O2_-fomit-frame-pointer2019102420191017
43092503794 384 012935 1176 776refgcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2019102420191017
441841415768 384 027492 1200 776refgcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2019102420191017
46155345336 384 015495 1192 776refgcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2019102420191017
51082904575 384 014583 1192 776refgcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2019102420191017

Test failure

Implementation: opt-32
Security model: unknown
Compiler: icc -march=cannonlake -mtune=cannonlake -O2 -fomit-frame-pointer
error 111

Number of similar (compiler,implementation) pairs: 18, namely:
Compiler | Implementations
icc -march=cannonlake -mtune=cannonlake -O2 -fomit-frame-pointer opt-32
icc -march=cannonlake -mtune=cannonlake -O3 -fomit-frame-pointer opt-32
icc -march=icelake-client -mtune=icelake-client -O2 -fomit-frame-pointer opt-32
icc -march=icelake-client -mtune=icelake-client -O3 -fomit-frame-pointer opt-32
icc -march=skylake-avx512 -mtune=skylake-avx512 -O2 -fomit-frame-pointer opt-32
icc -march=skylake-avx512 -mtune=skylake-avx512 -O3 -fomit-frame-pointer opt-32
icc -march=cannonlake -mtune=cannonlake -O2 -fomit-frame-pointer opt-64
icc -march=cannonlake -mtune=cannonlake -O3 -fomit-frame-pointer opt-64
icc -march=icelake-client -mtune=icelake-client -O2 -fomit-frame-pointer opt-64
icc -march=icelake-client -mtune=icelake-client -O3 -fomit-frame-pointer opt-64
icc -march=skylake-avx512 -mtune=skylake-avx512 -O2 -fomit-frame-pointer opt-64
icc -march=skylake-avx512 -mtune=skylake-avx512 -O3 -fomit-frame-pointer opt-64
icc -march=cannonlake -mtune=cannonlake -O2 -fomit-frame-pointer ref
icc -march=cannonlake -mtune=cannonlake -O3 -fomit-frame-pointer ref
icc -march=icelake-client -mtune=icelake-client -O2 -fomit-frame-pointer ref
icc -march=icelake-client -mtune=icelake-client -O3 -fomit-frame-pointer ref
icc -march=skylake-avx512 -mtune=skylake-avx512 -O2 -fomit-frame-pointer ref
icc -march=skylake-avx512 -mtune=skylake-avx512 -O3 -fomit-frame-pointer ref

Compiler output

Implementation: opt-64
Security model: unknown
Compiler: clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
hash.c: hash.c:239:3: warning: non-constant static local variable in inline function may be different in different files [-Wstatic-local-in-inline]
hash.c: static u64 y[COLS1024] __attribute__((aligned(16)));
hash.c: ^
hash.c: hash.c:238:1: note: use 'static' to give inline function 'F1024' internal linkage
hash.c: inline void F1024(u64 *h, const u64 *m) {
hash.c: ^
hash.c: static
hash.c: hash.c:240:3: warning: non-constant static local variable in inline function may be different in different files [-Wstatic-local-in-inline]
hash.c: static u64 z[COLS1024] __attribute__((aligned(16)));
hash.c: ^
hash.c: hash.c:238:1: note: use 'static' to give inline function 'F1024' internal linkage
hash.c: inline void F1024(u64 *h, const u64 *m) {
hash.c: ^
hash.c: static
hash.c: hash.c:241:3: warning: non-constant static local variable in inline function may be different in different files [-Wstatic-local-in-inline]
hash.c: static u64 outQ[COLS1024] __attribute__((aligned(16)));
hash.c: ^
hash.c: hash.c:238:1: note: use 'static' to give inline function 'F1024' internal linkage
hash.c: inline void F1024(u64 *h, const u64 *m) {
hash.c: ^
hash.c: static
hash.c: hash.c:242:3: warning: non-constant static local variable in inline function may be different in different files [-Wstatic-local-in-inline]
hash.c: static u64 inP[COLS1024] __attribute__((aligned(16)));
hash.c: ^
hash.c: hash.c:238:1: note: use 'static' to give inline function 'F1024' internal linkage
hash.c: ...

Number of similar (compiler,implementation) pairs: 5, namely:
Compiler | Implementations
clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE opt-64
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE opt-64
clang -march=native -O -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE opt-64
clang -march=native -Os -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE opt-64
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE opt-64

Compiler output

Implementation: opt-64
Security model: unknown
Compiler: gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE
hash.c: hash.c:242:14: warning: 'inP' is static but declared in inline function 'F1024' which is not static
hash.c: 242 | static u64 inP[COLS1024] __attribute__((aligned(16)));
hash.c: | ^~~
hash.c: hash.c:241:14: warning: 'outQ' is static but declared in inline function 'F1024' which is not static
hash.c: 241 | static u64 outQ[COLS1024] __attribute__((aligned(16)));
hash.c: | ^~~~
hash.c: hash.c:240:14: warning: 'z' is static but declared in inline function 'F1024' which is not static
hash.c: 240 | static u64 z[COLS1024] __attribute__((aligned(16)));
hash.c: | ^
hash.c: hash.c:239:14: warning: 'y' is static but declared in inline function 'F1024' which is not static
hash.c: 239 | static u64 y[COLS1024] __attribute__((aligned(16)));
hash.c: | ^
try.c: /software/compilers/gcc/gcc-9.2.0-full+isl+binutils/lib/gcc/x86_64-pc-linux-gnu/9.2.0/../../../../x86_64-pc-linux-gnu/bin/ld: crypto_hash_mgrostl256.a(hash.o): in function `Transform':
try.c: hash.c:(.text+0x...): undefined reference to `F512'
try.c: collect2: error: ld returned 1 exit status

Number of similar (compiler,implementation) pairs: 3, namely:
Compiler | Implementations
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE opt-64
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv -fPIC -fPIE opt-64
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv -fPIC -fPIE opt-64

Compiler output

Implementation: opt-64
Security model: unknown
Compiler: gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv -fPIC -fPIE
hash.c: hash.c:242:14: warning: 'inP' is static but declared in inline function 'F1024' which is not static
hash.c: 242 | static u64 inP[COLS1024] __attribute__((aligned(16)));
hash.c: | ^~~
hash.c: hash.c:241:14: warning: 'outQ' is static but declared in inline function 'F1024' which is not static
hash.c: 241 | static u64 outQ[COLS1024] __attribute__((aligned(16)));
hash.c: | ^~~~
hash.c: hash.c:240:14: warning: 'z' is static but declared in inline function 'F1024' which is not static
hash.c: 240 | static u64 z[COLS1024] __attribute__((aligned(16)));
hash.c: | ^
hash.c: hash.c:239:14: warning: 'y' is static but declared in inline function 'F1024' which is not static
hash.c: 239 | static u64 y[COLS1024] __attribute__((aligned(16)));
hash.c: | ^
try.c: /software/compilers/gcc/gcc-9.2.0-full+isl+binutils/lib/gcc/x86_64-pc-linux-gnu/9.2.0/../../../../x86_64-pc-linux-gnu/bin/ld: crypto_hash_mgrostl256.a(hash.o): in function `Transform':
try.c: hash.c:(.text+0x...): undefined reference to `F512'
try.c: /software/compilers/gcc/gcc-9.2.0-full+isl+binutils/lib/gcc/x86_64-pc-linux-gnu/9.2.0/../../../../x86_64-pc-linux-gnu/bin/ld: crypto_hash_mgrostl256.a(hash.o): in function `Update':
try.c: hash.c:(.text+0x...): undefined reference to `F512'
try.c: /software/compilers/gcc/gcc-9.2.0-full+isl+binutils/lib/gcc/x86_64-pc-linux-gnu/9.2.0/../../../../x86_64-pc-linux-gnu/bin/ld: hash.c:(.text+0x...): undefined reference to `F512'
try.c: /software/compilers/gcc/gcc-9.2.0-full+isl+binutils/lib/gcc/x86_64-pc-linux-gnu/9.2.0/../../../../x86_64-pc-linux-gnu/bin/ld: crypto_hash_mgrostl256.a(hash.o): in function `Final':
try.c: hash.c:(.text+0x...): undefined reference to `F512'
try.c: /software/compilers/gcc/gcc-9.2.0-full+isl+binutils/lib/gcc/x86_64-pc-linux-gnu/9.2.0/../../../../x86_64-pc-linux-gnu/bin/ld: hash.c:(.text+0x...): undefined reference to `F512'
try.c: collect2: error: ld returned 1 exit status

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler | Implementations
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv -fPIC -fPIE opt-64

Compiler output

Implementation: opt-64
Security model: unknown
Compiler: icc -march=broadwell -mtune=broadwell -O2 -fomit-frame-pointer
hash.c: hash.c(242): warning #1172: a variable with static storage duration cannot be defined within an inline function
hash.c: static u64 inP[COLS1024] __attribute__((aligned(16)));
hash.c: ^
hash.c:
hash.c: hash.c(241): warning #1172: a variable with static storage duration cannot be defined within an inline function
hash.c: static u64 outQ[COLS1024] __attribute__((aligned(16)));
hash.c: ^
hash.c:
hash.c: hash.c(240): warning #1172: a variable with static storage duration cannot be defined within an inline function
hash.c: static u64 z[COLS1024] __attribute__((aligned(16)));
hash.c: ^
hash.c:
hash.c: hash.c(239): warning #1172: a variable with static storage duration cannot be defined within an inline function
hash.c: static u64 y[COLS1024] __attribute__((aligned(16)));
hash.c: ^
hash.c:

Number of similar (compiler,implementation) pairs: 24, namely:
Compiler | Implementations
icc -march=broadwell -mtune=broadwell -O2 -fomit-frame-pointer opt-64
icc -march=broadwell -mtune=broadwell -O3 -fomit-frame-pointer opt-64
icc -march=cannonlake -mtune=cannonlake -O2 -fomit-frame-pointer opt-64
icc -march=cannonlake -mtune=cannonlake -O3 -fomit-frame-pointer opt-64
icc -march=core-avx-i -mtune=core-avx-i -O2 -fomit-frame-pointer opt-64
icc -march=core-avx-i -mtune=core-avx-i -O3 -fomit-frame-pointer opt-64
icc -march=core-avx2 -mtune=core-avx2 -O2 -fomit-frame-pointer opt-64
icc -march=core-avx2 -mtune=core-avx2 -O3 -fomit-frame-pointer opt-64
icc -march=corei7-avx -mtune=corei7-avx -O2 -fomit-frame-pointer opt-64
icc -march=corei7-avx -mtune=corei7-avx -O3 -fomit-frame-pointer opt-64
icc -march=corei7 -mtune=corei7 -O2 -fomit-frame-pointer opt-64
icc -march=corei7 -mtune=corei7 -O3 -fomit-frame-pointer opt-64
icc -march=haswell -mtune=haswell -O2 -fomit-frame-pointer opt-64
icc -march=haswell -mtune=haswell -O3 -fomit-frame-pointer opt-64
icc -march=icelake-client -mtune=icelake-client -O2 -fomit-frame-pointer opt-64
icc -march=icelake-client -mtune=icelake-client -O3 -fomit-frame-pointer opt-64
icc -march=ivybridge -mtune=ivybridge -O2 -fomit-frame-pointer opt-64
icc -march=ivybridge -mtune=ivybridge -O3 -fomit-frame-pointer opt-64
icc -march=sandybridge -mtune=sandybridge -O2 -fomit-frame-pointer opt-64
icc -march=sandybridge -mtune=sandybridge -O3 -fomit-frame-pointer opt-64
icc -march=skylake-avx512 -mtune=skylake-avx512 -O2 -fomit-frame-pointer opt-64
icc -march=skylake-avx512 -mtune=skylake-avx512 -O3 -fomit-frame-pointer opt-64
icc -march=skylake -mtune=skylake -O2 -fomit-frame-pointer opt-64
icc -march=skylake -mtune=skylake -O3 -fomit-frame-pointer opt-64