Implementation notes: amd64, genji239, crypto_hashblocks/sha256

Computer: genji239
Architecture: amd64
CPU ID: GenuineIntel-00050654-bfebfbff
SUPERCOP version: 20191017
Operation: crypto_hashblocks
Primitive: sha256
Time | Object size | Test size | Implementation | Compiler | Benchmark date | SUPERCOP version
1760810691 0 020292 808 776inplacegcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2019102020191017
1766610691 0 023041 816 776inplacegcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2019102020191017
1804810805 0 020372 808 776refgcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2019102020191017
1811810805 0 023121 816 776refgcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2019102020191017
1954611017 0 020644 808 776inplacegcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2019102020191017
1958811001 0 020596 808 776refgcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2019102020191017
1980011246 0 019996 792 776refgcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2019102020191017
1994011135 0 019884 792 776inplacegcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2019102020191017
2044812368 0 027325 856 744reficc_-march=haswell_-mtune=haswell_-O3_-fomit-frame-pointer2019102020191017
2045412368 0 027325 856 744reficc_-march=core-avx2_-mtune=core-avx2_-O3_-fomit-frame-pointer2019102020191017
2045412368 0 027325 856 744reficc_-march=skylake_-mtune=skylake_-O3_-fomit-frame-pointer2019102020191017
2046612368 0 028565 856 776reficc_-march=icelake-client_-mtune=icelake-client_-O2_-fomit-frame-pointer2019102020191017
2046612368 0 028213 856 776reficc_-march=icelake-client_-mtune=icelake-client_-O3_-fomit-frame-pointer2019102020191017
2046612368 0 028005 856 776reficc_-march=skylake-avx512_-mtune=skylake-avx512_-O2_-fomit-frame-pointer2019102020191017
2046612368 0 027893 856 776reficc_-march=skylake-avx512_-mtune=skylake-avx512_-O3_-fomit-frame-pointer2019102020191017
2046612368 0 027725 856 744reficc_-march=skylake_-mtune=skylake_-O2_-fomit-frame-pointer2019102020191017
2047012368 0 027725 856 744reficc_-march=broadwell_-mtune=broadwell_-O2_-fomit-frame-pointer2019102020191017
2047012368 0 027325 856 744reficc_-march=broadwell_-mtune=broadwell_-O3_-fomit-frame-pointer2019102020191017
2047212304 0 027661 856 744inplaceicc_-march=broadwell_-mtune=broadwell_-O2_-fomit-frame-pointer2019102020191017
2047212304 0 028501 856 776inplaceicc_-march=icelake-client_-mtune=icelake-client_-O2_-fomit-frame-pointer2019102020191017
2047212368 0 027725 856 744reficc_-march=core-avx2_-mtune=core-avx2_-O2_-fomit-frame-pointer2019102020191017
2047612304 0 027661 856 744inplaceicc_-march=skylake_-mtune=skylake_-O2_-fomit-frame-pointer2019102020191017
2048212368 0 027725 856 744reficc_-march=haswell_-mtune=haswell_-O2_-fomit-frame-pointer2019102020191017
2048612224 0 027181 856 744inplaceicc_-march=haswell_-mtune=haswell_-O3_-fomit-frame-pointer2019102020191017
2048612224 0 027181 856 744inplaceicc_-march=skylake_-mtune=skylake_-O3_-fomit-frame-pointer2019102020191017
2048812304 0 027661 856 744inplaceicc_-march=haswell_-mtune=haswell_-O2_-fomit-frame-pointer2019102020191017
2049012224 0 027181 856 744inplaceicc_-march=broadwell_-mtune=broadwell_-O3_-fomit-frame-pointer2019102020191017
2049012304 0 027661 856 744inplaceicc_-march=core-avx2_-mtune=core-avx2_-O2_-fomit-frame-pointer2019102020191017
2049612224 0 028069 856 776inplaceicc_-march=icelake-client_-mtune=icelake-client_-O3_-fomit-frame-pointer2019102020191017
2049612304 0 027941 856 776inplaceicc_-march=skylake-avx512_-mtune=skylake-avx512_-O2_-fomit-frame-pointer2019102020191017
2049812224 0 027749 856 776inplaceicc_-march=skylake-avx512_-mtune=skylake-avx512_-O3_-fomit-frame-pointer2019102020191017
2050412224 0 027181 856 744inplaceicc_-march=core-avx2_-mtune=core-avx2_-O3_-fomit-frame-pointer2019102020191017
2382413008 0 026685 856 744reficc_-march=corei7_-mtune=corei7_-O2_-fomit-frame-pointer2019102020191017
2421012992 0 026725 856 744reficc_-march=corei7_-mtune=corei7_-O3_-fomit-frame-pointer2019102020191017
2422812752 0 026485 856 744inplaceicc_-march=corei7_-mtune=corei7_-O3_-fomit-frame-pointer2019102020191017
2427213456 0 028461 856 744inplaceicc_-march=sandybridge_-mtune=sandybridge_-O3_-fomit-frame-pointer2019102020191017
2434012928 0 026605 856 744inplaceicc_-march=corei7_-mtune=corei7_-O2_-fomit-frame-pointer2019102020191017
2434813584 0 028557 856 744reficc_-march=corei7-avx_-mtune=corei7-avx_-O2_-fomit-frame-pointer2019102020191017
2435013584 0 028557 856 744reficc_-march=core-avx-i_-mtune=core-avx-i_-O2_-fomit-frame-pointer2019102020191017
2435613584 0 028557 856 744reficc_-march=ivybridge_-mtune=ivybridge_-O2_-fomit-frame-pointer2019102020191017
2436013456 0 028461 856 744inplaceicc_-march=core-avx-i_-mtune=core-avx-i_-O3_-fomit-frame-pointer2019102020191017
2437213456 0 028461 856 744inplaceicc_-march=ivybridge_-mtune=ivybridge_-O3_-fomit-frame-pointer2019102020191017
2437613456 0 028461 856 744inplaceicc_-march=corei7-avx_-mtune=corei7-avx_-O3_-fomit-frame-pointer2019102020191017
2438013584 0 028557 856 744reficc_-march=sandybridge_-mtune=sandybridge_-O2_-fomit-frame-pointer2019102020191017
2440213584 0 028589 856 744reficc_-march=sandybridge_-mtune=sandybridge_-O3_-fomit-frame-pointer2019102020191017
2441213632 0 028605 856 744inplaceicc_-march=corei7-avx_-mtune=corei7-avx_-O2_-fomit-frame-pointer2019102020191017
2441213584 0 028589 856 744reficc_-march=ivybridge_-mtune=ivybridge_-O3_-fomit-frame-pointer2019102020191017
2441413632 0 028605 856 744inplaceicc_-march=core-avx-i_-mtune=core-avx-i_-O2_-fomit-frame-pointer2019102020191017
2441413632 0 028605 856 744inplaceicc_-march=sandybridge_-mtune=sandybridge_-O2_-fomit-frame-pointer2019102020191017
2441413584 0 028589 856 744reficc_-march=corei7-avx_-mtune=corei7-avx_-O3_-fomit-frame-pointer2019102020191017
2441613632 0 028605 856 744inplaceicc_-march=ivybridge_-mtune=ivybridge_-O2_-fomit-frame-pointer2019102020191017
2441613584 0 028589 856 744reficc_-march=core-avx-i_-mtune=core-avx-i_-O3_-fomit-frame-pointer2019102020191017

Test failure

Implementation: dolbeau/amd64-sha
Security model: unknown
Compiler: icc -march=broadwell -mtune=broadwell -O2 -fomit-frame-pointer
error 111

Number of similar (compiler,implementation) pairs: 28, namely:
Compiler | Implementations
icc -march=broadwell -mtune=broadwell -O2 -fomit-frame-pointer dolbeau/amd64-sha
icc -march=broadwell -mtune=broadwell -O3 -fomit-frame-pointer dolbeau/amd64-sha
icc -march=cannonlake -mtune=cannonlake -O2 -fomit-frame-pointer dolbeau/amd64-sha
icc -march=cannonlake -mtune=cannonlake -O3 -fomit-frame-pointer dolbeau/amd64-sha
icc -march=core-avx-i -mtune=core-avx-i -O2 -fomit-frame-pointer dolbeau/amd64-sha
icc -march=core-avx-i -mtune=core-avx-i -O3 -fomit-frame-pointer dolbeau/amd64-sha
icc -march=core-avx2 -mtune=core-avx2 -O2 -fomit-frame-pointer dolbeau/amd64-sha
icc -march=core-avx2 -mtune=core-avx2 -O3 -fomit-frame-pointer dolbeau/amd64-sha
icc -march=corei7-avx -mtune=corei7-avx -O2 -fomit-frame-pointer dolbeau/amd64-sha
icc -march=corei7-avx -mtune=corei7-avx -O3 -fomit-frame-pointer dolbeau/amd64-sha
icc -march=corei7 -mtune=corei7 -O2 -fomit-frame-pointer dolbeau/amd64-sha
icc -march=corei7 -mtune=corei7 -O3 -fomit-frame-pointer dolbeau/amd64-sha
icc -march=haswell -mtune=haswell -O2 -fomit-frame-pointer dolbeau/amd64-sha
icc -march=haswell -mtune=haswell -O3 -fomit-frame-pointer dolbeau/amd64-sha
icc -march=icelake-client -mtune=icelake-client -O2 -fomit-frame-pointer dolbeau/amd64-sha
icc -march=icelake-client -mtune=icelake-client -O3 -fomit-frame-pointer dolbeau/amd64-sha
icc -march=ivybridge -mtune=ivybridge -O2 -fomit-frame-pointer dolbeau/amd64-sha
icc -march=ivybridge -mtune=ivybridge -O3 -fomit-frame-pointer dolbeau/amd64-sha
icc -march=sandybridge -mtune=sandybridge -O2 -fomit-frame-pointer dolbeau/amd64-sha
icc -march=sandybridge -mtune=sandybridge -O3 -fomit-frame-pointer dolbeau/amd64-sha
icc -march=skylake-avx512 -mtune=skylake-avx512 -O2 -fomit-frame-pointer dolbeau/amd64-sha
icc -march=skylake-avx512 -mtune=skylake-avx512 -O3 -fomit-frame-pointer dolbeau/amd64-sha
icc -march=skylake -mtune=skylake -O2 -fomit-frame-pointer dolbeau/amd64-sha
icc -march=skylake -mtune=skylake -O3 -fomit-frame-pointer dolbeau/amd64-sha
icc -march=cannonlake -mtune=cannonlake -O2 -fomit-frame-pointer inplace
icc -march=cannonlake -mtune=cannonlake -O3 -fomit-frame-pointer inplace
icc -march=cannonlake -mtune=cannonlake -O2 -fomit-frame-pointer ref
icc -march=cannonlake -mtune=cannonlake -O3 -fomit-frame-pointer ref

Compiler output

Implementation: dolbeau/amd64-sha
Security model: unknown
Compiler: gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE
blocks.c: In file included from /software/compilers/gcc/gcc-9.2.0-full+isl+binutils/lib/gcc/x86_64-pc-linux-gnu/9.2.0/include/immintrin.h:99,
blocks.c: from blocks.c:37:
blocks.c: blocks.c: In function 'crypto_hashblocks_sha256_dolbeau_amd64_sha':
blocks.c: /software/compilers/gcc/gcc-9.2.0-full+isl+binutils/lib/gcc/x86_64-pc-linux-gnu/9.2.0/include/shaintrin.h:87:1: error: inlining failed in call to always_inline '_mm_sha256rnds2_epu32': target specific option mismatch
blocks.c: 87 | _mm_sha256rnds2_epu32 (__m128i __A, __m128i __B, __m128i __C)
blocks.c: | ^~~~~~~~~~~~~~~~~~~~~
blocks.c: blocks.c:146:8: note: called from here
blocks.c: 146 | s0 = _mm_sha256rnds2_epu32(x0, s1, h1)
blocks.c: | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
blocks.c: blocks.c:188:3: note: in expansion of macro 'DO16ROUNDS'
blocks.c: 188 | DO16ROUNDS(j0, j1, j2, j3, cc, cd, ce, cf);
blocks.c: | ^~~~~~~~~~
blocks.c: In file included from /software/compilers/gcc/gcc-9.2.0-full+isl+binutils/lib/gcc/x86_64-pc-linux-gnu/9.2.0/include/immintrin.h:99,
blocks.c: from blocks.c:37:
blocks.c: /software/compilers/gcc/gcc-9.2.0-full+isl+binutils/lib/gcc/x86_64-pc-linux-gnu/9.2.0/include/shaintrin.h:87:1: error: inlining failed in call to always_inline '_mm_sha256rnds2_epu32': target specific option mismatch
blocks.c: 87 | _mm_sha256rnds2_epu32 (__m128i __A, __m128i __B, __m128i __C)
blocks.c: | ^~~~~~~~~~~~~~~~~~~~~
blocks.c: blocks.c:144:8: note: called from here
blocks.c: 144 | s1 = _mm_sha256rnds2_epu32(x1, x0, h1); \
blocks.c: | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
blocks.c: blocks.c:188:3: note: in expansion of macro 'DO16ROUNDS'
blocks.c: 188 | DO16ROUNDS(j0, j1, j2, j3, cc, cd, ce, cf);
blocks.c: | ^~~~~~~~~~
blocks.c: In file included from /software/compilers/gcc/gcc-9.2.0-full+isl+binutils/lib/gcc/x86_64-pc-linux-gnu/9.2.0/include/immintrin.h:99,
blocks.c: from blocks.c:37:
blocks.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler | Implementations
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE dolbeau/amd64-sha
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv -fPIC -fPIE dolbeau/amd64-sha
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv -fPIC -fPIE dolbeau/amd64-sha
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv -fPIC -fPIE dolbeau/amd64-sha