Implementation notes: amd64, hydra4, crypto_hash/blake256

Computer: hydra4
Microarchitecture: amd64; K10 32nm (300f10)
Architecture: amd64
CPU ID: AuthenticAMD-00300f10-178bfbff
SUPERCOP version: 20240107
Operation: crypto_hash
Primitive: blake256
TimeObject sizeTest sizeImplementationCompilerBenchmark dateSUPERCOP version
203829886 0 019507 756 920bswapgcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
204079886 0 019507 756 920regsgcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
205508116 0 021000 812 888sse2clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
207037444 0 019480 812 888sse2clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
207318116 0 021000 812 888sse2clang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
2081510280 0 023160 812 888bswapclang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
208177710 0 017814 804 888sse2-2clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
208249624 0 021640 812 888regsclang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
2083110296 0 023160 812 888regsclang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
2083710296 0 023160 812 888regsclang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
2085810280 0 023160 812 888bswapclang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
208649584 0 021600 812 888bswapclang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
208729680 0 020318 804 888bswapclang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
209019888 0 020542 804 888regsclang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
209047371 0 017494 804 888sse2clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
209579952 0 020535 772 952bswapgcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
2103210354 0 021680 780 952regsgcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
2103510354 0 021680 780 952bswapgcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
2110610794 0 023296 780 952regsgcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
2113710792 0 023296 780 952bswapgcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
211478455 0 021320 812 888sse2-2clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
211478455 0 021320 812 888sse2-2clang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
212047783 0 019800 812 888sse2-2clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
2168224944 0 037576 780 952sphlibgcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
2170224718 0 036176 780 952sphlibgcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
2199024770 0 036904 812 888sphlibclang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
2200024834 0 037832 812 888sphlibclang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
2200124834 0 037832 812 888sphlibclang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
2207010650 0 021239 772 952regsgcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
223538387 0 019022 804 888sse2clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
2236110008 0 020134 804 888bswapclang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
223788844 0 019502 804 888sse2-2clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
2238110032 0 020134 804 888regsclang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
2275324927 0 035174 804 888sphlibclang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
2293023549 0 034320 780 952sphlibgcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
230448734 0 019327 772 952sse2gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
232819508 0 020095 772 952sse2-2gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
2346822758 0 032523 756 920sphlibgcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
2540625239 0 036070 804 888sphlibclang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
258299370 0 020696 780 952sse2-2gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
259489810 0 022312 780 952sse2-2gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
296996900 0 016651 756 920sphlib-smallgcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
301098730 0 020056 780 952sse2gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
301669170 0 021672 780 952sse2gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
306308032 0 018824 780 952sphlib-smallgcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
350872449 0 012083 756 920refgcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
364878690 0 021688 812 888sphlib-smallclang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
367618610 0 020760 812 888sphlib-smallclang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
368378690 0 021688 812 888sphlib-smallclang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
368868407 0 018646 804 888sphlib-smallclang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
369902819 0 013423 772 952refgcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
383643582 0 016440 812 888refclang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
391118338 0 017971 756 920sse2-2gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
392362782 0 014792 812 888refclang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
392993582 0 016440 812 888refclang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
394418919 0 019766 804 888sphlib-smallclang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
4113011278 0 022736 780 952sphlib-smallgcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
4222611568 0 024200 780 952sphlib-smallgcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
4302510751 0 020371 756 920sandygcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
430633037 0 013678 804 888refclang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
4337510555 0 023416 812 888sandyclang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
433849859 0 021888 812 888sandyclang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
4341210555 0 023416 812 888sandyclang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
434329955 0 020606 804 888sandyclang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
4447910284 0 020390 804 888sandyclang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
4458711624 0 024128 780 952sandygcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
4462811186 0 022512 780 952sandygcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
460232759 0 012870 804 888refclang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2023121420231212
464854314 0 016800 780 952refgcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
4699010514 0 021111 772 952sandygcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
469947618 0 017267 756 920sse2gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212
504873378 0 014704 780 952refgcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2023121420231212

Compiler output

Implementation: avxs
Security model: constbranchindex
Compiler: clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
hash.c: hash.c:155:61: error: always_inline function '_mm_shuffle_epi8' requires target feature 'ssse3', but would be inlined into function 'blake256_final' that is compiled without support for 'ssse3'
hash.c: __m128i w0 = _mm_load_si128((__m128i*)(&S->h[0])); w0 = _mm_shuffle_epi8(w0, u32to8);
hash.c: ^
hash.c: hash.c:156:61: error: always_inline function '_mm_shuffle_epi8' requires target feature 'ssse3', but would be inlined into function 'blake256_final' that is compiled without support for 'ssse3'
hash.c: __m128i w1 = _mm_load_si128((__m128i*)(&S->h[4])); w1 = _mm_shuffle_epi8(w1, u32to8);
hash.c: ^
hash.c: 2 errors generated.

Number of similar (compiler,implementation) pairs: 5, namely:
CompilerImplementations
clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE avxs
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE avxs
clang -march=native -O -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE avxs
clang -march=native -Os -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE avxs
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE avxs

Compiler output

Implementation: avxs
Security model: constbranchindex
Compiler: gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE
hash.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/10/include/immintrin.h:35,
hash.c: from /usr/lib/gcc/x86_64-linux-gnu/10/include/x86intrin.h:32,
hash.c: from hash.c:7:
hash.c: hash.c: In function 'blake256_final':
hash.c: /usr/lib/gcc/x86_64-linux-gnu/10/include/tmmintrin.h:136:1: error: inlining failed in call to 'always_inline' '_mm_shuffle_epi8': target specific option mismatch
hash.c: 136 | _mm_shuffle_epi8 (__m128i __X, __m128i __Y)
hash.c: | ^~~~~~~~~~~~~~~~
hash.c: hash.c:156:61: note: called from here
hash.c: 156 | __m128i w1 = _mm_load_si128((__m128i*)(&S->h[4])); w1 = _mm_shuffle_epi8(w1, u32to8);
hash.c: | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
hash.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/10/include/immintrin.h:35,
hash.c: from /usr/lib/gcc/x86_64-linux-gnu/10/include/x86intrin.h:32,
hash.c: from hash.c:7:
hash.c: /usr/lib/gcc/x86_64-linux-gnu/10/include/tmmintrin.h:136:1: error: inlining failed in call to 'always_inline' '_mm_shuffle_epi8': target specific option mismatch
hash.c: 136 | _mm_shuffle_epi8 (__m128i __X, __m128i __Y)
hash.c: | ^~~~~~~~~~~~~~~~
hash.c: hash.c:155:61: note: called from here
hash.c: 155 | __m128i w0 = _mm_load_si128((__m128i*)(&S->h[0])); w0 = _mm_shuffle_epi8(w0, u32to8);
hash.c: | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
hash.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/10/include/immintrin.h:35,
hash.c: from /usr/lib/gcc/x86_64-linux-gnu/10/include/x86intrin.h:32,
hash.c: from hash.c:7:
hash.c: /usr/lib/gcc/x86_64-linux-gnu/10/include/tmmintrin.h:136:1: error: inlining failed in call to 'always_inline' '_mm_shuffle_epi8': target specific option mismatch
hash.c: 136 | _mm_shuffle_epi8 (__m128i __X, __m128i __Y)
hash.c: | ^~~~~~~~~~~~~~~~
hash.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
CompilerImplementations
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE avxs
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv -fPIC -fPIE avxs
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv -fPIC -fPIE avxs
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv -fPIC -fPIE avxs

Compiler output

Implementation: sse41
Security model: constbranchindex
Compiler: clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
hash.c: In file included from hash.c:121:
hash.c: ./rounds.sse41.h:17:55: warning: implicit conversion from 'long' to 'int' changes value from 2242054355 to -2052912941 [-Wconstant-conversion]
hash.c: buf2 = _mm_set_epi32(3964562569, 698298832, 57701188, 2242054355);
hash.c: ~~~~~~~~~~~~~ ^~~~~~~~~~
hash.c: ./rounds.sse41.h:17:22: warning: implicit conversion from 'long' to 'int' changes value from 3964562569 to -330404727 [-Wconstant-conversion]
hash.c: buf2 = _mm_set_epi32(3964562569, 698298832, 57701188, 2242054355);
hash.c: ~~~~~~~~~~~~~ ^~~~~~~~~~
hash.c: ./rounds.sse41.h:20:33: warning: implicit conversion from 'long' to 'int' changes value from 2752067618 to -1542899678 [-Wconstant-conversion]
hash.c: buf1 = _mm_set_epi32(137296536, 2752067618, 320440878, 608135816);
hash.c: ~~~~~~~~~~~~~ ^~~~~~~~~~
hash.c: ./rounds.sse41.h:47:34: warning: implicit conversion from 'long' to 'int' changes value from 3380367581 to -914599715 [-Wconstant-conversion]
hash.c: buf2 = _mm_set_epi32(3041331479, 3380367581, 887688300, 953160567);
hash.c: ~~~~~~~~~~~~~ ^~~~~~~~~~
hash.c: ./rounds.sse41.h:47:22: warning: implicit conversion from 'long' to 'int' changes value from 3041331479 to -1253635817 [-Wconstant-conversion]
hash.c: buf2 = _mm_set_epi32(3041331479, 3380367581, 887688300, 953160567);
hash.c: ~~~~~~~~~~~~~ ^~~~~~~~~~
hash.c: ./rounds.sse41.h:50:46: warning: implicit conversion from 'long' to 'int' changes value from 3193202383 to -1101764913 [-Wconstant-conversion]
hash.c: buf1 = _mm_set_epi32(1065670069, 3232508343, 3193202383, 1160258022);
hash.c: ~~~~~~~~~~~~~ ^~~~~~~~~~
hash.c: ./rounds.sse41.h:50:34: warning: implicit conversion from 'long' to 'int' changes value from 3232508343 to -1062458953 [-Wconstant-conversion]
hash.c: buf1 = _mm_set_epi32(1065670069, 3232508343, 3193202383, 1160258022);
hash.c: ~~~~~~~~~~~~~ ^~~~~~~~~~
hash.c: ./rounds.sse41.h:81:57: warning: implicit conversion from 'long' to 'int' changes value from 3193202383 to -1101764913 [-Wconstant-conversion]
hash.c: buf2 = _mm_set_epi32(137296536, 3041331479, 1160258022, 3193202383);
hash.c: ~~~~~~~~~~~~~ ^~~~~~~~~~
hash.c: ...

Number of similar (compiler,implementation) pairs: 5, namely:
CompilerImplementations
clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE sse41
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE sse41
clang -march=native -O -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE sse41
clang -march=native -Os -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE sse41
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE sse41

Compiler output

Implementation: sse41
Security model: constbranchindex
Compiler: gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE
hash.c: In file included from hash.c:4:
hash.c: hash.c: In function 'blake256_compress':
hash.c: /usr/lib/gcc/x86_64-linux-gnu/10/include/tmmintrin.h:136:1: error: inlining failed in call to 'always_inline' '_mm_shuffle_epi8': target specific option mismatch
hash.c: 136 | _mm_shuffle_epi8 (__m128i __X, __m128i __Y)
hash.c: | ^~~~~~~~~~~~~~~~
hash.c: hash.c:107:8: note: called from here
hash.c: 107 | m3 = _mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(datablock + 48)), u8to32);
hash.c: | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
hash.c: In file included from hash.c:4:
hash.c: /usr/lib/gcc/x86_64-linux-gnu/10/include/tmmintrin.h:136:1: error: inlining failed in call to 'always_inline' '_mm_shuffle_epi8': target specific option mismatch
hash.c: 136 | _mm_shuffle_epi8 (__m128i __X, __m128i __Y)
hash.c: | ^~~~~~~~~~~~~~~~
hash.c: hash.c:106:8: note: called from here
hash.c: 106 | m2 = _mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(datablock + 32)), u8to32);
hash.c: | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
hash.c: In file included from hash.c:4:
hash.c: /usr/lib/gcc/x86_64-linux-gnu/10/include/tmmintrin.h:136:1: error: inlining failed in call to 'always_inline' '_mm_shuffle_epi8': target specific option mismatch
hash.c: 136 | _mm_shuffle_epi8 (__m128i __X, __m128i __Y)
hash.c: | ^~~~~~~~~~~~~~~~
hash.c: hash.c:105:8: note: called from here
hash.c: 105 | m1 = _mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(datablock + 16)), u8to32);
hash.c: | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
hash.c: In file included from hash.c:4:
hash.c: /usr/lib/gcc/x86_64-linux-gnu/10/include/tmmintrin.h:136:1: error: inlining failed in call to 'always_inline' '_mm_shuffle_epi8': target specific option mismatch
hash.c: 136 | _mm_shuffle_epi8 (__m128i __X, __m128i __Y)
hash.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
CompilerImplementations
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE sse41
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv -fPIC -fPIE sse41
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv -fPIC -fPIE sse41
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv -fPIC -fPIE sse41

Compiler output

Implementation: sse41-2
Security model: constbranchindex
Compiler: clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
hash.c: In file included from hash.c:2:
hash.c: ./blake256.h:105:15: warning: '_mm_roti_epi32' macro redefined [-Wmacro-redefined]
hash.c: #define _mm_roti_epi32(r, c) ((8==-c) ? _mm_shuffle_epi8(r,r8) : ( (16==-c) ? _mm_shuffle_epi8(r,r16) : _mm_xor_si128(_mm_srli_epi32( (r), -(c) ),_mm_slli_epi32( (r), 32-(-c) )) ) )
hash.c: ^
hash.c: /usr/lib/llvm-11/lib/clang/11.0.1/include/xopintrin.h:233:9: note: previous definition is here
hash.c: #define _mm_roti_epi32(A, N) \
hash.c: ^
hash.c: hash.c:93:22: error: always_inline function '_mm_shuffle_epi8' requires target feature 'ssse3', but would be inlined into function 'blake256_compress' that is compiled without support for 'ssse3'
hash.c: const __m128i m0 = _mm_shuffle_epi8(LOADU(datablock + 00), u8to32);
hash.c: ^
hash.c: hash.c:94:22: error: always_inline function '_mm_shuffle_epi8' requires target feature 'ssse3', but would be inlined into function 'blake256_compress' that is compiled without support for 'ssse3'
hash.c: const __m128i m1 = _mm_shuffle_epi8(LOADU(datablock + 16), u8to32);
hash.c: ^
hash.c: hash.c:95:22: error: always_inline function '_mm_shuffle_epi8' requires target feature 'ssse3', but would be inlined into function 'blake256_compress' that is compiled without support for 'ssse3'
hash.c: const __m128i m2 = _mm_shuffle_epi8(LOADU(datablock + 32), u8to32);
hash.c: ^
hash.c: hash.c:96:22: error: always_inline function '_mm_shuffle_epi8' requires target feature 'ssse3', but would be inlined into function 'blake256_compress' that is compiled without support for 'ssse3'
hash.c: const __m128i m3 = _mm_shuffle_epi8(LOADU(datablock + 48), u8to32);
hash.c: ^
hash.c: hash.c:115:3: error: always_inline function '_mm_shuffle_epi8' requires target feature 'ssse3', but would be inlined into function 'blake256_compress' that is compiled without support for 'ssse3'
hash.c: ROUND( 0);
hash.c: ^
hash.c: ./rounds.h:52:3: note: expanded from macro 'ROUND'
hash.c: G1(row1,row2,row3,row4,buf1); \
hash.c: ^
hash.c: ...

Number of similar (compiler,implementation) pairs: 5, namely:
CompilerImplementations
clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE sse41-2
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE sse41-2
clang -march=native -O -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE sse41-2
clang -march=native -Os -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE sse41-2
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE sse41-2

Compiler output

Implementation: sse41-2
Security model: constbranchindex
Compiler: gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE
hash.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/10/include/immintrin.h:35,
hash.c: from /usr/lib/gcc/x86_64-linux-gnu/10/include/x86intrin.h:32,
hash.c: from blake256.h:7,
hash.c: from hash.c:2:
hash.c: hash.c: In function 'blake256_compress':
hash.c: /usr/lib/gcc/x86_64-linux-gnu/10/include/tmmintrin.h:136:1: error: inlining failed in call to 'always_inline' '_mm_shuffle_epi8': target specific option mismatch
hash.c: 136 | _mm_shuffle_epi8 (__m128i __X, __m128i __Y)
hash.c: | ^~~~~~~~~~~~~~~~
hash.c: hash.c:96:22: note: called from here
hash.c: 96 | const __m128i m3 = _mm_shuffle_epi8(LOADU(datablock + 48), u8to32);
hash.c: | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
hash.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/10/include/immintrin.h:35,
hash.c: from /usr/lib/gcc/x86_64-linux-gnu/10/include/x86intrin.h:32,
hash.c: from blake256.h:7,
hash.c: from hash.c:2:
hash.c: /usr/lib/gcc/x86_64-linux-gnu/10/include/tmmintrin.h:136:1: error: inlining failed in call to 'always_inline' '_mm_shuffle_epi8': target specific option mismatch
hash.c: 136 | _mm_shuffle_epi8 (__m128i __X, __m128i __Y)
hash.c: | ^~~~~~~~~~~~~~~~
hash.c: hash.c:95:22: note: called from here
hash.c: 95 | const __m128i m2 = _mm_shuffle_epi8(LOADU(datablock + 32), u8to32);
hash.c: | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
hash.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/10/include/immintrin.h:35,
hash.c: from /usr/lib/gcc/x86_64-linux-gnu/10/include/x86intrin.h:32,
hash.c: from blake256.h:7,
hash.c: from hash.c:2:
hash.c: ...

Number of similar (compiler,implementation) pairs: 8, namely:
CompilerImplementations
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE sse41-2
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv -fPIC -fPIE sse41-2
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv -fPIC -fPIE sse41-2
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv -fPIC -fPIE sse41-2
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE xop
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv -fPIC -fPIE xop
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv -fPIC -fPIE xop
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv -fPIC -fPIE xop

Compiler output

Implementation: ssse3
Security model: constbranchindex
Compiler: clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
hash.c: In file included from hash.c:122:
hash.c: ./rounds.ssse3.h:3:55: warning: implicit conversion from 'long' to 'int' changes value from 2242054355 to -2052912941 [-Wconstant-conversion]
hash.c: buf2 = _mm_set_epi32(3964562569, 698298832, 57701188, 2242054355);
hash.c: ~~~~~~~~~~~~~ ^~~~~~~~~~
hash.c: ./rounds.ssse3.h:3:22: warning: implicit conversion from 'long' to 'int' changes value from 3964562569 to -330404727 [-Wconstant-conversion]
hash.c: buf2 = _mm_set_epi32(3964562569, 698298832, 57701188, 2242054355);
hash.c: ~~~~~~~~~~~~~ ^~~~~~~~~~
hash.c: ./rounds.ssse3.h:6:33: warning: implicit conversion from 'long' to 'int' changes value from 2752067618 to -1542899678 [-Wconstant-conversion]
hash.c: buf1 = _mm_set_epi32(137296536, 2752067618, 320440878, 608135816);
hash.c: ~~~~~~~~~~~~~ ^~~~~~~~~~
hash.c: ./rounds.ssse3.h:27:34: warning: implicit conversion from 'long' to 'int' changes value from 3380367581 to -914599715 [-Wconstant-conversion]
hash.c: buf2 = _mm_set_epi32(3041331479, 3380367581, 887688300, 953160567);
hash.c: ~~~~~~~~~~~~~ ^~~~~~~~~~
hash.c: ./rounds.ssse3.h:27:22: warning: implicit conversion from 'long' to 'int' changes value from 3041331479 to -1253635817 [-Wconstant-conversion]
hash.c: buf2 = _mm_set_epi32(3041331479, 3380367581, 887688300, 953160567);
hash.c: ~~~~~~~~~~~~~ ^~~~~~~~~~
hash.c: ./rounds.ssse3.h:30:46: warning: implicit conversion from 'long' to 'int' changes value from 3193202383 to -1101764913 [-Wconstant-conversion]
hash.c: buf1 = _mm_set_epi32(1065670069, 3232508343, 3193202383, 1160258022);
hash.c: ~~~~~~~~~~~~~ ^~~~~~~~~~
hash.c: ./rounds.ssse3.h:30:34: warning: implicit conversion from 'long' to 'int' changes value from 3232508343 to -1062458953 [-Wconstant-conversion]
hash.c: buf1 = _mm_set_epi32(1065670069, 3232508343, 3193202383, 1160258022);
hash.c: ~~~~~~~~~~~~~ ^~~~~~~~~~
hash.c: ./rounds.ssse3.h:51:57: warning: implicit conversion from 'long' to 'int' changes value from 3193202383 to -1101764913 [-Wconstant-conversion]
hash.c: buf2 = _mm_set_epi32(137296536, 3041331479, 1160258022, 3193202383);
hash.c: ~~~~~~~~~~~~~ ^~~~~~~~~~
hash.c: ...

Number of similar (compiler,implementation) pairs: 5, namely:
CompilerImplementations
clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE ssse3
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE ssse3
clang -march=native -O -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE ssse3
clang -march=native -Os -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE ssse3
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE ssse3

Compiler output

Implementation: ssse3
Security model: constbranchindex
Compiler: gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE
hash.c: In file included from hash.c:4:
hash.c: hash.c: In function 'blake256_compress':
hash.c: /usr/lib/gcc/x86_64-linux-gnu/10/include/tmmintrin.h:136:1: error: inlining failed in call to 'always_inline' '_mm_shuffle_epi8': target specific option mismatch
hash.c: 136 | _mm_shuffle_epi8 (__m128i __X, __m128i __Y)
hash.c: | ^~~~~~~~~~~~~~~~
hash.c: hash.c:108:15: note: called from here
hash.c: 108 | m.u128[3] = _mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(datablock + 48)), u8to32);
hash.c: | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
hash.c: In file included from hash.c:4:
hash.c: /usr/lib/gcc/x86_64-linux-gnu/10/include/tmmintrin.h:136:1: error: inlining failed in call to 'always_inline' '_mm_shuffle_epi8': target specific option mismatch
hash.c: 136 | _mm_shuffle_epi8 (__m128i __X, __m128i __Y)
hash.c: | ^~~~~~~~~~~~~~~~
hash.c: hash.c:107:15: note: called from here
hash.c: 107 | m.u128[2] = _mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(datablock + 32)), u8to32);
hash.c: | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
hash.c: In file included from hash.c:4:
hash.c: /usr/lib/gcc/x86_64-linux-gnu/10/include/tmmintrin.h:136:1: error: inlining failed in call to 'always_inline' '_mm_shuffle_epi8': target specific option mismatch
hash.c: 136 | _mm_shuffle_epi8 (__m128i __X, __m128i __Y)
hash.c: | ^~~~~~~~~~~~~~~~
hash.c: hash.c:106:15: note: called from here
hash.c: 106 | m.u128[1] = _mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(datablock + 16)), u8to32);
hash.c: | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
hash.c: In file included from hash.c:4:
hash.c: /usr/lib/gcc/x86_64-linux-gnu/10/include/tmmintrin.h:136:1: error: inlining failed in call to 'always_inline' '_mm_shuffle_epi8': target specific option mismatch
hash.c: 136 | _mm_shuffle_epi8 (__m128i __X, __m128i __Y)
hash.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
CompilerImplementations
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE ssse3
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv -fPIC -fPIE ssse3
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv -fPIC -fPIE ssse3
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv -fPIC -fPIE ssse3

Compiler output

Implementation: xop
Security model: constbranchindex
Compiler: clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
hash.c: hash.c:93:22: error: always_inline function '_mm_shuffle_epi8' requires target feature 'ssse3', but would be inlined into function 'blake256_compress' that is compiled without support for 'ssse3'
hash.c: const __m128i m0 = _mm_shuffle_epi8(LOADU(datablock + 00), u8to32);
hash.c: ^
hash.c: hash.c:94:22: error: always_inline function '_mm_shuffle_epi8' requires target feature 'ssse3', but would be inlined into function 'blake256_compress' that is compiled without support for 'ssse3'
hash.c: const __m128i m1 = _mm_shuffle_epi8(LOADU(datablock + 16), u8to32);
hash.c: ^
hash.c: hash.c:95:22: error: always_inline function '_mm_shuffle_epi8' requires target feature 'ssse3', but would be inlined into function 'blake256_compress' that is compiled without support for 'ssse3'
hash.c: const __m128i m2 = _mm_shuffle_epi8(LOADU(datablock + 32), u8to32);
hash.c: ^
hash.c: hash.c:96:22: error: always_inline function '_mm_shuffle_epi8' requires target feature 'ssse3', but would be inlined into function 'blake256_compress' that is compiled without support for 'ssse3'
hash.c: const __m128i m3 = _mm_shuffle_epi8(LOADU(datablock + 48), u8to32);
hash.c: ^
hash.c: hash.c:115:3: error: always_inline function '_mm_perm_epi8' requires target feature 'xop', but would be inlined into function 'blake256_compress' that is compiled without support for 'xop'
hash.c: ROUND( 0);
hash.c: ^
hash.c: ./rounds.h:51:3: note: expanded from macro 'ROUND'
hash.c: LOAD_MSG_ ##r ##_1(buf1); \
hash.c: ^
hash.c: <scratch space>:234:1: note: expanded from here
hash.c: LOAD_MSG_0_1
hash.c: ^
hash.c: ./load.xop.h:19:6: note: expanded from macro 'LOAD_MSG_0_1'
hash.c: s0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(6),TOB(4),TOB(2),TOB(0)) ); \
hash.c: ^
hash.c: hash.c:115:3: error: '__builtin_ia32_vprotdi' needs target feature xop
hash.c: ...

Number of similar (compiler,implementation) pairs: 5, namely:
CompilerImplementations
clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE xop
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE xop
clang -march=native -O -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE xop
clang -march=native -Os -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE xop
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE xop