Implementation notes: x86, bolero, crypto_sign/sphincss128shake256

Computer: bolero
Architecture: x86
CPU ID: GenuineIntel-000406f1-bfebfbff
SUPERCOP version: 20190110
Operation: crypto_sign
Primitive: sphincss128shake256
Time | Implementation | Compiler | Benchmark date | SUPERCOP version
4949351812avx2gcc -m32 -march=native -mtune=native -O3 -fomit-frame-pointer2018092420180818
4950544424avx2gcc -m32 -march=core-avx2 -O3 -fomit-frame-pointer2018092420180818
8150940084avx2gcc -m32 -march=core-avx2 -O2 -fomit-frame-pointer2018092420180818
8151887688avx2gcc -m32 -march=native -mtune=native -O2 -fomit-frame-pointer2018092420180818
8275641892avx2gcc -m32 -march=native -mtune=native -O -fomit-frame-pointer2018092420180818
8367668624avx2gcc -m32 -march=core-avx2 -O -fomit-frame-pointer2018092420180818
8532647852avx2gcc -m32 -march=core-avx2 -Os -fomit-frame-pointer2018092420180818
8533374576avx2gcc -m32 -march=native -mtune=native -Os -fomit-frame-pointer2018092420180818
23740152260refgcc -m32 -march=core-avx2 -O3 -fomit-frame-pointer2018092520180818
23742159352refgcc -m32 -march=native -mtune=native -O3 -fomit-frame-pointer2018092420180818
25594271592refgcc -m32 -march=barcelona -O3 -fomit-frame-pointer2018092420180818
25608639900refgcc -m32 -march=core2 -O3 -fomit-frame-pointer2018092520180818
25656347092refgcc -funroll-loops -m32 -march=k8 -O3 -fomit-frame-pointer2018092520180818
25662557596refgcc -m32 -march=corei7-avx -O3 -fomit-frame-pointer2018092520180818
25692903720refgcc -m32 -march=core-avx-i -O3 -fomit-frame-pointer2018092520180818
25694492200refgcc -m32 -march=core2 -msse4.1 -O3 -fomit-frame-pointer2018092420180818
25697375324refgcc -m32 -march=core2 -msse4 -O3 -fomit-frame-pointer2018092420180818
25751461484refgcc -m32 -march=corei7 -O3 -fomit-frame-pointer2018092520180818
25760308236refgcc -funroll-loops -m32 -march=athlon -O3 -fomit-frame-pointer2018092520180818
25868106244refgcc -funroll-loops -m32 -march=barcelona -O3 -fomit-frame-pointer2018092620180818
25889835108refgcc -m32 -march=k6-2 -O3 -fomit-frame-pointer2018092520180818
25915705792refgcc -m32 -march=k6 -O3 -fomit-frame-pointer2018092520180818
25924820356refgcc -funroll-loops -m32 -march=k6 -O3 -fomit-frame-pointer2018092620180818
25931654224refgcc -funroll-loops -m32 -march=k6-3 -O3 -fomit-frame-pointer2018092620180818
25973057792refgcc -funroll-loops -m32 -march=k6-2 -O3 -fomit-frame-pointer2018092620180818
25985471424refgcc -m32 -O3 -fomit-frame-pointer2018092420180818
25991406032refgcc -m32 -march=k6-3 -O3 -fomit-frame-pointer2018092520180818
26029326192refgcc -funroll-loops -m32 -O3 -fomit-frame-pointer2018092520180818
26050494804refgcc -m32 -march=k8 -O3 -fomit-frame-pointer2018092520180818
26068126996refgcc -m32 -march=athlon -O3 -fomit-frame-pointer2018092520180818
26224387320refgcc -funroll-loops -m32 -march=pentium4 -O3 -fomit-frame-pointer2018092620180818
26267475800refgcc -m32 -march=pentium4 -O3 -fomit-frame-pointer2018092520180818
26269038500refgcc -m32 -march=prescott -O3 -fomit-frame-pointer2018092520180818
26295258616refgcc -funroll-loops -m32 -march=prescott -O3 -fomit-frame-pointer2018092620180818
26360907220refgcc -m32 -march=nocona -O3 -fomit-frame-pointer2018092520180818
26381624808refgcc -funroll-loops -m32 -march=nocona -O3 -fomit-frame-pointer2018092620180818
27172905432refgcc -m32 -march=i386 -O3 -fomit-frame-pointer2018092520180818
27302273780refgcc -funroll-loops -m32 -march=i386 -O3 -fomit-frame-pointer2018092620180818
27394519600refgcc -m32 -march=i486 -O3 -fomit-frame-pointer2018092520180818
27549879816refgcc -funroll-loops -m32 -march=i486 -O3 -fomit-frame-pointer2018092620180818
27551412264refgcc -m32 -march=core-avx2 -O2 -fomit-frame-pointer2018092520180818
27596120432refgcc -m32 -march=native -mtune=native -O2 -fomit-frame-pointer2018092420180818
28105217632refgcc -m32 -march=native -mtune=native -O -fomit-frame-pointer2018092420180818
28111010200refgcc -m32 -march=core-avx2 -O -fomit-frame-pointer2018092520180818
28114035672refgcc -m32 -march=pentium-m -O3 -fomit-frame-pointer2018092520180818
28161322832refgcc -funroll-loops -m32 -O -fomit-frame-pointer2018092520180818
28305635640refgcc -m32 -march=pentium2 -O3 -fomit-frame-pointer2018092520180818
28309381272refgcc -m32 -march=pentiumpro -O3 -fomit-frame-pointer2018092520180818
28325468504refgcc -funroll-loops -m32 -march=barcelona -O -fomit-frame-pointer2018092620180818
28346749156refgcc -funroll-loops -m32 -march=k6 -O -fomit-frame-pointer2018092620180818
28352750216refgcc -funroll-loops -m32 -march=k6-3 -O -fomit-frame-pointer2018092620180818
28354763588refgcc -funroll-loops -m32 -march=k6-2 -O -fomit-frame-pointer2018092620180818
28359056012refgcc -m32 -march=pentium3 -O3 -fomit-frame-pointer2018092520180818
28397909148refgcc -funroll-loops -m32 -march=k8 -O -fomit-frame-pointer2018092520180818
28422734256refgcc -funroll-loops -m32 -march=athlon -O -fomit-frame-pointer2018092620180818
28494554064refgcc -funroll-loops -m32 -march=k8 -O2 -fomit-frame-pointer2018092520180818
28506038488refgcc -funroll-loops -m32 -march=barcelona -O2 -fomit-frame-pointer2018092620180818
28506939564refgcc -funroll-loops -m32 -march=nocona -O -fomit-frame-pointer2018092620180818
28520756104refgcc -funroll-loops -m32 -march=prescott -O -fomit-frame-pointer2018092620180818
28520968764refgcc -funroll-loops -m32 -O2 -fomit-frame-pointer2018092520180818
28546712996refgcc -funroll-loops -m32 -march=pentium-m -O3 -fomit-frame-pointer2018092620180818
28615428896refgcc -funroll-loops -m32 -march=athlon -O2 -fomit-frame-pointer2018092620180818
28817928244refgcc -funroll-loops -m32 -march=pentium3 -O3 -fomit-frame-pointer2018092620180818
28842066928refgcc -funroll-loops -m32 -march=pentium2 -O3 -fomit-frame-pointer2018092620180818
28842630700refgcc -funroll-loops -m32 -march=pentiumpro -O3 -fomit-frame-pointer2018092620180818
28937938256refgcc -funroll-loops -m32 -march=i386 -O2 -fomit-frame-pointer2018092620180818
29053760304refgcc -funroll-loops -m32 -march=prescott -O2 -fomit-frame-pointer2018092620180818
29055246928refgcc -m32 -march=core-avx2 -Os -fomit-frame-pointer2018092520180818
29056197008refgcc -funroll-loops -m32 -march=i486 -O -fomit-frame-pointer2018092620180818
29056460028refgcc -m32 -march=native -mtune=native -Os -fomit-frame-pointer2018092420180818
29069356264refgcc -funroll-loops -m32 -march=nocona -O2 -fomit-frame-pointer2018092620180818
29080709708refgcc -funroll-loops -m32 -march=i486 -O2 -fomit-frame-pointer2018092620180818
29224922516refgcc -funroll-loops -m32 -march=k6-2 -Os -fomit-frame-pointer2018092620180818
29232953692refgcc -funroll-loops -m32 -march=k6 -Os -fomit-frame-pointer2018092620180818
29235664192refgcc -funroll-loops -m32 -march=k6-3 -Os -fomit-frame-pointer2018092620180818
29313171412refgcc -m32 -march=nocona -Os -fomit-frame-pointer2018092520180818
29322384500refgcc -m32 -march=prescott -Os -fomit-frame-pointer2018092520180818
29326525844refgcc -m32 -march=pentium4 -Os -fomit-frame-pointer2018092520180818
29393524080refgcc -m32 -march=i386 -Os -fomit-frame-pointer2018092520180818
29396671240refgcc -m32 -march=i486 -Os -fomit-frame-pointer2018092520180818
29478408160refgcc -m32 -march=pentium -O3 -fomit-frame-pointer2018092520180818
29482692240refgcc -m32 -march=pentium-mmx -O3 -fomit-frame-pointer2018092520180818
29499432140refgcc -funroll-loops -m32 -march=i386 -O -fomit-frame-pointer2018092620180818
29507201200refgcc -funroll-loops -m32 -march=i486 -Os -fomit-frame-pointer2018092620180818
29516501244refgcc -funroll-loops -m32 -march=i386 -Os -fomit-frame-pointer2018092620180818
29539743996refgcc -funroll-loops -m32 -march=k6 -O2 -fomit-frame-pointer2018092620180818
29541476992refgcc -funroll-loops -m32 -march=k6-2 -O2 -fomit-frame-pointer2018092620180818
29576357680refgcc -funroll-loops -m32 -march=k6-3 -O2 -fomit-frame-pointer2018092620180818
29591980732refgcc -m32 -march=k6-3 -Os -fomit-frame-pointer2018092520180818
29616460776refgcc -funroll-loops -m32 -march=pentium-mmx -O -fomit-frame-pointer2018092620180818
29616484152refgcc -funroll-loops -m32 -march=pentium -O -fomit-frame-pointer2018092620180818
29644716336refgcc -m32 -march=k6 -Os -fomit-frame-pointer2018092520180818
29654781092refgcc -m32 -march=k6-2 -Os -fomit-frame-pointer2018092520180818
29656091852refgcc -m32 -march=athlon -Os -fomit-frame-pointer2018092520180818
29683703840refgcc -m32 -march=k8 -Os -fomit-frame-pointer2018092520180818
29696038580refgcc -m32 -march=barcelona -Os -fomit-frame-pointer2018092420180818
29696073020refgcc -m32 -Os -fomit-frame-pointer2018092420180818
29741597684refgcc -funroll-loops -m32 -march=pentium-mmx -Os -fomit-frame-pointer2018092620180818
29741963012refgcc -funroll-loops -m32 -march=pentium -Os -fomit-frame-pointer2018092620180818
29748258988refgcc -m32 -march=corei7-avx -Os -fomit-frame-pointer2018092520180818
29823440276refgcc -m32 -march=core-avx-i -Os -fomit-frame-pointer2018092520180818
29858546476refgcc -funroll-loops -m32 -march=pentium4 -Os -fomit-frame-pointer2018092620180818
29860284460refgcc -funroll-loops -m32 -march=nocona -Os -fomit-frame-pointer2018092620180818
29862175844refgcc -funroll-loops -m32 -march=prescott -Os -fomit-frame-pointer2018092620180818
29869868320refgcc -m32 -march=core2 -msse4 -Os -fomit-frame-pointer2018092420180818
29875995912refgcc -m32 -march=core2 -msse4.1 -Os -fomit-frame-pointer2018092520180818
29881114512refgcc -m32 -march=core2 -Os -fomit-frame-pointer2018092520180818
29882191568refgcc -m32 -march=corei7 -Os -fomit-frame-pointer2018092520180818
29931427392refgcc -funroll-loops -m32 -march=pentium4 -O -fomit-frame-pointer2018092620180818
29963324372refgcc -funroll-loops -m32 -march=pentium2 -O -fomit-frame-pointer2018092620180818
29995763940refgcc -funroll-loops -m32 -march=pentium3 -O -fomit-frame-pointer2018092620180818
30033831588refgcc -funroll-loops -m32 -march=pentiumpro -O -fomit-frame-pointer2018092620180818
30034695640refgcc -funroll-loops -m32 -march=k8 -Os -fomit-frame-pointer2018092520180818
30052507404refgcc -funroll-loops -m32 -march=barcelona -Os -fomit-frame-pointer2018092620180818
30145642236refgcc -funroll-loops -m32 -march=athlon -Os -fomit-frame-pointer2018092620180818
30160728140refgcc -funroll-loops -m32 -Os -fomit-frame-pointer2018092520180818
30190100648refgcc -funroll-loops -m32 -march=pentium-m -O -fomit-frame-pointer2018092620180818
30191147692refgcc -m32 -march=pentium-mmx -Os -fomit-frame-pointer2018092520180818
30191812156refgcc -m32 -march=pentium -Os -fomit-frame-pointer2018092520180818
30356033776refgcc -funroll-loops -m32 -march=pentium -O3 -fomit-frame-pointer2018092620180818
30411731192refgcc -funroll-loops -m32 -march=pentium-mmx -O3 -fomit-frame-pointer2018092620180818
30484828492refgcc -funroll-loops -m32 -march=pentium4 -O2 -fomit-frame-pointer2018092620180818
30510614808refgcc -m32 -march=corei7-avx -O2 -fomit-frame-pointer2018092520180818
30536470424refgcc -m32 -march=core-avx-i -O2 -fomit-frame-pointer2018092520180818
30567881040refgcc -m32 -march=corei7 -O2 -fomit-frame-pointer2018092520180818
30737593300refgcc -m32 -march=corei7 -O -fomit-frame-pointer2018092520180818
30869629904refgcc -funroll-loops -m32 -march=pentium-mmx -O2 -fomit-frame-pointer2018092620180818
30875843424refgcc -funroll-loops -m32 -march=pentium -O2 -fomit-frame-pointer2018092620180818
30886185056refgcc -m32 -O2 -fomit-frame-pointer2018092420180818
30926846612refgcc -m32 -march=barcelona -O2 -fomit-frame-pointer2018092420180818
30940993540refgcc -m32 -march=pentium3 -Os -fomit-frame-pointer2018092520180818
30972502288refgcc -m32 -march=core2 -msse4.1 -O2 -fomit-frame-pointer2018092520180818
30976424196refgcc -m32 -march=core2 -msse4 -O2 -fomit-frame-pointer2018092420180818
30981835096refgcc -m32 -march=pentium-m -Os -fomit-frame-pointer2018092520180818
30983536996refgcc -m32 -march=core2 -O2 -fomit-frame-pointer2018092520180818
31008413264refgcc -m32 -march=pentium2 -Os -fomit-frame-pointer2018092520180818
31038174604refgcc -m32 -march=pentiumpro -Os -fomit-frame-pointer2018092520180818
31060140696refgcc -m32 -march=k8 -O2 -fomit-frame-pointer2018092520180818
31064355100refgcc -m32 -march=athlon -O2 -fomit-frame-pointer2018092520180818
31190774220refgcc -m32 -march=k8 -O -fomit-frame-pointer2018092520180818
31195949080refgcc -m32 -march=corei7-avx -O -fomit-frame-pointer2018092520180818
31197763396refgcc -m32 -march=core-avx-i -O -fomit-frame-pointer2018092520180818
31291234372refgcc -m32 -march=core2 -O -fomit-frame-pointer2018092520180818
31335927180refgcc -m32 -march=athlon -O -fomit-frame-pointer2018092520180818
31348911512refgcc -m32 -march=k6 -O2 -fomit-frame-pointer2018092520180818
31380641964refgcc -m32 -march=core2 -msse4 -O -fomit-frame-pointer2018092420180818
31419907972refgcc -funroll-loops -m32 -march=pentium3 -Os -fomit-frame-pointer2018092620180818
31422897104refgcc -m32 -march=core2 -msse4.1 -O -fomit-frame-pointer2018092520180818
31425151184refgcc -funroll-loops -m32 -march=pentium-m -Os -fomit-frame-pointer2018092620180818
31435770452refgcc -m32 -march=k6-2 -O2 -fomit-frame-pointer2018092520180818
31525625552refgcc -m32 -O -fomit-frame-pointer2018092420180818
31584488848refgcc -m32 -march=k6-2 -O -fomit-frame-pointer2018092520180818
31588650456refgcc -m32 -march=k6-3 -O -fomit-frame-pointer2018092520180818
31605477992refgcc -m32 -march=k6 -O -fomit-frame-pointer2018092520180818
31627013208refgcc -m32 -march=k6-3 -O2 -fomit-frame-pointer2018092520180818
31784871104refgcc -m32 -march=i486 -O2 -fomit-frame-pointer2018092520180818
31822651636refgcc -m32 -march=nocona -O2 -fomit-frame-pointer2018092520180818
31851331172refgcc -m32 -march=prescott -O2 -fomit-frame-pointer2018092520180818
32008065888refgcc -m32 -march=i386 -O2 -fomit-frame-pointer2018092520180818
32046650948refgcc -funroll-loops -m32 -march=pentiumpro -Os -fomit-frame-pointer2018092620180818
32047031000refgcc -funroll-loops -m32 -march=pentium2 -Os -fomit-frame-pointer2018092620180818
32138141912refgcc -m32 -march=nocona -O -fomit-frame-pointer2018092520180818
32149839816refgcc -m32 -march=prescott -O -fomit-frame-pointer2018092520180818
32207529596refgcc -funroll-loops -m32 -march=pentium-m -O2 -fomit-frame-pointer2018092620180818
32210718968refgcc -funroll-loops -m32 -march=pentiumpro -O2 -fomit-frame-pointer2018092620180818
32248617560refgcc -funroll-loops -m32 -march=pentium3 -O2 -fomit-frame-pointer2018092620180818
32279329852refgcc -funroll-loops -m32 -march=pentium2 -O2 -fomit-frame-pointer2018092620180818
32300487396refgcc -m32 -march=pentium -O -fomit-frame-pointer2018092520180818
32361650688refgcc -m32 -march=pentium-mmx -O -fomit-frame-pointer2018092520180818
32423459384refgcc -m32 -march=barcelona -O -fomit-frame-pointer2018092420180818
32491225108refgcc -m32 -march=i486 -O -fomit-frame-pointer2018092520180818
32736070180refgcc -m32 -march=pentium3 -O -fomit-frame-pointer2018092520180818
32740831508refgcc -m32 -march=i386 -O -fomit-frame-pointer2018092520180818
33028547364refgcc -m32 -march=pentium4 -O2 -fomit-frame-pointer2018092520180818
33089781808refgcc -m32 -march=pentium2 -O -fomit-frame-pointer2018092520180818
33104274952refgcc -m32 -march=pentiumpro -O -fomit-frame-pointer2018092520180818
33282437868refgcc -m32 -march=pentium-m -O -fomit-frame-pointer2018092520180818
33447431860refgcc -m32 -march=pentium4 -O -fomit-frame-pointer2018092520180818
33660857480refgcc -m32 -march=pentium -O2 -fomit-frame-pointer2018092520180818
33661049984refgcc -m32 -march=pentium-mmx -O2 -fomit-frame-pointer2018092520180818
35026994584refgcc -m32 -march=pentium3 -O2 -fomit-frame-pointer2018092520180818
35083280864refgcc -m32 -march=pentium2 -O2 -fomit-frame-pointer2018092520180818
35098780820refgcc -m32 -march=pentiumpro -O2 -fomit-frame-pointer2018092520180818
35150178436refgcc -m32 -march=pentium-m -O2 -fomit-frame-pointer2018092520180818

Compiler output

Implementation: crypto_sign/sphincss128shake256/avx2
Compiler: gcc -funroll-loops -m32 -O2 -fomit-frame-pointer
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:40: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: error: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 156, namely:
Compiler | Implementations
gcc -funroll-loops -m32 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=athlon -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=athlon -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=athlon -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=athlon -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=barcelona -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=barcelona -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=barcelona -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=barcelona -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i386 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i386 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i386 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i386 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i486 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i486 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i486 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i486 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-2 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-2 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-2 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-2 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-3 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-3 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-3 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-3 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k8 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k8 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k8 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k8 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=nocona -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=nocona -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=nocona -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=nocona -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-m -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-m -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-m -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-m -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-mmx -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-mmx -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-mmx -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-mmx -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium2 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium2 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium2 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium2 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium3 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium3 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium3 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium3 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium4 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium4 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium4 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium4 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentiumpro -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentiumpro -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentiumpro -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentiumpro -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=prescott -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=prescott -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=prescott -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=prescott -Os -fomit-frame-pointer avx2
gcc -m32 -O2 -fomit-frame-pointer avx2
gcc -m32 -O3 -fomit-frame-pointer avx2
gcc -m32 -O -fomit-frame-pointer avx2
gcc -m32 -Os -fomit-frame-pointer avx2
gcc -m32 -march=athlon -O2 -fomit-frame-pointer avx2
gcc -m32 -march=athlon -O3 -fomit-frame-pointer avx2
gcc -m32 -march=athlon -O -fomit-frame-pointer avx2
gcc -m32 -march=athlon -Os -fomit-frame-pointer avx2
gcc -m32 -march=core2 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -O -fomit-frame-pointer avx2
gcc -m32 -march=core2 -Os -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4.1 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4.1 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4.1 -O -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4.1 -Os -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4 -O -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4 -Os -fomit-frame-pointer avx2
gcc -m32 -march=corei7 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=corei7 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=corei7 -O -fomit-frame-pointer avx2
gcc -m32 -march=corei7 -Os -fomit-frame-pointer avx2
gcc -m32 -march=i386 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=i386 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=i386 -O -fomit-frame-pointer avx2
gcc -m32 -march=i386 -Os -fomit-frame-pointer avx2
gcc -m32 -march=i486 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=i486 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=i486 -O -fomit-frame-pointer avx2
gcc -m32 -march=i486 -Os -fomit-frame-pointer avx2
gcc -m32 -march=k6-2 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=k6-2 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=k6-2 -O -fomit-frame-pointer avx2
gcc -m32 -march=k6-2 -Os -fomit-frame-pointer avx2
gcc -m32 -march=k6-3 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=k6-3 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=k6-3 -O -fomit-frame-pointer avx2
gcc -m32 -march=k6-3 -Os -fomit-frame-pointer avx2
gcc -m32 -march=k6 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=k6 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=k6 -O -fomit-frame-pointer avx2
gcc -m32 -march=k6 -Os -fomit-frame-pointer avx2
gcc -m32 -march=k8 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=k8 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=k8 -O -fomit-frame-pointer avx2
gcc -m32 -march=k8 -Os -fomit-frame-pointer avx2
gcc -m32 -march=nocona -O2 -fomit-frame-pointer avx2
gcc -m32 -march=nocona -O3 -fomit-frame-pointer avx2
gcc -m32 -march=nocona -O -fomit-frame-pointer avx2
gcc -m32 -march=nocona -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium-m -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium-m -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium-m -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium-m -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium-mmx -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium-mmx -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium-mmx -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium-mmx -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium2 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium2 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium2 -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium2 -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium3 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium3 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium3 -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium3 -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium4 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium4 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium4 -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium4 -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentiumpro -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentiumpro -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentiumpro -O -fomit-frame-pointer avx2
gcc -m32 -march=pentiumpro -Os -fomit-frame-pointer avx2
gcc -m32 -march=prescott -O2 -fomit-frame-pointer avx2
gcc -m32 -march=prescott -O3 -fomit-frame-pointer avx2
gcc -m32 -march=prescott -O -fomit-frame-pointer avx2
gcc -m32 -march=prescott -Os -fomit-frame-pointer avx2

Compiler output

Implementation: crypto_sign/sphincss128shake256/avx2
Compiler: gcc -m32 -march=barcelona -O2 -fomit-frame-pointer
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:40: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: error: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ...
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:40: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: error: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler | Implementations
gcc -m32 -march=barcelona -O2 -fomit-frame-pointer avx2
gcc -m32 -march=barcelona -O3 -fomit-frame-pointer avx2
gcc -m32 -march=barcelona -O -fomit-frame-pointer avx2
gcc -m32 -march=barcelona -Os -fomit-frame-pointer avx2

Compiler output

Implementation: crypto_sign/sphincss128shake256/avx2
Compiler: gcc -m32 -march=core-avx-i -O2 -fomit-frame-pointer
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: error: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: error: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:142:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+2], lanes2 ),\
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 8, namely:
Compiler | Implementations
gcc -m32 -march=core-avx-i -O2 -fomit-frame-pointer avx2
gcc -m32 -march=core-avx-i -O3 -fomit-frame-pointer avx2
gcc -m32 -march=core-avx-i -O -fomit-frame-pointer avx2
gcc -m32 -march=core-avx-i -Os -fomit-frame-pointer avx2
gcc -m32 -march=corei7-avx -O2 -fomit-frame-pointer avx2
gcc -m32 -march=corei7-avx -O3 -fomit-frame-pointer avx2
gcc -m32 -march=corei7-avx -O -fomit-frame-pointer avx2
gcc -m32 -march=corei7-avx -Os -fomit-frame-pointer avx2