Implementation notes: x86, kizomba, crypto_sign/sphincss256shake256

Computer: kizomba
Architecture: x86
CPU ID: GenuineIntel-000906e9-bfebfbff
SUPERCOP version: 20181216
Operation: crypto_sign
Primitive: sphincss256shake256
Time Implementation Compiler Benchmark date SUPERCOP version
7288845948avx2gcc -m32 -march=core-avx2 -O3 -fomit-frame-pointer2018092520180818
8056339103avx2gcc -m32 -march=native -mtune=native -O3 -fomit-frame-pointer2018092520180818
11849740000avx2gcc -m32 -march=native -mtune=native -O -fomit-frame-pointer2018092520180818
12034134992avx2gcc -m32 -march=core-avx2 -O -fomit-frame-pointer2018092520180818
13371641671avx2gcc -m32 -march=core-avx2 -Os -fomit-frame-pointer2018092520180818
13487161000avx2gcc -m32 -march=core-avx2 -O2 -fomit-frame-pointer2018092520180818
15747402351avx2gcc -m32 -march=native -mtune=native -Os -fomit-frame-pointer2018092520180818
28237969188avx2gcc -m32 -march=native -mtune=native -O2 -fomit-frame-pointer2018092520180818
28991402500refgcc -m32 -march=core2 -msse4.1 -O3 -fomit-frame-pointer2018092520180818
29020573879refgcc -m32 -march=core2 -msse4 -O3 -fomit-frame-pointer2018092520180818
29037355118refgcc -m32 -march=corei7 -O3 -fomit-frame-pointer2018092520180818
29308038936refgcc -m32 -march=core2 -O3 -fomit-frame-pointer2018092520180818
29684148524refgcc -funroll-loops -m32 -march=pentium-m -O3 -fomit-frame-pointer2018092620180818
29702823395refgcc -m32 -march=core-avx2 -O3 -fomit-frame-pointer2018092520180818
30332194772refgcc -m32 -march=native -mtune=native -O3 -fomit-frame-pointer2018092520180818
30363940320refgcc -m32 -march=pentium-m -O3 -fomit-frame-pointer2018092520180818
32542381149refgcc -funroll-loops -m32 -march=pentium4 -O3 -fomit-frame-pointer2018092620180818
32954153075refgcc -m32 -march=core-avx-i -O3 -fomit-frame-pointer2018092520180818
32989749526refgcc -m32 -march=corei7-avx -O3 -fomit-frame-pointer2018092520180818
33017239135refgcc -m32 -march=pentium4 -O3 -fomit-frame-pointer2018092620180818
33634604363refgcc -m32 -march=nocona -O3 -fomit-frame-pointer2018092520180818
33639461575refgcc -m32 -march=prescott -O3 -fomit-frame-pointer2018092520180818
33658462690refgcc -funroll-loops -m32 -march=nocona -O3 -fomit-frame-pointer2018092620180818
33660061806refgcc -funroll-loops -m32 -march=prescott -O3 -fomit-frame-pointer2018092620180818
33715532065refgcc -m32 -march=core-avx2 -O2 -fomit-frame-pointer2018092520180818
33768281677refgcc -m32 -march=corei7-avx -O2 -fomit-frame-pointer2018092520180818
33770465504refgcc -m32 -march=core-avx-i -O2 -fomit-frame-pointer2018092520180818
33817398700refgcc -funroll-loops -m32 -march=pentium-m -O2 -fomit-frame-pointer2018092620180818
34390901881refgcc -m32 -march=core-avx2 -Os -fomit-frame-pointer2018092520180818
34846614944refgcc -m32 -march=core2 -msse4.1 -O2 -fomit-frame-pointer2018092520180818
34856271129refgcc -m32 -march=core2 -msse4 -O2 -fomit-frame-pointer2018092520180818
34862107486refgcc -m32 -march=corei7 -O2 -fomit-frame-pointer2018092520180818
34998296440refgcc -m32 -march=corei7-avx -Os -fomit-frame-pointer2018092520180818
35006688148refgcc -m32 -march=core-avx-i -Os -fomit-frame-pointer2018092520180818
35096660144refgcc -m32 -march=core2 -msse4.1 -Os -fomit-frame-pointer2018092520180818
35097710004refgcc -m32 -march=corei7 -Os -fomit-frame-pointer2018092520180818
35102947425refgcc -m32 -march=core2 -msse4 -Os -fomit-frame-pointer2018092520180818
36117154974refgcc -funroll-loops -m32 -march=pentium-m -Os -fomit-frame-pointer2018092620180818
36158050011refgcc -funroll-loops -m32 -march=pentium4 -Os -fomit-frame-pointer2018092620180818
36160392904refgcc -funroll-loops -m32 -march=nocona -Os -fomit-frame-pointer2018092620180818
36162363103refgcc -funroll-loops -m32 -march=prescott -Os -fomit-frame-pointer2018092620180818
36235576510refgcc -m32 -march=pentium-m -Os -fomit-frame-pointer2018092520180818
36251186534refgcc -funroll-loops -m32 -march=k8 -O -fomit-frame-pointer2018092620180818
36317290843refgcc -funroll-loops -m32 -march=barcelona -O -fomit-frame-pointer2018092620180818
36447624593refgcc -funroll-loops -m32 -march=prescott -O2 -fomit-frame-pointer2018092620180818
36454711010refgcc -funroll-loops -m32 -march=nocona -O2 -fomit-frame-pointer2018092620180818
36605853193refgcc -funroll-loops -m32 -march=prescott -O -fomit-frame-pointer2018092620180818
36618825458refgcc -funroll-loops -m32 -march=nocona -O -fomit-frame-pointer2018092620180818
36687487484refgcc -funroll-loops -m32 -march=pentium-m -O -fomit-frame-pointer2018092620180818
36723058820refgcc -funroll-loops -m32 -march=pentium4 -O -fomit-frame-pointer2018092620180818
36848559308refgcc -m32 -march=core2 -O2 -fomit-frame-pointer2018092520180818
36886860672refgcc -m32 -march=prescott -Os -fomit-frame-pointer2018092520180818
36889428222refgcc -m32 -march=nocona -Os -fomit-frame-pointer2018092520180818
36898890169refgcc -m32 -march=pentium4 -Os -fomit-frame-pointer2018092620180818
36981466744refgcc -funroll-loops -m32 -march=pentium4 -O2 -fomit-frame-pointer2018092620180818
36986590994refgcc -m32 -march=core2 -Os -fomit-frame-pointer2018092520180818
37922713700refgcc -m32 -march=native -mtune=native -O -fomit-frame-pointer2018092520180818
38180840090refgcc -m32 -march=pentium-m -O2 -fomit-frame-pointer2018092620180818
38643474692refgcc -m32 -march=core-avx2 -O -fomit-frame-pointer2018092520180818
40332189416refgcc -m32 -march=barcelona -O -fomit-frame-pointer2018092520180818
40380667918refgcc -m32 -march=pentium4 -O2 -fomit-frame-pointer2018092620180818
40394761645refgcc -m32 -march=nocona -O2 -fomit-frame-pointer2018092520180818
40398245272refgcc -m32 -march=prescott -O2 -fomit-frame-pointer2018092520180818
40417923863refgcc -m32 -march=k8 -O -fomit-frame-pointer2018092520180818
40604405254refgcc -m32 -march=pentium4 -O -fomit-frame-pointer2018092620180818
40769549414refgcc -funroll-loops -m32 -march=athlon -O3 -fomit-frame-pointer2018092620180818
40868036818refgcc -m32 -march=athlon -O3 -fomit-frame-pointer2018092520180818
40875784463refgcc -funroll-loops -m32 -march=k6-3 -O3 -fomit-frame-pointer2018092620180818
40875897663refgcc -funroll-loops -m32 -march=k6-2 -O3 -fomit-frame-pointer2018092620180818
41141991877refgcc -m32 -march=core2 -msse4 -O -fomit-frame-pointer2018092520180818
41144641315refgcc -m32 -march=core2 -msse4.1 -O -fomit-frame-pointer2018092520180818
41145420234refgcc -m32 -march=core2 -O -fomit-frame-pointer2018092520180818
41252934603refgcc -m32 -march=core-avx-i -O -fomit-frame-pointer2018092520180818
41261068839refgcc -m32 -march=corei7-avx -O -fomit-frame-pointer2018092520180818
41262610236refgcc -m32 -O3 -fomit-frame-pointer2018092520180818
41270534109refgcc -m32 -march=corei7 -O -fomit-frame-pointer2018092520180818
41368828258refgcc -funroll-loops -m32 -O3 -fomit-frame-pointer2018092620180818
41433116018refgcc -m32 -march=k6-2 -O3 -fomit-frame-pointer2018092520180818
41448835123refgcc -m32 -march=k6-3 -O3 -fomit-frame-pointer2018092520180818
41542086514refgcc -m32 -march=pentium-m -O -fomit-frame-pointer2018092620180818
41560409383refgcc -m32 -march=prescott -O -fomit-frame-pointer2018092520180818
41570620177refgcc -m32 -march=nocona -O -fomit-frame-pointer2018092520180818
41661434730refgcc -funroll-loops -m32 -march=k6 -O3 -fomit-frame-pointer2018092620180818
41971538176refgcc -m32 -march=k6 -O3 -fomit-frame-pointer2018092520180818
43093515089refgcc -funroll-loops -m32 -march=i486 -O3 -fomit-frame-pointer2018092620180818
43210051924refgcc -funroll-loops -m32 -march=pentiumpro -O3 -fomit-frame-pointer2018092620180818
43210343010refgcc -funroll-loops -m32 -march=pentium2 -O3 -fomit-frame-pointer2018092620180818
43278830720refgcc -m32 -march=i386 -O3 -fomit-frame-pointer2018092620180818
43306635833refgcc -funroll-loops -m32 -march=i386 -O3 -fomit-frame-pointer2018092620180818
43407607661refgcc -funroll-loops -m32 -march=pentium3 -O3 -fomit-frame-pointer2018092620180818
43937975103refgcc -m32 -march=i486 -O3 -fomit-frame-pointer2018092620180818
44217755462refgcc -funroll-loops -m32 -march=k6-3 -O2 -fomit-frame-pointer2018092620180818
44220413445refgcc -funroll-loops -m32 -march=k6-2 -O2 -fomit-frame-pointer2018092620180818
44224777607refgcc -funroll-loops -m32 -march=k6 -O2 -fomit-frame-pointer2018092620180818
44424082647refgcc -funroll-loops -m32 -O2 -fomit-frame-pointer2018092620180818
44631248974refgcc -funroll-loops -m32 -march=athlon -O -fomit-frame-pointer2018092620180818
44757493887refgcc -funroll-loops -m32 -march=k6 -O -fomit-frame-pointer2018092620180818
44758610401refgcc -funroll-loops -m32 -O -fomit-frame-pointer2018092620180818
44759407576refgcc -funroll-loops -m32 -march=k6-3 -O -fomit-frame-pointer2018092620180818
44763332476refgcc -funroll-loops -m32 -march=k6-2 -O -fomit-frame-pointer2018092620180818
44834040076refgcc -funroll-loops -m32 -march=pentium2 -O -fomit-frame-pointer2018092620180818
44839390708refgcc -funroll-loops -m32 -march=pentium3 -O -fomit-frame-pointer2018092620180818
44860560490refgcc -funroll-loops -m32 -march=pentiumpro -O -fomit-frame-pointer2018092620180818
45241908099refgcc -m32 -march=pentiumpro -O3 -fomit-frame-pointer2018092620180818
45253749512refgcc -m32 -march=pentium2 -O3 -fomit-frame-pointer2018092620180818
45286924901refgcc -m32 -march=pentium3 -O3 -fomit-frame-pointer2018092620180818
45330071041refgcc -funroll-loops -m32 -march=athlon -O2 -fomit-frame-pointer2018092620180818
45464285508refgcc -m32 -march=k6-3 -Os -fomit-frame-pointer2018092520180818
45466239468refgcc -m32 -march=k6-2 -Os -fomit-frame-pointer2018092520180818
45467783716refgcc -m32 -march=k6 -Os -fomit-frame-pointer2018092520180818
45736691079refgcc -funroll-loops -m32 -march=i386 -O2 -fomit-frame-pointer2018092620180818
45762800171refgcc -funroll-loops -m32 -march=k6-2 -Os -fomit-frame-pointer2018092620180818
45763060815refgcc -funroll-loops -m32 -march=k6 -Os -fomit-frame-pointer2018092620180818
45764318726refgcc -funroll-loops -m32 -march=k6-3 -Os -fomit-frame-pointer2018092620180818
45973592517refgcc -funroll-loops -m32 -march=pentium-mmx -O3 -fomit-frame-pointer2018092620180818
45975970205refgcc -funroll-loops -m32 -march=pentium -O3 -fomit-frame-pointer2018092620180818
46095760028refgcc -funroll-loops -m32 -march=i386 -O -fomit-frame-pointer2018092620180818
46364469008refgcc -funroll-loops -m32 -march=i386 -Os -fomit-frame-pointer2018092620180818
46381786139refgcc -funroll-loops -m32 -march=i486 -O2 -fomit-frame-pointer2018092620180818
46402166420refgcc -funroll-loops -m32 -march=i486 -Os -fomit-frame-pointer2018092620180818
46448002401refgcc -funroll-loops -m32 -march=athlon -Os -fomit-frame-pointer2018092620180818
46520427067refgcc -funroll-loops -m32 -march=i486 -O -fomit-frame-pointer2018092620180818
46546765296refgcc -funroll-loops -m32 -Os -fomit-frame-pointer2018092620180818
46564055537refgcc -m32 -march=i386 -Os -fomit-frame-pointer2018092620180818
46564782088refgcc -m32 -march=i486 -Os -fomit-frame-pointer2018092620180818
46578101147refgcc -funroll-loops -m32 -march=pentium-mmx -Os -fomit-frame-pointer2018092620180818
46586596619refgcc -funroll-loops -m32 -march=pentium -Os -fomit-frame-pointer2018092620180818
46727439961refgcc -funroll-loops -m32 -march=pentium-mmx -O -fomit-frame-pointer2018092620180818
46753149404refgcc -funroll-loops -m32 -march=pentium -O -fomit-frame-pointer2018092620180818
47170744770refgcc -funroll-loops -m32 -march=pentiumpro -O2 -fomit-frame-pointer2018092620180818
47191097602refgcc -funroll-loops -m32 -march=pentium3 -O2 -fomit-frame-pointer2018092620180818
47194379585refgcc -funroll-loops -m32 -march=pentium2 -O2 -fomit-frame-pointer2018092620180818
47329079484refgcc -m32 -march=pentium -Os -fomit-frame-pointer2018092620180818
47330806873refgcc -m32 -march=pentium-mmx -Os -fomit-frame-pointer2018092620180818
47363996399refgcc -m32 -march=pentium-mmx -O3 -fomit-frame-pointer2018092620180818
47371615994refgcc -m32 -march=pentium -O3 -fomit-frame-pointer2018092620180818
47659276591refgcc -m32 -Os -fomit-frame-pointer2018092520180818
47670102375refgcc -m32 -march=athlon -Os -fomit-frame-pointer2018092520180818
47714114559refgcc -m32 -march=pentiumpro -Os -fomit-frame-pointer2018092620180818
47716742729refgcc -m32 -march=pentium3 -Os -fomit-frame-pointer2018092620180818
47732690056refgcc -m32 -march=pentium2 -Os -fomit-frame-pointer2018092620180818
47763811864refgcc -funroll-loops -m32 -march=pentium3 -Os -fomit-frame-pointer2018092620180818
47769261921refgcc -funroll-loops -m32 -march=pentium2 -Os -fomit-frame-pointer2018092620180818
47774872707refgcc -funroll-loops -m32 -march=pentiumpro -Os -fomit-frame-pointer2018092620180818
48836655234refgcc -m32 -march=pentium3 -O -fomit-frame-pointer2018092620180818
48841356871refgcc -m32 -march=pentiumpro -O -fomit-frame-pointer2018092620180818
48847040182refgcc -m32 -march=pentium2 -O -fomit-frame-pointer2018092620180818
49129549874refgcc -funroll-loops -m32 -march=pentium -O2 -fomit-frame-pointer2018092620180818
49157070768refgcc -funroll-loops -m32 -march=pentium-mmx -O2 -fomit-frame-pointer2018092620180818
49384025302refgcc -m32 -O2 -fomit-frame-pointer2018092520180818
49533377817refgcc -m32 -march=athlon -O -fomit-frame-pointer2018092520180818
49744005584refgcc -m32 -O -fomit-frame-pointer2018092520180818
49948926980refgcc -m32 -march=k6 -O -fomit-frame-pointer2018092520180818
49951628573refgcc -m32 -march=k6-2 -O -fomit-frame-pointer2018092520180818
49967399903refgcc -m32 -march=k6-3 -O -fomit-frame-pointer2018092520180818
50017058787refgcc -m32 -march=k6-3 -O2 -fomit-frame-pointer2018092520180818
50017762276refgcc -m32 -march=k6-2 -O2 -fomit-frame-pointer2018092520180818
50026580313refgcc -m32 -march=k6 -O2 -fomit-frame-pointer2018092520180818
50108346992refgcc -m32 -march=athlon -O2 -fomit-frame-pointer2018092520180818
51088932408refgcc -m32 -march=i486 -O2 -fomit-frame-pointer2018092620180818
51621124147refgcc -m32 -march=i386 -O2 -fomit-frame-pointer2018092620180818
51675778641refgcc -m32 -march=pentium-mmx -O -fomit-frame-pointer2018092620180818
51679667034refgcc -m32 -march=pentium -O -fomit-frame-pointer2018092620180818
51693193794refgcc -m32 -march=i386 -O -fomit-frame-pointer2018092620180818
52130313555refgcc -m32 -march=pentium2 -O2 -fomit-frame-pointer2018092620180818
52140203913refgcc -m32 -march=pentium3 -O2 -fomit-frame-pointer2018092620180818
52142820823refgcc -m32 -march=pentiumpro -O2 -fomit-frame-pointer2018092620180818
52206752871refgcc -m32 -march=i486 -O -fomit-frame-pointer2018092620180818
53198334792refgcc -m32 -march=pentium-mmx -O2 -fomit-frame-pointer2018092620180818
53208674260refgcc -m32 -march=pentium -O2 -fomit-frame-pointer2018092620180818

Test failure

Implementation: crypto_sign/sphincss256shake256/ref
Compiler: gcc -funroll-loops -m32 -march=barcelona -O2 -fomit-frame-pointer
error 142
Alarm clock

Number of similar (compiler,implementation) pairs: 11, namely:
Compiler Implementations
gcc -funroll-loops -m32 -march=barcelona -O2 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=barcelona -O3 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=barcelona -Os -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=k8 -O2 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=k8 -O3 -fomit-frame-pointer ref
gcc -funroll-loops -m32 -march=k8 -Os -fomit-frame-pointer ref
gcc -m32 -march=k8 -O2 -fomit-frame-pointer ref
gcc -m32 -march=k8 -O3 -fomit-frame-pointer ref
gcc -m32 -march=k8 -Os -fomit-frame-pointer ref
gcc -m32 -march=native -mtune=native -O2 -fomit-frame-pointer ref
gcc -m32 -march=native -mtune=native -Os -fomit-frame-pointer ref

Test failure

Implementation: crypto_sign/sphincss256shake256/ref
Compiler: gcc -m32 -march=barcelona -O2 -fomit-frame-pointer
error 142
Alarm clock
error 142
Alarm clock

Number of similar (compiler,implementation) pairs: 3, namely:
Compiler Implementations
gcc -m32 -march=barcelona -O2 -fomit-frame-pointer ref
gcc -m32 -march=barcelona -O3 -fomit-frame-pointer ref
gcc -m32 -march=barcelona -Os -fomit-frame-pointer ref

Compiler output

Implementation: crypto_sign/sphincss256shake256/avx2
Compiler: gcc -funroll-loops -m32 -O2 -fomit-frame-pointer
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:40: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^~~~~~~
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: note: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^~~~~~~
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 156, namely:
Compiler Implementations
gcc -funroll-loops -m32 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=athlon -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=athlon -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=athlon -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=athlon -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=barcelona -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=barcelona -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=barcelona -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=barcelona -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i386 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i386 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i386 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i386 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i486 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i486 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i486 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i486 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-2 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-2 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-2 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-2 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-3 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-3 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-3 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-3 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k8 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k8 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k8 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k8 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=nocona -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=nocona -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=nocona -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=nocona -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-m -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-m -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-m -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-m -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-mmx -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-mmx -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-mmx -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-mmx -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium2 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium2 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium2 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium2 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium3 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium3 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium3 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium3 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium4 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium4 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium4 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium4 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentiumpro -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentiumpro -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentiumpro -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentiumpro -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=prescott -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=prescott -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=prescott -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=prescott -Os -fomit-frame-pointer avx2
gcc -m32 -O2 -fomit-frame-pointer avx2
gcc -m32 -O3 -fomit-frame-pointer avx2
gcc -m32 -O -fomit-frame-pointer avx2
gcc -m32 -Os -fomit-frame-pointer avx2
gcc -m32 -march=athlon -O2 -fomit-frame-pointer avx2
gcc -m32 -march=athlon -O3 -fomit-frame-pointer avx2
gcc -m32 -march=athlon -O -fomit-frame-pointer avx2
gcc -m32 -march=athlon -Os -fomit-frame-pointer avx2
gcc -m32 -march=core2 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -O -fomit-frame-pointer avx2
gcc -m32 -march=core2 -Os -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4.1 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4.1 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4.1 -O -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4.1 -Os -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4 -O -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4 -Os -fomit-frame-pointer avx2
gcc -m32 -march=corei7 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=corei7 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=corei7 -O -fomit-frame-pointer avx2
gcc -m32 -march=corei7 -Os -fomit-frame-pointer avx2
gcc -m32 -march=i386 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=i386 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=i386 -O -fomit-frame-pointer avx2
gcc -m32 -march=i386 -Os -fomit-frame-pointer avx2
gcc -m32 -march=i486 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=i486 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=i486 -O -fomit-frame-pointer avx2
gcc -m32 -march=i486 -Os -fomit-frame-pointer avx2
gcc -m32 -march=k6-2 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=k6-2 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=k6-2 -O -fomit-frame-pointer avx2
gcc -m32 -march=k6-2 -Os -fomit-frame-pointer avx2
gcc -m32 -march=k6-3 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=k6-3 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=k6-3 -O -fomit-frame-pointer avx2
gcc -m32 -march=k6-3 -Os -fomit-frame-pointer avx2
gcc -m32 -march=k6 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=k6 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=k6 -O -fomit-frame-pointer avx2
gcc -m32 -march=k6 -Os -fomit-frame-pointer avx2
gcc -m32 -march=k8 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=k8 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=k8 -O -fomit-frame-pointer avx2
gcc -m32 -march=k8 -Os -fomit-frame-pointer avx2
gcc -m32 -march=nocona -O2 -fomit-frame-pointer avx2
gcc -m32 -march=nocona -O3 -fomit-frame-pointer avx2
gcc -m32 -march=nocona -O -fomit-frame-pointer avx2
gcc -m32 -march=nocona -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium-m -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium-m -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium-m -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium-m -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium-mmx -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium-mmx -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium-mmx -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium-mmx -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium2 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium2 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium2 -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium2 -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium3 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium3 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium3 -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium3 -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium4 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium4 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium4 -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium4 -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentiumpro -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentiumpro -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentiumpro -O -fomit-frame-pointer avx2
gcc -m32 -march=pentiumpro -Os -fomit-frame-pointer avx2
gcc -m32 -march=prescott -O2 -fomit-frame-pointer avx2
gcc -m32 -march=prescott -O3 -fomit-frame-pointer avx2
gcc -m32 -march=prescott -O -fomit-frame-pointer avx2
gcc -m32 -march=prescott -Os -fomit-frame-pointer avx2

Compiler output

Implementation: crypto_sign/sphincss256shake256/avx2
Compiler: gcc -m32 -march=barcelona -O2 -fomit-frame-pointer
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:40: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^~~~~~~
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: note: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^~~~~~~
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ...
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:40: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^~~~~~~
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: note: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^~~~~~~
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler Implementations
gcc -m32 -march=barcelona -O2 -fomit-frame-pointer avx2
gcc -m32 -march=barcelona -O3 -fomit-frame-pointer avx2
gcc -m32 -march=barcelona -O -fomit-frame-pointer avx2
gcc -m32 -march=barcelona -Os -fomit-frame-pointer avx2

Compiler output

Implementation: crypto_sign/sphincss256shake256/avx2
Compiler: gcc -m32 -march=core-avx-i -O2 -fomit-frame-pointer
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: note: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^~~~~~~
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: note: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:142:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+2], lanes2 ),\
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 8, namely:
Compiler Implementations
gcc -m32 -march=core-avx-i -O2 -fomit-frame-pointer avx2
gcc -m32 -march=core-avx-i -O3 -fomit-frame-pointer avx2
gcc -m32 -march=core-avx-i -O -fomit-frame-pointer avx2
gcc -m32 -march=core-avx-i -Os -fomit-frame-pointer avx2
gcc -m32 -march=corei7-avx -O2 -fomit-frame-pointer avx2
gcc -m32 -march=corei7-avx -O3 -fomit-frame-pointer avx2
gcc -m32 -march=corei7-avx -O -fomit-frame-pointer avx2
gcc -m32 -march=corei7-avx -Os -fomit-frame-pointer avx2