Implementation notes: x86, titan0, crypto_sign/sphincsf128shake256simple

Computer: titan0
Architecture: x86
CPU ID: GenuineIntel-000306c3-bfebfbff
SUPERCOP version: 20190803
Operation: crypto_sign
Primitive: sphincsf128shake256simple
Time Implementation Compiler Benchmark date SUPERCOP version
233426268avx2gcc -m32 -march=native -mtune=native -O3 -fomit-frame-pointer2019080820190803
233449500avx2gcc -m32 -march=core-avx2 -O3 -fomit-frame-pointer2019080820190803
365031328avx2gcc -m32 -march=native -mtune=native -O2 -fomit-frame-pointer2019080820190803
367111996avx2gcc -m32 -march=core-avx2 -O2 -fomit-frame-pointer2019080820190803
380582120avx2gcc -m32 -march=core-avx2 -O -fomit-frame-pointer2019080820190803
381853196avx2gcc -m32 -march=native -mtune=native -O -fomit-frame-pointer2019080820190803
396862668avx2gcc -m32 -march=core-avx2 -Os -fomit-frame-pointer2019080820190803
396881332avx2gcc -m32 -march=native -mtune=native -Os -fomit-frame-pointer2019080820190803
718854552refgcc -m32 -march=core2 -msse4 -O3 -fomit-frame-pointer2019080820190803
720842216refgcc -m32 -march=core2 -msse4.1 -O3 -fomit-frame-pointer2019080820190803
730283044refgcc -m32 -march=corei7 -O3 -fomit-frame-pointer2019080820190803
741675712refgcc -funroll-loops -m32 -march=pentium-m -O3 -fomit-frame-pointer2019080820190803
746748488refgcc -m32 -march=core2 -O3 -fomit-frame-pointer2019080820190803
758176460refgcc -m32 -march=pentium-m -O3 -fomit-frame-pointer2019080820190803
760290052refgcc -m32 -march=native -mtune=native -O3 -fomit-frame-pointer2019080820190803
760440656refgcc -m32 -march=core-avx2 -O3 -fomit-frame-pointer2019080820190803
794253184refgcc -funroll-loops -m32 -march=pentium-m -O2 -fomit-frame-pointer2019080820190803
827063852refgcc -m32 -march=prescott -O3 -fomit-frame-pointer2019080820190803
827211020refgcc -m32 -march=nocona -O3 -fomit-frame-pointer2019080820190803
834147052refgcc -funroll-loops -m32 -march=nocona -O3 -fomit-frame-pointer2019080820190803
835273788refgcc -funroll-loops -m32 -march=pentium4 -O3 -fomit-frame-pointer2019080820190803
836910136refgcc -funroll-loops -m32 -march=prescott -O3 -fomit-frame-pointer2019080820190803
844478304refgcc -funroll-loops -m32 -march=nocona -O2 -fomit-frame-pointer2019080820190803
845466560refgcc -m32 -march=pentium4 -O3 -fomit-frame-pointer2019080820190803
845672340refgcc -funroll-loops -m32 -march=prescott -O2 -fomit-frame-pointer2019080820190803
849302364refgcc -m32 -march=core-avx-i -O3 -fomit-frame-pointer2019080820190803
849431224refgcc -m32 -march=corei7-avx -O3 -fomit-frame-pointer2019080820190803
857810572refgcc -funroll-loops -m32 -march=pentium4 -O -fomit-frame-pointer2019080820190803
863431132refgcc -funroll-loops -m32 -march=k8 -O -fomit-frame-pointer2019080820190803
866810368refgcc -funroll-loops -m32 -march=pentium-m -O -fomit-frame-pointer2019080820190803
868163120refgcc -funroll-loops -m32 -march=barcelona -O -fomit-frame-pointer2019080820190803
868629412refgcc -funroll-loops -m32 -march=nocona -O -fomit-frame-pointer2019080820190803
868992452refgcc -funroll-loops -m32 -march=prescott -O -fomit-frame-pointer2019080820190803
895214892refgcc -m32 -march=corei7-avx -Os -fomit-frame-pointer2019080820190803
895636116refgcc -m32 -march=core-avx-i -Os -fomit-frame-pointer2019080820190803
897436636refgcc -funroll-loops -m32 -march=pentium4 -O2 -fomit-frame-pointer2019080820190803
905806884refgcc -m32 -march=native -mtune=native -Os -fomit-frame-pointer2019080820190803
906346208refgcc -m32 -march=core-avx2 -Os -fomit-frame-pointer2019080820190803
907751096refgcc -m32 -march=core-avx2 -O2 -fomit-frame-pointer2019080820190803
925199564refgcc -m32 -march=native -mtune=native -O2 -fomit-frame-pointer2019080820190803
960145828refgcc -m32 -march=core-avx-i -O2 -fomit-frame-pointer2019080820190803
964037560refgcc -m32 -march=corei7-avx -O2 -fomit-frame-pointer2019080820190803
990155680refgcc -m32 -march=corei7 -O2 -fomit-frame-pointer2019080820190803
993812476refgcc -m32 -march=core2 -msse4.1 -O2 -fomit-frame-pointer2019080820190803
994641452refgcc -m32 -march=core2 -msse4 -O2 -fomit-frame-pointer2019080820190803
995301032refgcc -m32 -march=pentium-m -O2 -fomit-frame-pointer2019080820190803
1014695720refgcc -m32 -march=core2 -O2 -fomit-frame-pointer2019080820190803
1020516936refgcc -m32 -march=native -mtune=native -O -fomit-frame-pointer2019080820190803
1020738324refgcc -m32 -march=core-avx2 -O -fomit-frame-pointer2019080820190803
1033622760refgcc -m32 -O3 -fomit-frame-pointer2019080820190803
1033883444refgcc -m32 -march=pentium4 -O2 -fomit-frame-pointer2019080820190803
1039266668refgcc -m32 -march=athlon -O3 -fomit-frame-pointer2019080820190803
1044735028refgcc -m32 -march=k6 -O3 -fomit-frame-pointer2019080820190803
1046868324refgcc -funroll-loops -m32 -march=k6 -O3 -fomit-frame-pointer2019080820190803
1049299640refgcc -funroll-loops -m32 -O3 -fomit-frame-pointer2019080820190803
1049888856refgcc -funroll-loops -m32 -march=athlon -O3 -fomit-frame-pointer2019080820190803
1052411560refgcc -funroll-loops -m32 -march=athlon -O -fomit-frame-pointer2019080820190803
1054575324refgcc -funroll-loops -m32 -march=k6-3 -O3 -fomit-frame-pointer2019080820190803
1055216788refgcc -funroll-loops -m32 -march=k6-2 -O3 -fomit-frame-pointer2019080820190803
1059112948refgcc -m32 -march=k6-2 -O3 -fomit-frame-pointer2019080820190803
1059493408refgcc -m32 -march=k6-3 -O3 -fomit-frame-pointer2019080820190803
1061923984refgcc -funroll-loops -m32 -O -fomit-frame-pointer2019080820190803
1064190892refgcc -funroll-loops -m32 -march=pentium2 -O -fomit-frame-pointer2019080820190803
1064563620refgcc -funroll-loops -m32 -march=pentiumpro -O -fomit-frame-pointer2019080820190803
1064693928refgcc -funroll-loops -m32 -O2 -fomit-frame-pointer2019080820190803
1065017056refgcc -funroll-loops -m32 -march=pentium3 -O -fomit-frame-pointer2019080820190803
1066003432refgcc -funroll-loops -m32 -march=k6-3 -O -fomit-frame-pointer2019080820190803
1066858752refgcc -funroll-loops -m32 -march=k6-2 -O -fomit-frame-pointer2019080820190803
1067215420refgcc -funroll-loops -m32 -march=k6 -O -fomit-frame-pointer2019080820190803
1068776636refgcc -funroll-loops -m32 -march=k6 -O2 -fomit-frame-pointer2019080820190803
1068813464refgcc -funroll-loops -m32 -march=k6-3 -O2 -fomit-frame-pointer2019080820190803
1069325088refgcc -funroll-loops -m32 -march=k6-2 -O2 -fomit-frame-pointer2019080820190803
1070931420refgcc -funroll-loops -m32 -march=athlon -O2 -fomit-frame-pointer2019080820190803
1079881252refgcc -m32 -march=pentium4 -O -fomit-frame-pointer2019080820190803
1090182108refgcc -funroll-loops -m32 -march=pentiumpro -O3 -fomit-frame-pointer2019080820190803
1091134380refgcc -funroll-loops -m32 -march=pentium2 -O3 -fomit-frame-pointer2019080820190803
1092583020refgcc -funroll-loops -m32 -march=i486 -O3 -fomit-frame-pointer2019080820190803
1096132772refgcc -funroll-loops -m32 -march=i386 -O2 -fomit-frame-pointer2019080820190803
1096201884refgcc -funroll-loops -m32 -march=i386 -O3 -fomit-frame-pointer2019080820190803
1096783936refgcc -m32 -march=pentiumpro -O3 -fomit-frame-pointer2019080820190803
1096840096refgcc -m32 -march=pentium2 -O3 -fomit-frame-pointer2019080820190803
1098943440refgcc -funroll-loops -m32 -march=pentium3 -O3 -fomit-frame-pointer2019080820190803
1100219060refgcc -funroll-loops -m32 -march=i486 -O2 -fomit-frame-pointer2019080820190803
1104925492refgcc -m32 -march=i486 -O3 -fomit-frame-pointer2019080820190803
1105991408refgcc -m32 -march=k8 -O -fomit-frame-pointer2019080820190803
1109689984refgcc -m32 -march=pentium3 -O3 -fomit-frame-pointer2019080820190803
1110389688refgcc -funroll-loops -m32 -march=i486 -O -fomit-frame-pointer2019080820190803
1113789968refgcc -m32 -march=i386 -O3 -fomit-frame-pointer2019080820190803
1113976692refgcc -m32 -march=pentium-m -O -fomit-frame-pointer2019080820190803
1116064816refgcc -funroll-loops -m32 -march=pentium -O -fomit-frame-pointer2019080820190803
1116958152refgcc -funroll-loops -m32 -march=pentium-mmx -O -fomit-frame-pointer2019080820190803
1118048608refgcc -m32 -march=core2 -O -fomit-frame-pointer2019080820190803
1118229052refgcc -funroll-loops -m32 -march=i386 -O -fomit-frame-pointer2019080820190803
1118438596refgcc -m32 -march=core2 -msse4.1 -O -fomit-frame-pointer2019080820190803
1119144600refgcc -m32 -march=core2 -msse4 -O -fomit-frame-pointer2019080820190803
1124225580refgcc -m32 -march=corei7-avx -O -fomit-frame-pointer2019080820190803
1124658344refgcc -m32 -march=core-avx-i -O -fomit-frame-pointer2019080820190803
1129165492refgcc -m32 -march=nocona -O2 -fomit-frame-pointer2019080820190803
1129230304refgcc -m32 -march=prescott -O2 -fomit-frame-pointer2019080820190803
1129429192refgcc -m32 -march=corei7 -O -fomit-frame-pointer2019080820190803
1130464892refgcc -m32 -march=nocona -O -fomit-frame-pointer2019080820190803
1132419324refgcc -funroll-loops -m32 -march=nocona -Os -fomit-frame-pointer2019080820190803
1132921560refgcc -funroll-loops -m32 -march=pentium4 -Os -fomit-frame-pointer2019080820190803
1133150444refgcc -funroll-loops -m32 -march=prescott -Os -fomit-frame-pointer2019080820190803
1134670552refgcc -funroll-loops -m32 -march=pentiumpro -O2 -fomit-frame-pointer2019080820190803
1135191832refgcc -funroll-loops -m32 -march=pentium3 -O2 -fomit-frame-pointer2019080820190803
1135497436refgcc -funroll-loops -m32 -march=pentium2 -O2 -fomit-frame-pointer2019080820190803
1138782564refgcc -m32 -march=prescott -O -fomit-frame-pointer2019080820190803
1141615928refgcc -m32 -march=core2 -Os -fomit-frame-pointer2019080820190803
1147403084refgcc -funroll-loops -m32 -march=pentium-m -Os -fomit-frame-pointer2019080820190803
1147691336refgcc -m32 -march=core2 -msse4 -Os -fomit-frame-pointer2019080820190803
1147762312refgcc -m32 -march=corei7 -Os -fomit-frame-pointer2019080820190803
1148330984refgcc -m32 -march=core2 -msse4.1 -Os -fomit-frame-pointer2019080820190803
1154725004refgcc -funroll-loops -m32 -march=pentium -O2 -fomit-frame-pointer2019080820190803
1155212332refgcc -funroll-loops -m32 -march=pentium-mmx -O3 -fomit-frame-pointer2019080820190803
1155309540refgcc -funroll-loops -m32 -march=pentium -O3 -fomit-frame-pointer2019080820190803
1156386456refgcc -m32 -march=barcelona -O -fomit-frame-pointer2019080820190803
1156949780refgcc -funroll-loops -m32 -march=pentium-mmx -O2 -fomit-frame-pointer2019080820190803
1158895968refgcc -m32 -march=pentium-m -Os -fomit-frame-pointer2019080820190803
1171058820refgcc -m32 -march=pentium -O3 -fomit-frame-pointer2019080820190803
1174386004refgcc -m32 -march=pentium-mmx -O3 -fomit-frame-pointer2019080820190803
1183128492refgcc -m32 -march=pentium4 -Os -fomit-frame-pointer2019080820190803
1183240552refgcc -m32 -march=prescott -Os -fomit-frame-pointer2019080820190803
1183404888refgcc -m32 -march=nocona -Os -fomit-frame-pointer2019080820190803
1194356092refgcc -funroll-loops -m32 -march=pentium -Os -fomit-frame-pointer2019080820190803
1196261652refgcc -funroll-loops -m32 -march=pentium2 -Os -fomit-frame-pointer2019080820190803
1197312996refgcc -funroll-loops -m32 -march=pentiumpro -Os -fomit-frame-pointer2019080820190803
1199517908refgcc -funroll-loops -m32 -march=pentium3 -Os -fomit-frame-pointer2019080820190803
1202929264refgcc -funroll-loops -m32 -march=k6 -Os -fomit-frame-pointer2019080820190803
1203382288refgcc -funroll-loops -m32 -march=k6-2 -Os -fomit-frame-pointer2019080820190803
1203781140refgcc -funroll-loops -m32 -march=k6-3 -Os -fomit-frame-pointer2019080820190803
1204691668refgcc -funroll-loops -m32 -march=pentium-mmx -Os -fomit-frame-pointer2019080820190803
1205004100refgcc -m32 -march=k6-2 -Os -fomit-frame-pointer2019080820190803
1205056280refgcc -m32 -march=k6-3 -Os -fomit-frame-pointer2019080820190803
1205725320refgcc -m32 -march=k6 -Os -fomit-frame-pointer2019080820190803
1206542760refgcc -m32 -march=i386 -Os -fomit-frame-pointer2019080820190803
1209842968refgcc -m32 -march=i486 -Os -fomit-frame-pointer2019080820190803
1215168352refgcc -m32 -march=pentium -Os -fomit-frame-pointer2019080820190803
1215556620refgcc -m32 -march=pentium-mmx -Os -fomit-frame-pointer2019080820190803
1219443520refgcc -funroll-loops -m32 -march=i486 -Os -fomit-frame-pointer2019080820190803
1221951688refgcc -m32 -march=pentium2 -Os -fomit-frame-pointer2019080820190803
1221985948refgcc -m32 -march=pentium3 -Os -fomit-frame-pointer2019080820190803
1222985016refgcc -m32 -march=pentiumpro -Os -fomit-frame-pointer2019080820190803
1223117840refgcc -m32 -march=athlon -Os -fomit-frame-pointer2019080820190803
1224081096refgcc -funroll-loops -m32 -march=i386 -Os -fomit-frame-pointer2019080820190803
1224737104refgcc -m32 -Os -fomit-frame-pointer2019080820190803
1225991520refgcc -funroll-loops -m32 -Os -fomit-frame-pointer2019080820190803
1227442756refgcc -funroll-loops -m32 -march=athlon -Os -fomit-frame-pointer2019080820190803
1263267824refgcc -m32 -march=k6-3 -O -fomit-frame-pointer2019080820190803
1266213376refgcc -m32 -march=k6 -O -fomit-frame-pointer2019080820190803
1267427084refgcc -m32 -march=k6-2 -O -fomit-frame-pointer2019080820190803
1283475868refgcc -m32 -march=pentiumpro -O -fomit-frame-pointer2019080820190803
1286258056refgcc -m32 -march=pentium3 -O -fomit-frame-pointer2019080820190803
1293133108refgcc -m32 -march=i386 -O2 -fomit-frame-pointer2019080820190803
1296226624refgcc -m32 -march=pentium2 -O -fomit-frame-pointer2019080820190803
1298900400refgcc -m32 -march=i486 -O -fomit-frame-pointer2019080820190803
1299584392refgcc -m32 -march=k6 -O2 -fomit-frame-pointer2019080820190803
1305073600refgcc -m32 -march=k6-2 -O2 -fomit-frame-pointer2019080820190803
1305922548refgcc -m32 -march=k6-3 -O2 -fomit-frame-pointer2019080820190803
1312679720refgcc -m32 -march=athlon -O -fomit-frame-pointer2019080820190803
1314224424refgcc -m32 -O -fomit-frame-pointer2019080820190803
1322893120refgcc -m32 -O2 -fomit-frame-pointer2019080820190803
1329617460refgcc -m32 -march=i486 -O2 -fomit-frame-pointer2019080820190803
1348465860refgcc -m32 -march=i386 -O -fomit-frame-pointer2019080820190803
1353535492refgcc -m32 -march=pentium -O -fomit-frame-pointer2019080820190803
1353749780refgcc -m32 -march=pentium-mmx -O -fomit-frame-pointer2019080820190803
1358846460refgcc -m32 -march=athlon -O2 -fomit-frame-pointer2019080820190803
1370970888refgcc -m32 -march=pentium3 -O2 -fomit-frame-pointer2019080820190803
1370974576refgcc -m32 -march=pentiumpro -O2 -fomit-frame-pointer2019080820190803
1371137976refgcc -m32 -march=pentium2 -O2 -fomit-frame-pointer2019080820190803
1436882836refgcc -m32 -march=pentium -O2 -fomit-frame-pointer2019080820190803
1438267452refgcc -m32 -march=pentium-mmx -O2 -fomit-frame-pointer2019080820190803
2310899356refgcc -funroll-loops -m32 -march=barcelona -O3 -fomit-frame-pointer2019080820190803
2340272816refgcc -m32 -march=barcelona -O3 -fomit-frame-pointer2019080820190803
2475399948refgcc -funroll-loops -m32 -march=k8 -O3 -fomit-frame-pointer2019080820190803
2479724268refgcc -m32 -march=k8 -O3 -fomit-frame-pointer2019080820190803
2576059220refgcc -funroll-loops -m32 -march=barcelona -O2 -fomit-frame-pointer2019080820190803
2606278368refgcc -funroll-loops -m32 -march=k8 -O2 -fomit-frame-pointer2019080820190803
2694746232refgcc -funroll-loops -m32 -march=barcelona -Os -fomit-frame-pointer2019080820190803
2727417288refgcc -m32 -march=barcelona -Os -fomit-frame-pointer2019080820190803
2784030376refgcc -funroll-loops -m32 -march=k8 -Os -fomit-frame-pointer2019080820190803
2797320284refgcc -m32 -march=k8 -Os -fomit-frame-pointer2019080820190803
2806767492refgcc -m32 -march=k8 -O2 -fomit-frame-pointer2019080820190803
2843685488refgcc -m32 -march=barcelona -O2 -fomit-frame-pointer2019080820190803

Compiler output

Implementation: crypto_sign/sphincsf128shake256simple/avx2
Compiler: gcc -funroll-loops -m32 -O2 -fomit-frame-pointer
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:40: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^~~~~~~
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: note: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^~~~~~~
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 156, namely:
Compiler Implementations
gcc -funroll-loops -m32 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=athlon -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=athlon -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=athlon -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=athlon -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=barcelona -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=barcelona -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=barcelona -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=barcelona -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i386 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i386 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i386 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i386 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i486 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i486 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i486 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i486 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-2 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-2 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-2 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-2 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-3 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-3 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-3 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-3 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k8 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k8 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k8 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k8 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=nocona -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=nocona -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=nocona -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=nocona -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-m -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-m -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-m -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-m -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-mmx -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-mmx -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-mmx -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-mmx -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium2 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium2 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium2 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium2 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium3 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium3 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium3 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium3 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium4 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium4 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium4 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium4 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentiumpro -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentiumpro -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentiumpro -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentiumpro -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=prescott -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=prescott -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=prescott -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=prescott -Os -fomit-frame-pointer avx2
gcc -m32 -O2 -fomit-frame-pointer avx2
gcc -m32 -O3 -fomit-frame-pointer avx2
gcc -m32 -O -fomit-frame-pointer avx2
gcc -m32 -Os -fomit-frame-pointer avx2
gcc -m32 -march=athlon -O2 -fomit-frame-pointer avx2
gcc -m32 -march=athlon -O3 -fomit-frame-pointer avx2
gcc -m32 -march=athlon -O -fomit-frame-pointer avx2
gcc -m32 -march=athlon -Os -fomit-frame-pointer avx2
gcc -m32 -march=core2 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -O -fomit-frame-pointer avx2
gcc -m32 -march=core2 -Os -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4.1 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4.1 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4.1 -O -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4.1 -Os -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4 -O -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4 -Os -fomit-frame-pointer avx2
gcc -m32 -march=corei7 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=corei7 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=corei7 -O -fomit-frame-pointer avx2
gcc -m32 -march=corei7 -Os -fomit-frame-pointer avx2
gcc -m32 -march=i386 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=i386 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=i386 -O -fomit-frame-pointer avx2
gcc -m32 -march=i386 -Os -fomit-frame-pointer avx2
gcc -m32 -march=i486 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=i486 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=i486 -O -fomit-frame-pointer avx2
gcc -m32 -march=i486 -Os -fomit-frame-pointer avx2
gcc -m32 -march=k6-2 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=k6-2 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=k6-2 -O -fomit-frame-pointer avx2
gcc -m32 -march=k6-2 -Os -fomit-frame-pointer avx2
gcc -m32 -march=k6-3 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=k6-3 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=k6-3 -O -fomit-frame-pointer avx2
gcc -m32 -march=k6-3 -Os -fomit-frame-pointer avx2
gcc -m32 -march=k6 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=k6 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=k6 -O -fomit-frame-pointer avx2
gcc -m32 -march=k6 -Os -fomit-frame-pointer avx2
gcc -m32 -march=k8 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=k8 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=k8 -O -fomit-frame-pointer avx2
gcc -m32 -march=k8 -Os -fomit-frame-pointer avx2
gcc -m32 -march=nocona -O2 -fomit-frame-pointer avx2
gcc -m32 -march=nocona -O3 -fomit-frame-pointer avx2
gcc -m32 -march=nocona -O -fomit-frame-pointer avx2
gcc -m32 -march=nocona -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium-m -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium-m -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium-m -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium-m -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium-mmx -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium-mmx -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium-mmx -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium-mmx -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium2 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium2 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium2 -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium2 -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium3 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium3 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium3 -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium3 -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium4 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium4 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium4 -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium4 -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentiumpro -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentiumpro -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentiumpro -O -fomit-frame-pointer avx2
gcc -m32 -march=pentiumpro -Os -fomit-frame-pointer avx2
gcc -m32 -march=prescott -O2 -fomit-frame-pointer avx2
gcc -m32 -march=prescott -O3 -fomit-frame-pointer avx2
gcc -m32 -march=prescott -O -fomit-frame-pointer avx2
gcc -m32 -march=prescott -Os -fomit-frame-pointer avx2

Compiler output

Implementation: crypto_sign/sphincsf128shake256simple/avx2
Compiler: gcc -m32 -march=barcelona -O2 -fomit-frame-pointer
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:40: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^~~~~~~
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: note: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^~~~~~~
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ...
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:40: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^~~~~~~
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: note: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^~~~~~~
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler Implementations
gcc -m32 -march=barcelona -O2 -fomit-frame-pointer avx2
gcc -m32 -march=barcelona -O3 -fomit-frame-pointer avx2
gcc -m32 -march=barcelona -O -fomit-frame-pointer avx2
gcc -m32 -march=barcelona -Os -fomit-frame-pointer avx2

Compiler output

Implementation: crypto_sign/sphincsf128shake256simple/avx2
Compiler: gcc -m32 -march=core-avx-i -O2 -fomit-frame-pointer
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: note: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^~~~~~~
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: note: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:142:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+2], lanes2 ),\
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 8, namely:
Compiler Implementations
gcc -m32 -march=core-avx-i -O2 -fomit-frame-pointer avx2
gcc -m32 -march=core-avx-i -O3 -fomit-frame-pointer avx2
gcc -m32 -march=core-avx-i -O -fomit-frame-pointer avx2
gcc -m32 -march=core-avx-i -Os -fomit-frame-pointer avx2
gcc -m32 -march=corei7-avx -O2 -fomit-frame-pointer avx2
gcc -m32 -march=corei7-avx -O3 -fomit-frame-pointer avx2
gcc -m32 -march=corei7-avx -O -fomit-frame-pointer avx2
gcc -m32 -march=corei7-avx -Os -fomit-frame-pointer avx2