Implementation notes: x86, samba, crypto_sign/sphincsf128shake256simple

Computer: samba
Architecture: x86
CPU ID: GenuineIntel-000506e3-bfebfbff
SUPERCOP version: 20190803
Operation: crypto_sign
Primitive: sphincsf128shake256simple
Time, Implementation, Compiler, Benchmark date, SUPERCOP version (columns are concatenated without separators in the rows below)
221733405avx2gcc -m32 -march=core-avx2 -O3 -fomit-frame-pointer2019080820190803
222945141avx2gcc -m32 -march=native -mtune=native -O3 -fomit-frame-pointer2019080820190803
345124383avx2gcc -m32 -march=native -mtune=native -O -fomit-frame-pointer2019080820190803
350140254avx2gcc -m32 -march=core-avx2 -O -fomit-frame-pointer2019080820190803
357466530avx2gcc -m32 -march=native -mtune=native -O2 -fomit-frame-pointer2019080820190803
363414355avx2gcc -m32 -march=core-avx2 -O2 -fomit-frame-pointer2019080820190803
369863283avx2gcc -m32 -march=core-avx2 -Os -fomit-frame-pointer2019080820190803
370012010avx2gcc -m32 -march=native -mtune=native -Os -fomit-frame-pointer2019080820190803
702048923refgcc -m32 -march=core2 -msse4 -O3 -fomit-frame-pointer2019080820190803
702133673refgcc -m32 -march=corei7 -O3 -fomit-frame-pointer2019080820190803
703741839refgcc -m32 -march=core2 -msse4.1 -O3 -fomit-frame-pointer2019080820190803
705948230refgcc -m32 -march=core2 -O3 -fomit-frame-pointer2019080820190803
714849123refgcc -m32 -march=native -mtune=native -O3 -fomit-frame-pointer2019080820190803
715093143refgcc -m32 -march=core-avx2 -O3 -fomit-frame-pointer2019080820190803
724300454refgcc -funroll-loops -m32 -march=pentium-m -O3 -fomit-frame-pointer2019080820190803
742029220refgcc -m32 -march=pentium-m -O3 -fomit-frame-pointer2019080820190803
784502246refgcc -funroll-loops -m32 -march=pentium-m -O2 -fomit-frame-pointer2019080820190803
791757945refgcc -m32 -march=core-avx-i -O3 -fomit-frame-pointer2019080820190803
791805877refgcc -m32 -march=corei7-avx -O3 -fomit-frame-pointer2019080820190803
799361235refgcc -funroll-loops -m32 -march=pentium4 -O3 -fomit-frame-pointer2019080820190803
816178078refgcc -m32 -march=nocona -O3 -fomit-frame-pointer2019080820190803
816190270refgcc -m32 -march=prescott -O3 -fomit-frame-pointer2019080820190803
820376387refgcc -funroll-loops -m32 -march=nocona -O3 -fomit-frame-pointer2019080820190803
820448399refgcc -funroll-loops -m32 -march=prescott -O3 -fomit-frame-pointer2019080820190803
821989445refgcc -m32 -march=pentium4 -O3 -fomit-frame-pointer2019080820190803
831566998refgcc -funroll-loops -m32 -march=nocona -O2 -fomit-frame-pointer2019080820190803
831591718refgcc -funroll-loops -m32 -march=prescott -O2 -fomit-frame-pointer2019080820190803
834202770refgcc -funroll-loops -m32 -march=pentium4 -O -fomit-frame-pointer2019080820190803
836118246refgcc -m32 -march=core-avx2 -O2 -fomit-frame-pointer2019080820190803
836993006refgcc -m32 -march=native -mtune=native -O2 -fomit-frame-pointer2019080820190803
838426867refgcc -funroll-loops -m32 -march=pentium-m -O -fomit-frame-pointer2019080820190803
838965498refgcc -funroll-loops -m32 -march=k8 -O -fomit-frame-pointer2019080820190803
841323978refgcc -funroll-loops -m32 -march=barcelona -O -fomit-frame-pointer2019080820190803
841813740refgcc -m32 -march=core-avx2 -Os -fomit-frame-pointer2019080820190803
842115494refgcc -m32 -march=native -mtune=native -Os -fomit-frame-pointer2019080820190803
845710379refgcc -funroll-loops -m32 -march=nocona -O -fomit-frame-pointer2019080820190803
845775639refgcc -funroll-loops -m32 -march=prescott -O -fomit-frame-pointer2019080820190803
857993694refgcc -m32 -march=core-avx-i -Os -fomit-frame-pointer2019080820190803
858082451refgcc -m32 -march=corei7-avx -Os -fomit-frame-pointer2019080820190803
860339216refgcc -funroll-loops -m32 -march=pentium4 -O2 -fomit-frame-pointer2019080820190803
863711654refgcc -m32 -march=core-avx-i -O2 -fomit-frame-pointer2019080820190803
864331148refgcc -m32 -march=corei7-avx -O2 -fomit-frame-pointer2019080820190803
894793768refgcc -funroll-loops -m32 -march=prescott -Os -fomit-frame-pointer2019080820190803
894812518refgcc -funroll-loops -m32 -march=nocona -Os -fomit-frame-pointer2019080820190803
894888621refgcc -funroll-loops -m32 -march=pentium4 -Os -fomit-frame-pointer2019080820190803
896919198refgcc -m32 -march=core2 -msse4.1 -Os -fomit-frame-pointer2019080820190803
896923804refgcc -m32 -march=corei7 -Os -fomit-frame-pointer2019080820190803
896971532refgcc -m32 -march=core2 -msse4 -Os -fomit-frame-pointer2019080820190803
905309529refgcc -m32 -march=pentium4 -Os -fomit-frame-pointer2019080820190803
905321894refgcc -m32 -march=nocona -Os -fomit-frame-pointer2019080820190803
905325263refgcc -m32 -march=prescott -Os -fomit-frame-pointer2019080820190803
906148674refgcc -m32 -march=core2 -Os -fomit-frame-pointer2019080820190803
906933399refgcc -m32 -march=core2 -msse4 -O2 -fomit-frame-pointer2019080820190803
906951546refgcc -m32 -march=core2 -msse4.1 -O2 -fomit-frame-pointer2019080820190803
906963294refgcc -m32 -march=corei7 -O2 -fomit-frame-pointer2019080820190803
927231113refgcc -m32 -march=pentium-m -Os -fomit-frame-pointer2019080820190803
937975091refgcc -m32 -march=core2 -O2 -fomit-frame-pointer2019080820190803
940240873refgcc -m32 -march=pentium-m -O2 -fomit-frame-pointer2019080820190803
943719732refgcc -funroll-loops -m32 -march=pentium-m -Os -fomit-frame-pointer2019080820190803
964402345refgcc -m32 -march=core-avx2 -O -fomit-frame-pointer2019080820190803
964465806refgcc -m32 -march=native -mtune=native -O -fomit-frame-pointer2019080820190803
989797103refgcc -m32 -O3 -fomit-frame-pointer2019080820190803
992913183refgcc -m32 -march=athlon -O3 -fomit-frame-pointer2019080820190803
997619550refgcc -m32 -march=k6 -O3 -fomit-frame-pointer2019080820190803
999160976refgcc -funroll-loops -m32 -march=k6 -O3 -fomit-frame-pointer2019080820190803
999883484refgcc -funroll-loops -m32 -march=k6-3 -O3 -fomit-frame-pointer2019080820190803
1000050363refgcc -funroll-loops -m32 -march=k6-2 -O3 -fomit-frame-pointer2019080820190803
1000629303refgcc -funroll-loops -m32 -march=athlon -O3 -fomit-frame-pointer2019080820190803
1003857274refgcc -funroll-loops -m32 -O3 -fomit-frame-pointer2019080820190803
1004332349refgcc -m32 -march=pentium4 -O2 -fomit-frame-pointer2019080820190803
1014461494refgcc -funroll-loops -m32 -march=athlon -O -fomit-frame-pointer2019080820190803
1015955210refgcc -funroll-loops -m32 -O -fomit-frame-pointer2019080820190803
1018508924refgcc -m32 -march=k6-3 -O3 -fomit-frame-pointer2019080820190803
1018673314refgcc -m32 -march=k6-2 -O3 -fomit-frame-pointer2019080820190803
1021651069refgcc -funroll-loops -m32 -march=pentiumpro -O -fomit-frame-pointer2019080820190803
1021732062refgcc -funroll-loops -m32 -march=pentium2 -O -fomit-frame-pointer2019080820190803
1021826683refgcc -funroll-loops -m32 -march=pentium3 -O -fomit-frame-pointer2019080820190803
1022212192refgcc -funroll-loops -m32 -march=k6 -O2 -fomit-frame-pointer2019080820190803
1022387117refgcc -funroll-loops -m32 -march=k6-2 -O2 -fomit-frame-pointer2019080820190803
1022401649refgcc -funroll-loops -m32 -march=k6-3 -O2 -fomit-frame-pointer2019080820190803
1024497754refgcc -funroll-loops -m32 -march=athlon -O2 -fomit-frame-pointer2019080820190803
1025369784refgcc -funroll-loops -m32 -march=k6 -O -fomit-frame-pointer2019080820190803
1025373365refgcc -funroll-loops -m32 -march=k6-3 -O -fomit-frame-pointer2019080820190803
1025406635refgcc -funroll-loops -m32 -march=k6-2 -O -fomit-frame-pointer2019080820190803
1027950348refgcc -funroll-loops -m32 -O2 -fomit-frame-pointer2019080820190803
1039331718refgcc -m32 -march=nocona -O2 -fomit-frame-pointer2019080820190803
1039402534refgcc -m32 -march=prescott -O2 -fomit-frame-pointer2019080820190803
1044963359refgcc -m32 -march=pentium4 -O -fomit-frame-pointer2019080820190803
1048280548refgcc -funroll-loops -m32 -march=i486 -O3 -fomit-frame-pointer2019080820190803
1051808947refgcc -funroll-loops -m32 -march=i386 -O3 -fomit-frame-pointer2019080820190803
1056278553refgcc -m32 -march=pentium-m -O -fomit-frame-pointer2019080820190803
1058151645refgcc -funroll-loops -m32 -march=i386 -O2 -fomit-frame-pointer2019080820190803
1059833097refgcc -m32 -march=nocona -O -fomit-frame-pointer2019080820190803
1059848448refgcc -m32 -march=prescott -O -fomit-frame-pointer2019080820190803
1060687945refgcc -funroll-loops -m32 -march=i486 -O2 -fomit-frame-pointer2019080820190803
1061067603refgcc -m32 -march=k8 -O -fomit-frame-pointer2019080820190803
1065244311refgcc -funroll-loops -m32 -march=pentium2 -O3 -fomit-frame-pointer2019080820190803
1065327708refgcc -funroll-loops -m32 -march=pentiumpro -O3 -fomit-frame-pointer2019080820190803
1067678734refgcc -funroll-loops -m32 -march=pentium3 -O3 -fomit-frame-pointer2019080820190803
1068234617refgcc -m32 -march=i486 -O3 -fomit-frame-pointer2019080820190803
1069019730refgcc -m32 -march=barcelona -O -fomit-frame-pointer2019080820190803
1073280522refgcc -funroll-loops -m32 -march=i486 -O -fomit-frame-pointer2019080820190803
1075122147refgcc -m32 -march=pentium2 -O3 -fomit-frame-pointer2019080820190803
1075144224refgcc -m32 -march=pentiumpro -O3 -fomit-frame-pointer2019080820190803
1075267244refgcc -m32 -march=i386 -O3 -fomit-frame-pointer2019080820190803
1075350638refgcc -funroll-loops -m32 -march=pentium-mmx -O -fomit-frame-pointer2019080820190803
1075352722refgcc -funroll-loops -m32 -march=pentium -O -fomit-frame-pointer2019080820190803
1078903738refgcc -funroll-loops -m32 -march=i386 -O -fomit-frame-pointer2019080820190803
1080075109refgcc -m32 -march=core2 -msse4.1 -O -fomit-frame-pointer2019080820190803
1080088764refgcc -m32 -march=core2 -msse4 -O -fomit-frame-pointer2019080820190803
1080125189refgcc -m32 -march=core2 -O -fomit-frame-pointer2019080820190803
1081093187refgcc -m32 -march=pentium3 -O3 -fomit-frame-pointer2019080820190803
1085790786refgcc -m32 -march=corei7 -O -fomit-frame-pointer2019080820190803
1085820441refgcc -m32 -march=core-avx-i -O -fomit-frame-pointer2019080820190803
1086061639refgcc -m32 -march=corei7-avx -O -fomit-frame-pointer2019080820190803
1097864004refgcc -funroll-loops -m32 -march=pentium2 -O2 -fomit-frame-pointer2019080820190803
1097869893refgcc -funroll-loops -m32 -march=pentium3 -O2 -fomit-frame-pointer2019080820190803
1097919697refgcc -funroll-loops -m32 -march=pentiumpro -O2 -fomit-frame-pointer2019080820190803
1112904072refgcc -m32 -march=i486 -Os -fomit-frame-pointer2019080820190803
1117385835refgcc -funroll-loops -m32 -march=pentium -O2 -fomit-frame-pointer2019080820190803
1117421720refgcc -funroll-loops -m32 -march=pentium-mmx -O2 -fomit-frame-pointer2019080820190803
1117963415refgcc -m32 -march=i386 -Os -fomit-frame-pointer2019080820190803
1118985722refgcc -m32 -march=pentium -Os -fomit-frame-pointer2019080820190803
1119032051refgcc -m32 -march=pentium-mmx -Os -fomit-frame-pointer2019080820190803
1120339033refgcc -funroll-loops -m32 -march=pentium3 -Os -fomit-frame-pointer2019080820190803
1120357260refgcc -funroll-loops -m32 -march=pentiumpro -Os -fomit-frame-pointer2019080820190803
1120408252refgcc -funroll-loops -m32 -march=pentium2 -Os -fomit-frame-pointer2019080820190803
1120885306refgcc -funroll-loops -m32 -march=pentium -Os -fomit-frame-pointer2019080820190803
1120895887refgcc -funroll-loops -m32 -march=pentium-mmx -Os -fomit-frame-pointer2019080820190803
1124738629refgcc -funroll-loops -m32 -march=pentium-mmx -O3 -fomit-frame-pointer2019080820190803
1124750732refgcc -funroll-loops -m32 -march=pentium -O3 -fomit-frame-pointer2019080820190803
1125607060refgcc -m32 -march=pentium2 -Os -fomit-frame-pointer2019080820190803
1125645347refgcc -m32 -march=pentium3 -Os -fomit-frame-pointer2019080820190803
1125647774refgcc -m32 -march=pentiumpro -Os -fomit-frame-pointer2019080820190803
1126581544refgcc -m32 -march=k6-3 -Os -fomit-frame-pointer2019080820190803
1126612278refgcc -m32 -march=k6 -Os -fomit-frame-pointer2019080820190803
1126619621refgcc -m32 -march=k6-2 -Os -fomit-frame-pointer2019080820190803
1128341807refgcc -funroll-loops -m32 -march=i386 -Os -fomit-frame-pointer2019080820190803
1134346059refgcc -m32 -Os -fomit-frame-pointer2019080820190803
1134353784refgcc -m32 -march=athlon -Os -fomit-frame-pointer2019080820190803
1134402468refgcc -funroll-loops -m32 -Os -fomit-frame-pointer2019080820190803
1135062875refgcc -funroll-loops -m32 -march=athlon -Os -fomit-frame-pointer2019080820190803
1137613375refgcc -funroll-loops -m32 -march=k6-2 -Os -fomit-frame-pointer2019080820190803
1137624875refgcc -funroll-loops -m32 -march=k6-3 -Os -fomit-frame-pointer2019080820190803
1137765499refgcc -m32 -march=pentium -O3 -fomit-frame-pointer2019080820190803
1137865052refgcc -m32 -march=pentium-mmx -O3 -fomit-frame-pointer2019080820190803
1138102508refgcc -funroll-loops -m32 -march=k6 -Os -fomit-frame-pointer2019080820190803
1139108006refgcc -funroll-loops -m32 -march=i486 -Os -fomit-frame-pointer2019080820190803
1208751202refgcc -m32 -march=k6-2 -O2 -fomit-frame-pointer2019080820190803
1208799918refgcc -m32 -march=k6 -O2 -fomit-frame-pointer2019080820190803
1208813262refgcc -m32 -march=k6-3 -O2 -fomit-frame-pointer2019080820190803
1208859462refgcc -m32 -march=k6 -O -fomit-frame-pointer2019080820190803
1208873885refgcc -m32 -march=k6-2 -O -fomit-frame-pointer2019080820190803
1209075834refgcc -m32 -march=k6-3 -O -fomit-frame-pointer2019080820190803
1220508036refgcc -m32 -march=pentium2 -O -fomit-frame-pointer2019080820190803
1220558918refgcc -m32 -march=pentiumpro -O -fomit-frame-pointer2019080820190803
1220566804refgcc -m32 -march=pentium3 -O -fomit-frame-pointer2019080820190803
1223547377refgcc -m32 -march=i386 -O2 -fomit-frame-pointer2019080820190803
1236354732refgcc -m32 -march=i486 -O2 -fomit-frame-pointer2019080820190803
1239620908refgcc -m32 -march=athlon -O -fomit-frame-pointer2019080820190803
1241435446refgcc -m32 -march=athlon -O2 -fomit-frame-pointer2019080820190803
1242921015refgcc -m32 -O2 -fomit-frame-pointer2019080820190803
1253529536refgcc -m32 -march=i386 -O -fomit-frame-pointer2019080820190803
1253724587refgcc -m32 -march=i486 -O -fomit-frame-pointer2019080820190803
1254691514refgcc -m32 -O -fomit-frame-pointer2019080820190803
1274898409refgcc -m32 -march=pentium2 -O2 -fomit-frame-pointer2019080820190803
1274913119refgcc -m32 -march=pentium3 -O2 -fomit-frame-pointer2019080820190803
1275148647refgcc -m32 -march=pentiumpro -O2 -fomit-frame-pointer2019080820190803
1277889956refgcc -m32 -march=pentium-mmx -O -fomit-frame-pointer2019080820190803
1278058885refgcc -m32 -march=pentium -O -fomit-frame-pointer2019080820190803
1330694261refgcc -m32 -march=pentium-mmx -O2 -fomit-frame-pointer2019080820190803
1331152816refgcc -m32 -march=pentium -O2 -fomit-frame-pointer2019080820190803
2282381500refgcc -funroll-loops -m32 -march=barcelona -O3 -fomit-frame-pointer2019080820190803
2308419867refgcc -m32 -march=barcelona -O3 -fomit-frame-pointer2019080820190803
2461997931refgcc -m32 -march=k8 -O3 -fomit-frame-pointer2019080820190803
2472670367refgcc -funroll-loops -m32 -march=k8 -O3 -fomit-frame-pointer2019080820190803
2583774762refgcc -funroll-loops -m32 -march=barcelona -O2 -fomit-frame-pointer2019080820190803
2591298895refgcc -funroll-loops -m32 -march=k8 -O2 -fomit-frame-pointer2019080820190803
2645693950refgcc -m32 -march=barcelona -Os -fomit-frame-pointer2019080820190803
2648921087refgcc -funroll-loops -m32 -march=barcelona -Os -fomit-frame-pointer2019080820190803
2773188669refgcc -m32 -march=k8 -Os -fomit-frame-pointer2019080820190803
2793628984refgcc -funroll-loops -m32 -march=k8 -Os -fomit-frame-pointer2019080820190803
2803330559refgcc -m32 -march=barcelona -O2 -fomit-frame-pointer2019080820190803
2839952653refgcc -m32 -march=k8 -O2 -fomit-frame-pointer2019080820190803

Compiler output

Implementation: crypto_sign/sphincsf128shake256simple/avx2
Compiler: gcc -funroll-loops -m32 -O2 -fomit-frame-pointer
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:40: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^~~~~~~
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: note: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^~~~~~~
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 156, namely:
Compiler Implementations
gcc -funroll-loops -m32 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=athlon -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=athlon -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=athlon -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=athlon -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=barcelona -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=barcelona -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=barcelona -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=barcelona -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i386 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i386 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i386 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i386 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i486 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i486 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i486 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i486 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-2 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-2 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-2 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-2 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-3 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-3 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-3 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-3 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k8 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k8 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k8 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k8 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=nocona -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=nocona -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=nocona -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=nocona -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-m -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-m -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-m -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-m -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-mmx -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-mmx -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-mmx -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-mmx -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium2 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium2 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium2 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium2 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium3 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium3 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium3 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium3 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium4 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium4 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium4 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium4 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentiumpro -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentiumpro -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentiumpro -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentiumpro -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=prescott -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=prescott -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=prescott -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=prescott -Os -fomit-frame-pointer avx2
gcc -m32 -O2 -fomit-frame-pointer avx2
gcc -m32 -O3 -fomit-frame-pointer avx2
gcc -m32 -O -fomit-frame-pointer avx2
gcc -m32 -Os -fomit-frame-pointer avx2
gcc -m32 -march=athlon -O2 -fomit-frame-pointer avx2
gcc -m32 -march=athlon -O3 -fomit-frame-pointer avx2
gcc -m32 -march=athlon -O -fomit-frame-pointer avx2
gcc -m32 -march=athlon -Os -fomit-frame-pointer avx2
gcc -m32 -march=core2 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -O -fomit-frame-pointer avx2
gcc -m32 -march=core2 -Os -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4.1 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4.1 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4.1 -O -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4.1 -Os -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4 -O -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4 -Os -fomit-frame-pointer avx2
gcc -m32 -march=corei7 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=corei7 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=corei7 -O -fomit-frame-pointer avx2
gcc -m32 -march=corei7 -Os -fomit-frame-pointer avx2
gcc -m32 -march=i386 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=i386 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=i386 -O -fomit-frame-pointer avx2
gcc -m32 -march=i386 -Os -fomit-frame-pointer avx2
gcc -m32 -march=i486 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=i486 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=i486 -O -fomit-frame-pointer avx2
gcc -m32 -march=i486 -Os -fomit-frame-pointer avx2
gcc -m32 -march=k6-2 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=k6-2 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=k6-2 -O -fomit-frame-pointer avx2
gcc -m32 -march=k6-2 -Os -fomit-frame-pointer avx2
gcc -m32 -march=k6-3 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=k6-3 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=k6-3 -O -fomit-frame-pointer avx2
gcc -m32 -march=k6-3 -Os -fomit-frame-pointer avx2
gcc -m32 -march=k6 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=k6 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=k6 -O -fomit-frame-pointer avx2
gcc -m32 -march=k6 -Os -fomit-frame-pointer avx2
gcc -m32 -march=k8 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=k8 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=k8 -O -fomit-frame-pointer avx2
gcc -m32 -march=k8 -Os -fomit-frame-pointer avx2
gcc -m32 -march=nocona -O2 -fomit-frame-pointer avx2
gcc -m32 -march=nocona -O3 -fomit-frame-pointer avx2
gcc -m32 -march=nocona -O -fomit-frame-pointer avx2
gcc -m32 -march=nocona -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium-m -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium-m -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium-m -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium-m -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium-mmx -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium-mmx -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium-mmx -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium-mmx -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium2 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium2 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium2 -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium2 -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium3 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium3 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium3 -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium3 -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium4 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium4 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium4 -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium4 -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentiumpro -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentiumpro -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentiumpro -O -fomit-frame-pointer avx2
gcc -m32 -march=pentiumpro -Os -fomit-frame-pointer avx2
gcc -m32 -march=prescott -O2 -fomit-frame-pointer avx2
gcc -m32 -march=prescott -O3 -fomit-frame-pointer avx2
gcc -m32 -march=prescott -O -fomit-frame-pointer avx2
gcc -m32 -march=prescott -Os -fomit-frame-pointer avx2

Compiler output

Implementation: crypto_sign/sphincsf128shake256simple/avx2
Compiler: gcc -m32 -march=barcelona -O2 -fomit-frame-pointer
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:40: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^~~~~~~
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: note: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^~~~~~~
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ...
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:40: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^~~~~~~
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: note: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^~~~~~~
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler Implementations
gcc -m32 -march=barcelona -O2 -fomit-frame-pointer avx2
gcc -m32 -march=barcelona -O3 -fomit-frame-pointer avx2
gcc -m32 -march=barcelona -O -fomit-frame-pointer avx2
gcc -m32 -march=barcelona -Os -fomit-frame-pointer avx2

Compiler output

Implementation: crypto_sign/sphincsf128shake256simple/avx2
Compiler: gcc -m32 -march=core-avx-i -O2 -fomit-frame-pointer
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: note: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^~~~~~~
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: note: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:142:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+2], lanes2 ),\
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 8, namely:
Compiler Implementations
gcc -m32 -march=core-avx-i -O2 -fomit-frame-pointer avx2
gcc -m32 -march=core-avx-i -O3 -fomit-frame-pointer avx2
gcc -m32 -march=core-avx-i -O -fomit-frame-pointer avx2
gcc -m32 -march=core-avx-i -Os -fomit-frame-pointer avx2
gcc -m32 -march=corei7-avx -O2 -fomit-frame-pointer avx2
gcc -m32 -march=corei7-avx -O3 -fomit-frame-pointer avx2
gcc -m32 -march=corei7-avx -O -fomit-frame-pointer avx2
gcc -m32 -march=corei7-avx -Os -fomit-frame-pointer avx2