Implementation notes: x86, samba, crypto_sign/sphincsf256shake256simple

Computer: samba
Architecture: x86
CPU ID: GenuineIntel-000506e3-bfebfbff
SUPERCOP version: 20190803
Operation: crypto_sign
Primitive: sphincsf256shake256simple
Time | Implementation | Compiler | Benchmark date | SUPERCOP version
560393841avx2gcc -m32 -march=core-avx2 -O3 -fomit-frame-pointer2019080920190803
566179752avx2gcc -m32 -march=native -mtune=native -O3 -fomit-frame-pointer2019080920190803
885894521avx2gcc -m32 -march=native -mtune=native -O -fomit-frame-pointer2019080920190803
885999615avx2gcc -m32 -march=core-avx2 -O -fomit-frame-pointer2019080920190803
962453933avx2gcc -m32 -march=native -mtune=native -O2 -fomit-frame-pointer2019080920190803
962749331avx2gcc -m32 -march=core-avx2 -O2 -fomit-frame-pointer2019080920190803
1032305359avx2gcc -m32 -march=native -mtune=native -Os -fomit-frame-pointer2019080920190803
1033262412avx2gcc -m32 -march=core-avx2 -Os -fomit-frame-pointer2019080920190803
1929753671refgcc -m32 -march=core2 -msse4 -O3 -fomit-frame-pointer2019080920190803
1934261948refgcc -m32 -march=corei7 -O3 -fomit-frame-pointer2019080920190803
1941512742refgcc -m32 -march=core2 -msse4.1 -O3 -fomit-frame-pointer2019080920190803
1946855662refgcc -m32 -march=core2 -O3 -fomit-frame-pointer2019080920190803
1967237885refgcc -m32 -march=native -mtune=native -O3 -fomit-frame-pointer2019080920190803
1967440121refgcc -m32 -march=core-avx2 -O3 -fomit-frame-pointer2019080920190803
1995069457refgcc -funroll-loops -m32 -march=pentium-m -O3 -fomit-frame-pointer2019080920190803
2045502855refgcc -m32 -march=pentium-m -O3 -fomit-frame-pointer2019080920190803
2176038917refgcc -m32 -march=corei7-avx -O3 -fomit-frame-pointer2019080920190803
2176112536refgcc -m32 -march=core-avx-i -O3 -fomit-frame-pointer2019080920190803
2176532073refgcc -funroll-loops -m32 -march=pentium-m -O2 -fomit-frame-pointer2019080920190803
2212757429refgcc -funroll-loops -m32 -march=pentium4 -O3 -fomit-frame-pointer2019080920190803
2247113500refgcc -m32 -march=nocona -O3 -fomit-frame-pointer2019080920190803
2247465267refgcc -m32 -march=prescott -O3 -fomit-frame-pointer2019080920190803
2255671857refgcc -funroll-loops -m32 -march=prescott -O3 -fomit-frame-pointer2019080920190803
2256065778refgcc -funroll-loops -m32 -march=nocona -O3 -fomit-frame-pointer2019080920190803
2257461526refgcc -m32 -march=pentium4 -O3 -fomit-frame-pointer2019080920190803
2304242696refgcc -funroll-loops -m32 -march=prescott -O2 -fomit-frame-pointer2019080920190803
2305472552refgcc -funroll-loops -m32 -march=nocona -O2 -fomit-frame-pointer2019080920190803
2319843000refgcc -m32 -march=native -mtune=native -Os -fomit-frame-pointer2019080920190803
2319975827refgcc -m32 -march=core-avx2 -Os -fomit-frame-pointer2019080920190803
2319982964refgcc -funroll-loops -m32 -march=k8 -O -fomit-frame-pointer2019080920190803
2326301282refgcc -funroll-loops -m32 -march=pentium-m -O -fomit-frame-pointer2019080920190803
2332128696refgcc -funroll-loops -m32 -march=barcelona -O -fomit-frame-pointer2019080920190803
2332962738refgcc -funroll-loops -m32 -march=pentium4 -O -fomit-frame-pointer2019080920190803
2335423641refgcc -funroll-loops -m32 -march=prescott -O -fomit-frame-pointer2019080920190803
2335484608refgcc -funroll-loops -m32 -march=nocona -O -fomit-frame-pointer2019080920190803
2380320897refgcc -m32 -march=corei7-avx -O2 -fomit-frame-pointer2019080920190803
2380389601refgcc -m32 -march=core-avx-i -O2 -fomit-frame-pointer2019080920190803
2394466470refgcc -m32 -march=corei7-avx -Os -fomit-frame-pointer2019080920190803
2394478262refgcc -m32 -march=core-avx-i -Os -fomit-frame-pointer2019080920190803
2406541080refgcc -m32 -march=core-avx2 -O2 -fomit-frame-pointer2019080920190803
2407018687refgcc -m32 -march=native -mtune=native -O2 -fomit-frame-pointer2019080920190803
2407603496refgcc -funroll-loops -m32 -march=pentium4 -O2 -fomit-frame-pointer2019080920190803
2470984773refgcc -m32 -march=core2 -Os -fomit-frame-pointer2019080920190803
2472301553refgcc -m32 -march=core2 -msse4.1 -O2 -fomit-frame-pointer2019080920190803
2472460457refgcc -m32 -march=corei7 -O2 -fomit-frame-pointer2019080920190803
2472590804refgcc -m32 -march=core2 -msse4 -O2 -fomit-frame-pointer2019080920190803
2486748040refgcc -m32 -march=core2 -msse4.1 -Os -fomit-frame-pointer2019080920190803
2486787115refgcc -m32 -march=core2 -msse4 -Os -fomit-frame-pointer2019080920190803
2486994143refgcc -m32 -march=corei7 -Os -fomit-frame-pointer2019080920190803
2501065639refgcc -m32 -march=prescott -Os -fomit-frame-pointer2019080920190803
2501176481refgcc -m32 -march=nocona -Os -fomit-frame-pointer2019080920190803
2501413914refgcc -m32 -march=pentium4 -Os -fomit-frame-pointer2019080920190803
2540950752refgcc -m32 -march=pentium-m -O2 -fomit-frame-pointer2019080920190803
2558825978refgcc -funroll-loops -m32 -march=nocona -Os -fomit-frame-pointer2019080920190803
2564322773refgcc -funroll-loops -m32 -march=prescott -Os -fomit-frame-pointer2019080920190803
2564862940refgcc -funroll-loops -m32 -march=pentium4 -Os -fomit-frame-pointer2019080920190803
2610432099refgcc -funroll-loops -m32 -march=pentium-m -Os -fomit-frame-pointer2019080920190803
2643463510refgcc -m32 -march=native -mtune=native -O -fomit-frame-pointer2019080920190803
2643562774refgcc -m32 -march=core-avx2 -O -fomit-frame-pointer2019080920190803
2654721303refgcc -m32 -march=pentium-m -Os -fomit-frame-pointer2019080920190803
2676691945refgcc -m32 -march=core2 -O2 -fomit-frame-pointer2019080920190803
2715421615refgcc -m32 -O3 -fomit-frame-pointer2019080920190803
2733856222refgcc -m32 -march=athlon -O3 -fomit-frame-pointer2019080920190803
2740898811refgcc -funroll-loops -m32 -march=k6-3 -O3 -fomit-frame-pointer2019080920190803
2744350071refgcc -m32 -march=k6 -O3 -fomit-frame-pointer2019080920190803
2744819940refgcc -funroll-loops -m32 -march=k6-2 -O3 -fomit-frame-pointer2019080920190803
2745045645refgcc -funroll-loops -m32 -march=athlon -O3 -fomit-frame-pointer2019080920190803
2750884071refgcc -funroll-loops -m32 -march=k6 -O3 -fomit-frame-pointer2019080920190803
2767688078refgcc -funroll-loops -m32 -O3 -fomit-frame-pointer2019080920190803
2771573242refgcc -m32 -march=pentium4 -O2 -fomit-frame-pointer2019080920190803
2790904287refgcc -m32 -march=nocona -O2 -fomit-frame-pointer2019080920190803
2790939811refgcc -m32 -march=prescott -O2 -fomit-frame-pointer2019080920190803
2797878651refgcc -m32 -march=k6-3 -O3 -fomit-frame-pointer2019080920190803
2797976609refgcc -m32 -march=k6-2 -O3 -fomit-frame-pointer2019080920190803
2810646556refgcc -funroll-loops -m32 -march=athlon -O -fomit-frame-pointer2019080920190803
2831579250refgcc -funroll-loops -m32 -march=k6-3 -O2 -fomit-frame-pointer2019080920190803
2832013479refgcc -funroll-loops -m32 -march=k6-2 -O2 -fomit-frame-pointer2019080920190803
2832314954refgcc -funroll-loops -m32 -march=k6 -O2 -fomit-frame-pointer2019080920190803
2835237324refgcc -funroll-loops -m32 -march=k6-2 -O -fomit-frame-pointer2019080920190803
2835545367refgcc -funroll-loops -m32 -march=k6-3 -O -fomit-frame-pointer2019080920190803
2835557935refgcc -funroll-loops -m32 -march=k6 -O -fomit-frame-pointer2019080920190803
2835633357refgcc -m32 -march=pentium-m -O -fomit-frame-pointer2019080920190803
2836783855refgcc -funroll-loops -m32 -march=athlon -O2 -fomit-frame-pointer2019080920190803
2838125637refgcc -funroll-loops -m32 -O -fomit-frame-pointer2019080920190803
2838423500refgcc -funroll-loops -m32 -O2 -fomit-frame-pointer2019080920190803
2851131536refgcc -funroll-loops -m32 -march=pentium3 -O -fomit-frame-pointer2019080920190803
2851146109refgcc -funroll-loops -m32 -march=pentiumpro -O -fomit-frame-pointer2019080920190803
2851518160refgcc -funroll-loops -m32 -march=pentium2 -O -fomit-frame-pointer2019080920190803
2853368357refgcc -m32 -march=pentium4 -O -fomit-frame-pointer2019080920190803
2871278591refgcc -funroll-loops -m32 -march=i486 -O3 -fomit-frame-pointer2019080920190803
2889336730refgcc -funroll-loops -m32 -march=i386 -O3 -fomit-frame-pointer2019080920190803
2903766942refgcc -funroll-loops -m32 -march=i386 -O2 -fomit-frame-pointer2019080920190803
2929161141refgcc -funroll-loops -m32 -march=pentiumpro -O3 -fomit-frame-pointer2019080920190803
2930497510refgcc -funroll-loops -m32 -march=pentium2 -O3 -fomit-frame-pointer2019080920190803
2930835149refgcc -m32 -march=k8 -O -fomit-frame-pointer2019080920190803
2931444599refgcc -funroll-loops -m32 -march=i486 -O2 -fomit-frame-pointer2019080920190803
2931563802refgcc -m32 -march=i486 -O3 -fomit-frame-pointer2019080920190803
2933048528refgcc -m32 -march=i386 -O3 -fomit-frame-pointer2019080920190803
2939807058refgcc -m32 -march=barcelona -O -fomit-frame-pointer2019080920190803
2943961777refgcc -funroll-loops -m32 -march=i386 -O -fomit-frame-pointer2019080920190803
2945738857refgcc -m32 -march=nocona -O -fomit-frame-pointer2019080920190803
2945812063refgcc -m32 -march=prescott -O -fomit-frame-pointer2019080920190803
2949858423refgcc -funroll-loops -m32 -march=pentium3 -O3 -fomit-frame-pointer2019080920190803
2973537044refgcc -m32 -march=pentiumpro -O3 -fomit-frame-pointer2019080920190803
2973670853refgcc -m32 -march=pentium2 -O3 -fomit-frame-pointer2019080920190803
2975144234refgcc -m32 -march=corei7-avx -O -fomit-frame-pointer2019080920190803
2975497000refgcc -m32 -march=core-avx-i -O -fomit-frame-pointer2019080920190803
2982008034refgcc -funroll-loops -m32 -march=i486 -O -fomit-frame-pointer2019080920190803
2985326014refgcc -m32 -march=corei7 -O -fomit-frame-pointer2019080920190803
2986946230refgcc -m32 -march=pentium3 -O3 -fomit-frame-pointer2019080920190803
2989076299refgcc -m32 -march=core2 -msse4.1 -O -fomit-frame-pointer2019080920190803
2989419857refgcc -m32 -march=core2 -O -fomit-frame-pointer2019080920190803
2989446917refgcc -m32 -march=core2 -msse4 -O -fomit-frame-pointer2019080920190803
2993070914refgcc -funroll-loops -m32 -march=pentium -O -fomit-frame-pointer2019080920190803
2993104210refgcc -funroll-loops -m32 -march=pentium-mmx -O -fomit-frame-pointer2019080920190803
3045850405refgcc -funroll-loops -m32 -march=pentiumpro -O2 -fomit-frame-pointer2019080920190803
3045902149refgcc -funroll-loops -m32 -march=pentium2 -O2 -fomit-frame-pointer2019080920190803
3046055633refgcc -funroll-loops -m32 -march=pentium3 -O2 -fomit-frame-pointer2019080920190803
3089284214refgcc -m32 -march=k6 -Os -fomit-frame-pointer2019080920190803
3089333630refgcc -m32 -march=k6-3 -Os -fomit-frame-pointer2019080920190803
3089334606refgcc -m32 -march=k6-2 -Os -fomit-frame-pointer2019080920190803
3098328662refgcc -funroll-loops -m32 -march=pentium-mmx -O2 -fomit-frame-pointer2019080920190803
3098562948refgcc -funroll-loops -m32 -march=pentium -O2 -fomit-frame-pointer2019080920190803
3104189276refgcc -funroll-loops -m32 -march=pentium -O3 -fomit-frame-pointer2019080920190803
3104597599refgcc -funroll-loops -m32 -march=pentium-mmx -O3 -fomit-frame-pointer2019080920190803
3115860526refgcc -funroll-loops -m32 -Os -fomit-frame-pointer2019080920190803
3120391236refgcc -funroll-loops -m32 -march=athlon -Os -fomit-frame-pointer2019080920190803
3122522853refgcc -funroll-loops -m32 -march=pentium-mmx -Os -fomit-frame-pointer2019080920190803
3122680629refgcc -funroll-loops -m32 -march=pentium -Os -fomit-frame-pointer2019080920190803
3128129234refgcc -funroll-loops -m32 -march=i486 -Os -fomit-frame-pointer2019080920190803
3129056004refgcc -m32 -march=i386 -Os -fomit-frame-pointer2019080920190803
3131387206refgcc -m32 -march=i486 -Os -fomit-frame-pointer2019080920190803
3133911631refgcc -funroll-loops -m32 -march=pentium3 -Os -fomit-frame-pointer2019080920190803
3133949204refgcc -funroll-loops -m32 -march=pentiumpro -Os -fomit-frame-pointer2019080920190803
3134320030refgcc -funroll-loops -m32 -march=pentium2 -Os -fomit-frame-pointer2019080920190803
3142531042refgcc -funroll-loops -m32 -march=i386 -Os -fomit-frame-pointer2019080920190803
3145098885refgcc -m32 -march=pentium-mmx -Os -fomit-frame-pointer2019080920190803
3145688631refgcc -m32 -march=pentium -Os -fomit-frame-pointer2019080920190803
3155331211refgcc -m32 -march=pentium -O3 -fomit-frame-pointer2019080920190803
3156708413refgcc -m32 -march=pentium-mmx -O3 -fomit-frame-pointer2019080920190803
3174432199refgcc -m32 -Os -fomit-frame-pointer2019080920190803
3175063367refgcc -m32 -march=athlon -Os -fomit-frame-pointer2019080920190803
3180993770refgcc -m32 -march=pentium2 -Os -fomit-frame-pointer2019080920190803
3181010000refgcc -m32 -march=pentium3 -Os -fomit-frame-pointer2019080920190803
3181753828refgcc -m32 -march=pentiumpro -Os -fomit-frame-pointer2019080920190803
3188053102refgcc -funroll-loops -m32 -march=k6-3 -Os -fomit-frame-pointer2019080920190803
3188260571refgcc -funroll-loops -m32 -march=k6-2 -Os -fomit-frame-pointer2019080920190803
3188717330refgcc -funroll-loops -m32 -march=k6 -Os -fomit-frame-pointer2019080920190803
3340888665refgcc -m32 -march=k6-3 -O2 -fomit-frame-pointer2019080920190803
3344008637refgcc -m32 -march=pentium3 -O -fomit-frame-pointer2019080920190803
3344141300refgcc -m32 -march=pentiumpro -O -fomit-frame-pointer2019080920190803
3344438395refgcc -m32 -march=pentium2 -O -fomit-frame-pointer2019080920190803
3352625371refgcc -m32 -march=k6-2 -O2 -fomit-frame-pointer2019080920190803
3353194282refgcc -m32 -march=k6 -O2 -fomit-frame-pointer2019080920190803
3390136863refgcc -m32 -march=k6-3 -O -fomit-frame-pointer2019080920190803
3390143901refgcc -m32 -march=k6 -O -fomit-frame-pointer2019080920190803
3390164998refgcc -m32 -march=k6-2 -O -fomit-frame-pointer2019080920190803
3406953192refgcc -m32 -march=athlon -O -fomit-frame-pointer2019080920190803
3424672177refgcc -m32 -march=i486 -O2 -fomit-frame-pointer2019080920190803
3427158285refgcc -m32 -march=i386 -O2 -fomit-frame-pointer2019080920190803
3452116314refgcc -m32 -march=athlon -O2 -fomit-frame-pointer2019080920190803
3453506689refgcc -m32 -O -fomit-frame-pointer2019080920190803
3462883518refgcc -m32 -O2 -fomit-frame-pointer2019080920190803
3528185143refgcc -m32 -march=i386 -O -fomit-frame-pointer2019080920190803
3539048298refgcc -m32 -march=pentium -O -fomit-frame-pointer2019080920190803
3539161895refgcc -m32 -march=pentium-mmx -O -fomit-frame-pointer2019080920190803
3548619946refgcc -m32 -march=i486 -O -fomit-frame-pointer2019080920190803
3564729977refgcc -m32 -march=pentium3 -O2 -fomit-frame-pointer2019080920190803
3564805482refgcc -m32 -march=pentium2 -O2 -fomit-frame-pointer2019080920190803
3564809784refgcc -m32 -march=pentiumpro -O2 -fomit-frame-pointer2019080920190803
3717528742refgcc -m32 -march=pentium -O2 -fomit-frame-pointer2019080920190803
3718133655refgcc -m32 -march=pentium-mmx -O2 -fomit-frame-pointer2019080920190803
6275468929refgcc -funroll-loops -m32 -march=barcelona -O3 -fomit-frame-pointer2019080920190803
6339256139refgcc -m32 -march=barcelona -O3 -fomit-frame-pointer2019080920190803
6767631712refgcc -m32 -march=k8 -O3 -fomit-frame-pointer2019080920190803
6794289443refgcc -funroll-loops -m32 -march=k8 -O3 -fomit-frame-pointer2019080920190803
7117087091refgcc -funroll-loops -m32 -march=barcelona -O2 -fomit-frame-pointer2019080920190803
7141573611refgcc -funroll-loops -m32 -march=k8 -O2 -fomit-frame-pointer2019080920190803
7305802566refgcc -m32 -march=barcelona -Os -fomit-frame-pointer2019080920190803
7372165839refgcc -funroll-loops -m32 -march=barcelona -Os -fomit-frame-pointer2019080920190803
7648059337refgcc -m32 -march=k8 -Os -fomit-frame-pointer2019080920190803
7658111667refgcc -funroll-loops -m32 -march=k8 -Os -fomit-frame-pointer2019080920190803
7718341206refgcc -m32 -march=barcelona -O2 -fomit-frame-pointer2019080920190803
7819547417refgcc -m32 -march=k8 -O2 -fomit-frame-pointer2019080920190803

Compiler output

Implementation: crypto_sign/sphincsf256shake256simple/avx2
Compiler: gcc -funroll-loops -m32 -O2 -fomit-frame-pointer
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:40: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^~~~~~~
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: note: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^~~~~~~
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 156, namely:
Compiler | Implementations
gcc -funroll-loops -m32 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=athlon -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=athlon -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=athlon -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=athlon -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=barcelona -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=barcelona -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=barcelona -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=barcelona -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i386 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i386 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i386 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i386 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i486 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i486 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i486 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i486 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-2 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-2 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-2 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-2 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-3 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-3 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-3 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-3 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k8 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k8 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k8 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k8 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=nocona -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=nocona -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=nocona -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=nocona -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-m -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-m -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-m -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-m -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-mmx -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-mmx -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-mmx -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-mmx -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium2 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium2 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium2 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium2 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium3 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium3 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium3 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium3 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium4 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium4 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium4 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium4 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentiumpro -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentiumpro -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentiumpro -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentiumpro -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=prescott -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=prescott -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=prescott -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=prescott -Os -fomit-frame-pointer avx2
gcc -m32 -O2 -fomit-frame-pointer avx2
gcc -m32 -O3 -fomit-frame-pointer avx2
gcc -m32 -O -fomit-frame-pointer avx2
gcc -m32 -Os -fomit-frame-pointer avx2
gcc -m32 -march=athlon -O2 -fomit-frame-pointer avx2
gcc -m32 -march=athlon -O3 -fomit-frame-pointer avx2
gcc -m32 -march=athlon -O -fomit-frame-pointer avx2
gcc -m32 -march=athlon -Os -fomit-frame-pointer avx2
gcc -m32 -march=core2 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -O -fomit-frame-pointer avx2
gcc -m32 -march=core2 -Os -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4.1 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4.1 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4.1 -O -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4.1 -Os -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4 -O -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4 -Os -fomit-frame-pointer avx2
gcc -m32 -march=corei7 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=corei7 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=corei7 -O -fomit-frame-pointer avx2
gcc -m32 -march=corei7 -Os -fomit-frame-pointer avx2
gcc -m32 -march=i386 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=i386 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=i386 -O -fomit-frame-pointer avx2
gcc -m32 -march=i386 -Os -fomit-frame-pointer avx2
gcc -m32 -march=i486 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=i486 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=i486 -O -fomit-frame-pointer avx2
gcc -m32 -march=i486 -Os -fomit-frame-pointer avx2
gcc -m32 -march=k6-2 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=k6-2 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=k6-2 -O -fomit-frame-pointer avx2
gcc -m32 -march=k6-2 -Os -fomit-frame-pointer avx2
gcc -m32 -march=k6-3 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=k6-3 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=k6-3 -O -fomit-frame-pointer avx2
gcc -m32 -march=k6-3 -Os -fomit-frame-pointer avx2
gcc -m32 -march=k6 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=k6 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=k6 -O -fomit-frame-pointer avx2
gcc -m32 -march=k6 -Os -fomit-frame-pointer avx2
gcc -m32 -march=k8 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=k8 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=k8 -O -fomit-frame-pointer avx2
gcc -m32 -march=k8 -Os -fomit-frame-pointer avx2
gcc -m32 -march=nocona -O2 -fomit-frame-pointer avx2
gcc -m32 -march=nocona -O3 -fomit-frame-pointer avx2
gcc -m32 -march=nocona -O -fomit-frame-pointer avx2
gcc -m32 -march=nocona -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium-m -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium-m -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium-m -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium-m -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium-mmx -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium-mmx -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium-mmx -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium-mmx -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium2 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium2 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium2 -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium2 -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium3 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium3 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium3 -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium3 -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium4 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium4 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium4 -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium4 -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentiumpro -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentiumpro -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentiumpro -O -fomit-frame-pointer avx2
gcc -m32 -march=pentiumpro -Os -fomit-frame-pointer avx2
gcc -m32 -march=prescott -O2 -fomit-frame-pointer avx2
gcc -m32 -march=prescott -O3 -fomit-frame-pointer avx2
gcc -m32 -march=prescott -O -fomit-frame-pointer avx2
gcc -m32 -march=prescott -Os -fomit-frame-pointer avx2

Compiler output

Implementation: crypto_sign/sphincsf256shake256simple/avx2
Compiler: gcc -m32 -march=barcelona -O2 -fomit-frame-pointer
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:40: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^~~~~~~
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: note: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^~~~~~~
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ...
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:40: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^~~~~~~
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: note: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^~~~~~~
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler | Implementations
gcc -m32 -march=barcelona -O2 -fomit-frame-pointer avx2
gcc -m32 -march=barcelona -O3 -fomit-frame-pointer avx2
gcc -m32 -march=barcelona -O -fomit-frame-pointer avx2
gcc -m32 -march=barcelona -Os -fomit-frame-pointer avx2

Compiler output

Implementation: crypto_sign/sphincsf256shake256simple/avx2
Compiler: gcc -m32 -march=core-avx-i -O2 -fomit-frame-pointer
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: note: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^~~~~~~
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: note: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^~~~~~~~~~~~~~~~~~~~~~
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:142:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+2], lanes2 ),\
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 8, namely:
Compiler | Implementations
gcc -m32 -march=core-avx-i -O2 -fomit-frame-pointer avx2
gcc -m32 -march=core-avx-i -O3 -fomit-frame-pointer avx2
gcc -m32 -march=core-avx-i -O -fomit-frame-pointer avx2
gcc -m32 -march=core-avx-i -Os -fomit-frame-pointer avx2
gcc -m32 -march=corei7-avx -O2 -fomit-frame-pointer avx2
gcc -m32 -march=corei7-avx -O3 -fomit-frame-pointer avx2
gcc -m32 -march=corei7-avx -O -fomit-frame-pointer avx2
gcc -m32 -march=corei7-avx -Os -fomit-frame-pointer avx2