Implementation notes: x86, rumba5, crypto_sign/sphincsf256shake256

Computer: rumba5
Architecture: x86
CPU ID: AuthenticAMD-00800f11-178bfbff
SUPERCOP version: 20181216
Operation: crypto_sign
Primitive: sphincsf256shake256
Time | Implementation | Compiler | Benchmark date | SUPERCOP version
1580397504avx2gcc -m32 -march=native -mtune=native -O3 -fomit-frame-pointer2018090320180818
1618084256avx2gcc -m32 -march=core-avx2 -O3 -fomit-frame-pointer2018090320180818
2428451104avx2gcc -m32 -march=core-avx2 -O -fomit-frame-pointer2018090320180818
2431752288avx2gcc -m32 -march=native -mtune=native -O2 -fomit-frame-pointer2018090320180818
2453460512avx2gcc -m32 -march=core-avx2 -O2 -fomit-frame-pointer2018090320180818
2456150176avx2gcc -m32 -march=native -mtune=native -Os -fomit-frame-pointer2018090320180818
2508731968avx2gcc -m32 -march=core-avx2 -Os -fomit-frame-pointer2018090320180818
2523908352avx2gcc -m32 -march=native -mtune=native -O -fomit-frame-pointer2018090320180818
6246054592refgcc -m32 -march=native -mtune=native -O3 -fomit-frame-pointer2018090320180818
6295906560refgcc -m32 -march=core-avx2 -O3 -fomit-frame-pointer2018090320180818
6648281728refgcc -m32 -march=k8 -O3 -fomit-frame-pointer2018090320180818
6658218880refgcc -m32 -march=barcelona -O3 -fomit-frame-pointer2018090320180818
6662852800refgcc -m32 -march=core2 -msse4.1 -O3 -fomit-frame-pointer2018090320180818
6675807200refgcc -m32 -march=core2 -msse4 -O3 -fomit-frame-pointer2018090320180818
6680072320refgcc -m32 -march=corei7 -O3 -fomit-frame-pointer2018090320180818
6689199520refgcc -m32 -march=corei7-avx -O3 -fomit-frame-pointer2018090320180818
6690352928refgcc -m32 -march=core-avx-i -O3 -fomit-frame-pointer2018090320180818
6698458560refgcc -m32 -march=core2 -O3 -fomit-frame-pointer2018090320180818
6746751776refgcc -funroll-loops -m32 -march=k6-3 -O3 -fomit-frame-pointer2018090320180818
6748037952refgcc -funroll-loops -m32 -march=k6-2 -O3 -fomit-frame-pointer2018090320180818
6760325440refgcc -funroll-loops -m32 -march=k6 -O3 -fomit-frame-pointer2018090320180818
6780215360refgcc -funroll-loops -m32 -march=pentium4 -O3 -fomit-frame-pointer2018090320180818
6780831968refgcc -m32 -march=athlon -O3 -fomit-frame-pointer2018090320180818
6791798272refgcc -funroll-loops -m32 -march=nocona -O3 -fomit-frame-pointer2018090320180818
6791905504refgcc -funroll-loops -m32 -march=prescott -O3 -fomit-frame-pointer2018090320180818
6803606944refgcc -funroll-loops -m32 -march=pentium-m -O3 -fomit-frame-pointer2018090320180818
6815372352refgcc -m32 -O3 -fomit-frame-pointer2018090320180818
6818202368refgcc -m32 -march=prescott -O3 -fomit-frame-pointer2018090320180818
6825303936refgcc -m32 -march=nocona -O3 -fomit-frame-pointer2018090320180818
6906540864refgcc -m32 -march=k6-3 -O3 -fomit-frame-pointer2018090320180818
6910154880refgcc -m32 -march=k6 -O3 -fomit-frame-pointer2018090320180818
6923888064refgcc -m32 -march=k6-2 -O3 -fomit-frame-pointer2018090320180818
6945544544refgcc -m32 -march=pentium-m -O3 -fomit-frame-pointer2018090320180818
6965480192refgcc -funroll-loops -m32 -march=pentiumpro -O3 -fomit-frame-pointer2018090320180818
7041890080refgcc -m32 -march=i386 -O3 -fomit-frame-pointer2018090320180818
7060227264refgcc -funroll-loops -m32 -march=k8 -O3 -fomit-frame-pointer2018090320180818
7076561184refgcc -m32 -march=i486 -O3 -fomit-frame-pointer2018090320180818
7089497536refgcc -m32 -march=pentium4 -O3 -fomit-frame-pointer2018090320180818
7111713376refgcc -m32 -march=pentium2 -O3 -fomit-frame-pointer2018090320180818
7112672256refgcc -funroll-loops -m32 -march=pentium2 -O3 -fomit-frame-pointer2018090320180818
7126492864refgcc -m32 -march=pentium3 -O3 -fomit-frame-pointer2018090320180818
7128276640refgcc -m32 -march=pentiumpro -O3 -fomit-frame-pointer2018090320180818
7169443296refgcc -funroll-loops -m32 -march=pentium3 -O3 -fomit-frame-pointer2018090320180818
7219916864refgcc -funroll-loops -m32 -march=k6-3 -O2 -fomit-frame-pointer2018090320180818
7220939072refgcc -funroll-loops -m32 -march=k6 -O2 -fomit-frame-pointer2018090320180818
7226398112refgcc -funroll-loops -m32 -march=i486 -O3 -fomit-frame-pointer2018090320180818
7228954272refgcc -funroll-loops -m32 -march=k6 -O -fomit-frame-pointer2018090320180818
7231480640refgcc -funroll-loops -m32 -march=k6-2 -O2 -fomit-frame-pointer2018090320180818
7240213408refgcc -funroll-loops -m32 -march=k6-3 -O -fomit-frame-pointer2018090320180818
7273480288refgcc -funroll-loops -m32 -O -fomit-frame-pointer2018090320180818
7274432736refgcc -funroll-loops -m32 -march=i386 -O3 -fomit-frame-pointer2018090320180818
7274894048refgcc -funroll-loops -m32 -march=athlon -O -fomit-frame-pointer2018090320180818
7290327520refgcc -funroll-loops -m32 -march=k6-2 -O -fomit-frame-pointer2018090320180818
7299845024refgcc -m32 -march=native -mtune=native -O2 -fomit-frame-pointer2018090320180818
7305102784refgcc -m32 -march=core-avx2 -O2 -fomit-frame-pointer2018090320180818
7311258272refgcc -funroll-loops -m32 -march=k8 -O -fomit-frame-pointer2018090320180818
7312856448refgcc -funroll-loops -m32 -O3 -fomit-frame-pointer2018090320180818
7329470752refgcc -funroll-loops -m32 -march=nocona -O -fomit-frame-pointer2018090320180818
7332129952refgcc -funroll-loops -m32 -march=barcelona -O -fomit-frame-pointer2018090320180818
7339286880refgcc -funroll-loops -m32 -march=prescott -O -fomit-frame-pointer2018090320180818
7341026944refgcc -funroll-loops -m32 -march=pentium-mmx -O3 -fomit-frame-pointer2018090320180818
7348774304refgcc -funroll-loops -m32 -march=pentium -O3 -fomit-frame-pointer2018090320180818
7349394688refgcc -m32 -march=pentium -O3 -fomit-frame-pointer2018090320180818
7349815648refgcc -m32 -march=pentium-mmx -O3 -fomit-frame-pointer2018090320180818
7352888192refgcc -funroll-loops -m32 -march=prescott -O2 -fomit-frame-pointer2018090320180818
7392492288refgcc -funroll-loops -m32 -march=i386 -O -fomit-frame-pointer2018090320180818
7402201472refgcc -funroll-loops -m32 -march=i486 -O2 -fomit-frame-pointer2018090320180818
7414577216refgcc -funroll-loops -m32 -march=i486 -O -fomit-frame-pointer2018090320180818
7433934368refgcc -funroll-loops -m32 -march=athlon -Os -fomit-frame-pointer2018090320180818
7434704512refgcc -funroll-loops -m32 -Os -fomit-frame-pointer2018090320180818
7462214144refgcc -funroll-loops -m32 -march=pentium -Os -fomit-frame-pointer2018090320180818
7470175104refgcc -funroll-loops -m32 -march=pentium-mmx -Os -fomit-frame-pointer2018090320180818
7474298880refgcc -funroll-loops -m32 -march=k6-3 -Os -fomit-frame-pointer2018090320180818
7475505920refgcc -funroll-loops -m32 -march=k6 -Os -fomit-frame-pointer2018090320180818
7476445536refgcc -funroll-loops -m32 -march=pentium-mmx -O -fomit-frame-pointer2018090320180818
7476671296refgcc -funroll-loops -m32 -march=nocona -O2 -fomit-frame-pointer2018090320180818
7477844512refgcc -funroll-loops -m32 -march=pentium -O -fomit-frame-pointer2018090320180818
7478718016refgcc -funroll-loops -m32 -march=k6-2 -Os -fomit-frame-pointer2018090320180818
7480497952refgcc -funroll-loops -m32 -march=barcelona -Os -fomit-frame-pointer2018090320180818
7481030240refgcc -funroll-loops -m32 -march=k8 -Os -fomit-frame-pointer2018090320180818
7525223744refgcc -m32 -march=native -mtune=native -Os -fomit-frame-pointer2018090320180818
7531872448refgcc -m32 -march=k6 -Os -fomit-frame-pointer2018090320180818
7537584672refgcc -funroll-loops -m32 -march=athlon -O3 -fomit-frame-pointer2018090320180818
7548085696refgcc -m32 -march=k6-3 -Os -fomit-frame-pointer2018090320180818
7556066304refgcc -m32 -march=k6-2 -Os -fomit-frame-pointer2018090320180818
7570074816refgcc -m32 -march=pentium -Os -fomit-frame-pointer2018090320180818
7574029696refgcc -m32 -march=pentium-mmx -Os -fomit-frame-pointer2018090320180818
7592982368refgcc -m32 -march=core-avx2 -O -fomit-frame-pointer2018090320180818
7640259264refgcc -funroll-loops -m32 -march=pentium -O2 -fomit-frame-pointer2018090320180818
7665488480refgcc -funroll-loops -m32 -march=i386 -Os -fomit-frame-pointer2018090320180818
7666800736refgcc -funroll-loops -m32 -march=i486 -Os -fomit-frame-pointer2018090320180818
7667745088refgcc -funroll-loops -m32 -march=i386 -O2 -fomit-frame-pointer2018090320180818
7687015680refgcc -funroll-loops -m32 -O2 -fomit-frame-pointer2018090320180818
7706619840refgcc -funroll-loops -m32 -march=pentium-mmx -O2 -fomit-frame-pointer2018090320180818
7707415744refgcc -m32 -march=i386 -Os -fomit-frame-pointer2018090320180818
7710586944refgcc -m32 -march=i486 -Os -fomit-frame-pointer2018090320180818
7713953600refgcc -m32 -march=prescott -Os -fomit-frame-pointer2018090320180818
7719215328refgcc -m32 -march=nocona -Os -fomit-frame-pointer2018090320180818
7724218560refgcc -m32 -march=pentium4 -Os -fomit-frame-pointer2018090320180818
7729142432refgcc -m32 -march=native -mtune=native -O -fomit-frame-pointer2018090320180818
7751170464refgcc -funroll-loops -m32 -march=pentium4 -Os -fomit-frame-pointer2018090320180818
7754431392refgcc -funroll-loops -m32 -march=nocona -Os -fomit-frame-pointer2018090320180818
7797026368refgcc -funroll-loops -m32 -march=k8 -O2 -fomit-frame-pointer2018090320180818
7797873472refgcc -m32 -march=core2 -msse4 -O -fomit-frame-pointer2018090320180818
7799337440refgcc -funroll-loops -m32 -march=barcelona -O2 -fomit-frame-pointer2018090320180818
7803643328refgcc -m32 -march=athlon -O -fomit-frame-pointer2018090320180818
7806129952refgcc -m32 -march=core2 -msse4.1 -O -fomit-frame-pointer2018090320180818
7806260128refgcc -m32 -march=nocona -O -fomit-frame-pointer2018090320180818
7812844384refgcc -m32 -march=barcelona -O -fomit-frame-pointer2018090320180818
7817666688refgcc -m32 -march=core2 -O -fomit-frame-pointer2018090320180818
7836257024refgcc -m32 -march=corei7 -O -fomit-frame-pointer2018090320180818
7836365568refgcc -m32 -march=prescott -O -fomit-frame-pointer2018090320180818
7855896096refgcc -funroll-loops -m32 -march=pentium2 -Os -fomit-frame-pointer2018090320180818
7856369344refgcc -funroll-loops -m32 -march=pentiumpro -Os -fomit-frame-pointer2018090320180818
7873032640refgcc -m32 -march=corei7-avx -O -fomit-frame-pointer2018090320180818
7883810368refgcc -funroll-loops -m32 -march=athlon -O2 -fomit-frame-pointer2018090320180818
7888425024refgcc -funroll-loops -m32 -march=prescott -Os -fomit-frame-pointer2018090320180818
7898803872refgcc -funroll-loops -m32 -march=pentium4 -O -fomit-frame-pointer2018090320180818
7902238432refgcc -m32 -O -fomit-frame-pointer2018090320180818
7905141056refgcc -m32 -march=nocona -O2 -fomit-frame-pointer2018090320180818
7909250080refgcc -funroll-loops -m32 -march=pentium3 -Os -fomit-frame-pointer2018090320180818
7912407072refgcc -funroll-loops -m32 -march=barcelona -O3 -fomit-frame-pointer2018090320180818
7913124160refgcc -m32 -march=prescott -O2 -fomit-frame-pointer2018090320180818
7914774336refgcc -funroll-loops -m32 -march=pentium-m -Os -fomit-frame-pointer2018090320180818
7924796352refgcc -m32 -march=pentiumpro -Os -fomit-frame-pointer2018090320180818
7934894944refgcc -m32 -march=k8 -O -fomit-frame-pointer2018090320180818
7941896800refgcc -m32 -march=pentium2 -Os -fomit-frame-pointer2018090320180818
7961526656refgcc -m32 -march=i486 -O -fomit-frame-pointer2018090320180818
7965431168refgcc -funroll-loops -m32 -march=pentium2 -O -fomit-frame-pointer2018090320180818
7965524320refgcc -funroll-loops -m32 -march=pentium-m -O -fomit-frame-pointer2018090320180818
7974589216refgcc -m32 -march=core2 -O2 -fomit-frame-pointer2018090320180818
7980641376refgcc -m32 -march=i386 -O -fomit-frame-pointer2018090320180818
7980954592refgcc -m32 -march=k6-2 -O -fomit-frame-pointer2018090320180818
7982102912refgcc -m32 -march=k6 -O -fomit-frame-pointer2018090320180818
7983964160refgcc -m32 -march=core2 -msse4.1 -O2 -fomit-frame-pointer2018090320180818
7985681760refgcc -funroll-loops -m32 -march=pentium3 -O -fomit-frame-pointer2018090320180818
7987512800refgcc -m32 -march=corei7-avx -O2 -fomit-frame-pointer2018090320180818
7988074368refgcc -m32 -march=core-avx-i -O2 -fomit-frame-pointer2018090320180818
7991053152refgcc -funroll-loops -m32 -march=pentium4 -O2 -fomit-frame-pointer2018090320180818
7997208768refgcc -m32 -march=core2 -msse4 -O2 -fomit-frame-pointer2018090320180818
7998944480refgcc -m32 -march=k6-3 -O -fomit-frame-pointer2018090320180818
8004123424refgcc -m32 -march=i486 -O2 -fomit-frame-pointer2018090320180818
8018205632refgcc -m32 -march=pentium-m -Os -fomit-frame-pointer2018090320180818
8020843424refgcc -m32 -march=pentium3 -Os -fomit-frame-pointer2018090320180818
8029584064refgcc -m32 -march=pentium-mmx -O -fomit-frame-pointer2018090320180818
8036786048refgcc -m32 -march=pentium -O -fomit-frame-pointer2018090320180818
8039480928refgcc -m32 -march=k6 -O2 -fomit-frame-pointer2018090320180818
8040512064refgcc -m32 -march=k6-2 -O2 -fomit-frame-pointer2018090320180818
8043023104refgcc -m32 -march=k6-3 -O2 -fomit-frame-pointer2018090320180818
8047717728refgcc -m32 -march=core-avx-i -O -fomit-frame-pointer2018090320180818
8051904192refgcc -m32 -march=core-avx2 -Os -fomit-frame-pointer2018090320180818
8059714240refgcc -m32 -march=i386 -O2 -fomit-frame-pointer2018090320180818
8083588736refgcc -funroll-loops -m32 -march=pentium2 -O2 -fomit-frame-pointer2018090320180818
8116133376refgcc -m32 -march=corei7 -O2 -fomit-frame-pointer2018090320180818
8141579968refgcc -funroll-loops -m32 -march=pentiumpro -O -fomit-frame-pointer2018090320180818
8177865184refgcc -m32 -march=pentium-mmx -O2 -fomit-frame-pointer2018090320180818
8249720768refgcc -funroll-loops -m32 -march=pentiumpro -O2 -fomit-frame-pointer2018090320180818
8268287104refgcc -funroll-loops -m32 -march=pentium-m -O2 -fomit-frame-pointer2018090320180818
8279460416refgcc -funroll-loops -m32 -march=pentium3 -O2 -fomit-frame-pointer2018090320180818
8383062336refgcc -m32 -march=pentium -O2 -fomit-frame-pointer2018090320180818
8440126912refgcc -m32 -march=core2 -msse4 -Os -fomit-frame-pointer2018090320180818
8457815744refgcc -m32 -march=core-avx-i -Os -fomit-frame-pointer2018090320180818
8476684992refgcc -m32 -march=corei7-avx -Os -fomit-frame-pointer2018090320180818
8488452096refgcc -m32 -march=pentium3 -O -fomit-frame-pointer2018090320180818
8491680640refgcc -m32 -march=pentium4 -O -fomit-frame-pointer2018090320180818
8528716224refgcc -m32 -march=k8 -O2 -fomit-frame-pointer2018090320180818
8548222144refgcc -m32 -march=corei7 -Os -fomit-frame-pointer2018090320180818
8550388128refgcc -m32 -march=athlon -O2 -fomit-frame-pointer2018090320180818
8553642368refgcc -m32 -march=core2 -msse4.1 -Os -fomit-frame-pointer2018090320180818
8565396800refgcc -m32 -march=pentium4 -O2 -fomit-frame-pointer2018090320180818
8586486752refgcc -m32 -march=pentium-m -O -fomit-frame-pointer2018090320180818
8592499264refgcc -m32 -march=core2 -Os -fomit-frame-pointer2018090320180818
8600549024refgcc -m32 -march=barcelona -O2 -fomit-frame-pointer2018090320180818
8601488480refgcc -m32 -O2 -fomit-frame-pointer2018090320180818
8614693120refgcc -m32 -march=pentiumpro -O -fomit-frame-pointer2018090320180818
8615907680refgcc -m32 -march=pentium2 -O -fomit-frame-pointer2018090320180818
8677091232refgcc -m32 -march=pentium-m -O2 -fomit-frame-pointer2018090320180818
8685468768refgcc -m32 -march=pentiumpro -O2 -fomit-frame-pointer2018090320180818
8687752800refgcc -m32 -march=pentium2 -O2 -fomit-frame-pointer2018090320180818
8731213696refgcc -m32 -Os -fomit-frame-pointer2018090320180818
8835121984refgcc -m32 -march=pentium3 -O2 -fomit-frame-pointer2018090320180818
8998960960refgcc -m32 -march=athlon -Os -fomit-frame-pointer2018090320180818
9900094048refgcc -m32 -march=barcelona -Os -fomit-frame-pointer2018090320180818
9908402272refgcc -m32 -march=k8 -Os -fomit-frame-pointer2018090320180818

Compiler output

Implementation: crypto_sign/sphincsf256shake256/avx2
Compiler: gcc -funroll-loops -m32 -O2 -fomit-frame-pointer
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:40: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: error: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 156, namely:
Compiler | Implementations
gcc -funroll-loops -m32 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=athlon -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=athlon -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=athlon -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=athlon -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=barcelona -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=barcelona -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=barcelona -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=barcelona -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i386 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i386 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i386 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i386 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i486 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i486 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i486 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i486 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-2 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-2 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-2 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-2 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-3 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-3 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-3 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-3 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k8 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k8 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k8 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k8 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=nocona -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=nocona -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=nocona -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=nocona -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-m -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-m -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-m -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-m -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-mmx -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-mmx -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-mmx -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-mmx -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium2 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium2 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium2 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium2 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium3 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium3 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium3 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium3 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium4 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium4 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium4 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium4 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentiumpro -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentiumpro -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentiumpro -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentiumpro -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=prescott -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=prescott -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=prescott -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=prescott -Os -fomit-frame-pointer avx2
gcc -m32 -O2 -fomit-frame-pointer avx2
gcc -m32 -O3 -fomit-frame-pointer avx2
gcc -m32 -O -fomit-frame-pointer avx2
gcc -m32 -Os -fomit-frame-pointer avx2
gcc -m32 -march=athlon -O2 -fomit-frame-pointer avx2
gcc -m32 -march=athlon -O3 -fomit-frame-pointer avx2
gcc -m32 -march=athlon -O -fomit-frame-pointer avx2
gcc -m32 -march=athlon -Os -fomit-frame-pointer avx2
gcc -m32 -march=core2 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -O -fomit-frame-pointer avx2
gcc -m32 -march=core2 -Os -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4.1 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4.1 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4.1 -O -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4.1 -Os -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4 -O -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4 -Os -fomit-frame-pointer avx2
gcc -m32 -march=corei7 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=corei7 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=corei7 -O -fomit-frame-pointer avx2
gcc -m32 -march=corei7 -Os -fomit-frame-pointer avx2
gcc -m32 -march=i386 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=i386 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=i386 -O -fomit-frame-pointer avx2
gcc -m32 -march=i386 -Os -fomit-frame-pointer avx2
gcc -m32 -march=i486 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=i486 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=i486 -O -fomit-frame-pointer avx2
gcc -m32 -march=i486 -Os -fomit-frame-pointer avx2
gcc -m32 -march=k6-2 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=k6-2 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=k6-2 -O -fomit-frame-pointer avx2
gcc -m32 -march=k6-2 -Os -fomit-frame-pointer avx2
gcc -m32 -march=k6-3 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=k6-3 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=k6-3 -O -fomit-frame-pointer avx2
gcc -m32 -march=k6-3 -Os -fomit-frame-pointer avx2
gcc -m32 -march=k6 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=k6 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=k6 -O -fomit-frame-pointer avx2
gcc -m32 -march=k6 -Os -fomit-frame-pointer avx2
gcc -m32 -march=k8 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=k8 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=k8 -O -fomit-frame-pointer avx2
gcc -m32 -march=k8 -Os -fomit-frame-pointer avx2
gcc -m32 -march=nocona -O2 -fomit-frame-pointer avx2
gcc -m32 -march=nocona -O3 -fomit-frame-pointer avx2
gcc -m32 -march=nocona -O -fomit-frame-pointer avx2
gcc -m32 -march=nocona -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium-m -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium-m -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium-m -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium-m -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium-mmx -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium-mmx -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium-mmx -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium-mmx -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium2 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium2 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium2 -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium2 -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium3 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium3 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium3 -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium3 -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium4 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium4 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium4 -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium4 -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentiumpro -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentiumpro -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentiumpro -O -fomit-frame-pointer avx2
gcc -m32 -march=pentiumpro -Os -fomit-frame-pointer avx2
gcc -m32 -march=prescott -O2 -fomit-frame-pointer avx2
gcc -m32 -march=prescott -O3 -fomit-frame-pointer avx2
gcc -m32 -march=prescott -O -fomit-frame-pointer avx2
gcc -m32 -march=prescott -Os -fomit-frame-pointer avx2

Compiler output

Implementation: crypto_sign/sphincsf256shake256/avx2
Compiler: gcc -m32 -march=barcelona -O2 -fomit-frame-pointer
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:40: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: error: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ...
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:40: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: error: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler | Implementations
gcc -m32 -march=barcelona -O2 -fomit-frame-pointer avx2
gcc -m32 -march=barcelona -O3 -fomit-frame-pointer avx2
gcc -m32 -march=barcelona -O -fomit-frame-pointer avx2
gcc -m32 -march=barcelona -Os -fomit-frame-pointer avx2

Compiler output

Implementation: crypto_sign/sphincsf256shake256/avx2
Compiler: gcc -m32 -march=core-avx-i -O2 -fomit-frame-pointer
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: error: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: error: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:142:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+2], lanes2 ),\
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 8, namely:
Compiler | Implementations
gcc -m32 -march=core-avx-i -O2 -fomit-frame-pointer avx2
gcc -m32 -march=core-avx-i -O3 -fomit-frame-pointer avx2
gcc -m32 -march=core-avx-i -O -fomit-frame-pointer avx2
gcc -m32 -march=core-avx-i -Os -fomit-frame-pointer avx2
gcc -m32 -march=corei7-avx -O2 -fomit-frame-pointer avx2
gcc -m32 -march=corei7-avx -O3 -fomit-frame-pointer avx2
gcc -m32 -march=corei7-avx -O -fomit-frame-pointer avx2
gcc -m32 -march=corei7-avx -Os -fomit-frame-pointer avx2