Implementation notes: x86, rumba5, crypto_sign/sphincss128shake256

Computer: rumba5
Architecture: x86
CPU ID: AuthenticAMD-00800f11-178bfbff
SUPERCOP version: 20181216
Operation: crypto_sign
Primitive: sphincss128shake256
Time | Implementation | Compiler | Benchmark date | SUPERCOP version
7191654336avx2gcc -m32 -march=native -mtune=native -O3 -fomit-frame-pointer2018090520180818
7469481984avx2gcc -m32 -march=core-avx2 -O3 -fomit-frame-pointer2018090520180818
11310263168avx2gcc -m32 -march=native -mtune=native -O -fomit-frame-pointer2018090520180818
11433226016avx2gcc -m32 -march=core-avx2 -O -fomit-frame-pointer2018090520180818
11463121952avx2gcc -m32 -march=native -mtune=native -Os -fomit-frame-pointer2018090520180818
11522135488avx2gcc -m32 -march=native -mtune=native -O2 -fomit-frame-pointer2018090520180818
11539636672avx2gcc -m32 -march=core-avx2 -Os -fomit-frame-pointer2018090520180818
11564263776avx2gcc -m32 -march=core-avx2 -O2 -fomit-frame-pointer2018090520180818
31753551648refgcc -m32 -march=native -mtune=native -O3 -fomit-frame-pointer2018090520180818
31975508288refgcc -m32 -march=core-avx2 -O3 -fomit-frame-pointer2018090520180818
33737332160refgcc -m32 -march=k8 -O3 -fomit-frame-pointer2018090520180818
33844695456refgcc -m32 -march=barcelona -O3 -fomit-frame-pointer2018090520180818
33911796064refgcc -m32 -march=core2 -msse4.1 -O3 -fomit-frame-pointer2018090520180818
33978185664refgcc -m32 -march=core2 -msse4 -O3 -fomit-frame-pointer2018090520180818
33999854208refgcc -m32 -march=core2 -O3 -fomit-frame-pointer2018090520180818
34054190560refgcc -m32 -march=core-avx-i -O3 -fomit-frame-pointer2018090520180818
34070945632refgcc -m32 -march=corei7 -O3 -fomit-frame-pointer2018090520180818
34084895744refgcc -m32 -march=corei7-avx -O3 -fomit-frame-pointer2018090520180818
34327835136refgcc -funroll-loops -m32 -march=k6-2 -O3 -fomit-frame-pointer2018090520180818
34362461536refgcc -funroll-loops -m32 -march=k6 -O3 -fomit-frame-pointer2018090620180818
34506209920refgcc -funroll-loops -m32 -march=prescott -O3 -fomit-frame-pointer2018090620180818
34514185888refgcc -m32 -march=athlon -O3 -fomit-frame-pointer2018090520180818
34632394112refgcc -m32 -march=prescott -O3 -fomit-frame-pointer2018090520180818
34646409120refgcc -funroll-loops -m32 -march=pentium4 -O3 -fomit-frame-pointer2018090620180818
34685216448refgcc -m32 -march=nocona -O3 -fomit-frame-pointer2018090520180818
34747229760refgcc -funroll-loops -m32 -march=k6-3 -O3 -fomit-frame-pointer2018090520180818
34812622368refgcc -funroll-loops -m32 -march=k8 -O3 -fomit-frame-pointer2018090520180818
35150914112refgcc -m32 -march=k6-2 -O3 -fomit-frame-pointer2018090520180818
35151969056refgcc -m32 -march=k6 -O3 -fomit-frame-pointer2018090520180818
35158772800refgcc -m32 -march=k6-3 -O3 -fomit-frame-pointer2018090520180818
35393541376refgcc -funroll-loops -m32 -march=nocona -O3 -fomit-frame-pointer2018090620180818
35396101120refgcc -m32 -march=pentium-m -O3 -fomit-frame-pointer2018090520180818
35473972896refgcc -funroll-loops -m32 -march=pentium2 -O3 -fomit-frame-pointer2018090620180818
35489888384refgcc -funroll-loops -m32 -march=pentiumpro -O3 -fomit-frame-pointer2018090620180818
35544691328refgcc -funroll-loops -m32 -march=pentium-m -O3 -fomit-frame-pointer2018090620180818
35559918400refgcc -funroll-loops -m32 -march=pentium3 -O3 -fomit-frame-pointer2018090620180818
35561452768refgcc -m32 -march=pentium4 -O3 -fomit-frame-pointer2018090520180818
35775668896refgcc -m32 -O3 -fomit-frame-pointer2018090520180818
35800289824refgcc -m32 -march=i386 -O3 -fomit-frame-pointer2018090520180818
35827565536refgcc -funroll-loops -m32 -march=athlon -O3 -fomit-frame-pointer2018090520180818
36030913408refgcc -m32 -march=i486 -O3 -fomit-frame-pointer2018090520180818
36103460608refgcc -funroll-loops -m32 -march=i386 -O3 -fomit-frame-pointer2018090620180818
36104111104refgcc -m32 -march=pentiumpro -O3 -fomit-frame-pointer2018090520180818
36121142496refgcc -funroll-loops -m32 -march=i486 -O3 -fomit-frame-pointer2018090620180818
36204758624refgcc -m32 -march=pentium3 -O3 -fomit-frame-pointer2018090520180818
36535622656refgcc -funroll-loops -m32 -march=k6-3 -O2 -fomit-frame-pointer2018090520180818
36651234560refgcc -m32 -march=native -mtune=native -O2 -fomit-frame-pointer2018090520180818
36733490656refgcc -funroll-loops -m32 -march=k6 -O -fomit-frame-pointer2018090620180818
36746155328refgcc -funroll-loops -m32 -march=athlon -O -fomit-frame-pointer2018090520180818
36783342752refgcc -funroll-loops -m32 -march=k6-2 -O -fomit-frame-pointer2018090620180818
36818640992refgcc -funroll-loops -m32 -march=barcelona -O -fomit-frame-pointer2018090620180818
36856716096refgcc -m32 -march=pentium2 -O3 -fomit-frame-pointer2018090520180818
36904807808refgcc -funroll-loops -m32 -march=nocona -O -fomit-frame-pointer2018090620180818
36931214944refgcc -funroll-loops -m32 -O -fomit-frame-pointer2018090520180818
36976738400refgcc -funroll-loops -m32 -march=prescott -O -fomit-frame-pointer2018090620180818
36985560544refgcc -m32 -march=core-avx2 -O2 -fomit-frame-pointer2018090520180818
37147649920refgcc -funroll-loops -m32 -march=k8 -O -fomit-frame-pointer2018090520180818
37150343200refgcc -funroll-loops -m32 -march=prescott -O2 -fomit-frame-pointer2018090620180818
37161264416refgcc -funroll-loops -m32 -march=nocona -O2 -fomit-frame-pointer2018090620180818
37200585792refgcc -funroll-loops -m32 -march=i386 -O2 -fomit-frame-pointer2018090620180818
37247513664refgcc -funroll-loops -m32 -march=i486 -O2 -fomit-frame-pointer2018090620180818
37270066816refgcc -m32 -march=native -mtune=native -Os -fomit-frame-pointer2018090520180818
37288472000refgcc -funroll-loops -m32 -march=i386 -O -fomit-frame-pointer2018090620180818
37338969024refgcc -funroll-loops -m32 -march=athlon -Os -fomit-frame-pointer2018090520180818
37347546016refgcc -funroll-loops -m32 -Os -fomit-frame-pointer2018090520180818
37379934240refgcc -m32 -march=pentium -O3 -fomit-frame-pointer2018090520180818
37382161024refgcc -m32 -march=pentium-mmx -O3 -fomit-frame-pointer2018090520180818
37394507392refgcc -funroll-loops -m32 -march=i486 -O -fomit-frame-pointer2018090620180818
37410535904refgcc -funroll-loops -m32 -march=pentium-mmx -O3 -fomit-frame-pointer2018090620180818
37458139968refgcc -funroll-loops -m32 -march=k6-3 -O -fomit-frame-pointer2018090520180818
37476821824refgcc -funroll-loops -m32 -march=k6-2 -O2 -fomit-frame-pointer2018090620180818
37482267168refgcc -funroll-loops -m32 -march=k6 -O2 -fomit-frame-pointer2018090620180818
37533008320refgcc -funroll-loops -m32 -march=k6 -Os -fomit-frame-pointer2018090620180818
37538226720refgcc -funroll-loops -m32 -march=pentium-mmx -Os -fomit-frame-pointer2018090620180818
37542627200refgcc -funroll-loops -m32 -march=pentium -Os -fomit-frame-pointer2018090620180818
37546751008refgcc -funroll-loops -m32 -march=k6-2 -Os -fomit-frame-pointer2018090620180818
37555538752refgcc -funroll-loops -m32 -march=k6-3 -Os -fomit-frame-pointer2018090520180818
37697459648refgcc -funroll-loops -m32 -march=k8 -Os -fomit-frame-pointer2018090520180818
37732442400refgcc -funroll-loops -m32 -march=barcelona -Os -fomit-frame-pointer2018090620180818
37873408416refgcc -funroll-loops -m32 -march=pentium-mmx -O -fomit-frame-pointer2018090620180818
37971073216refgcc -m32 -march=native -mtune=native -O -fomit-frame-pointer2018090520180818
38016044128refgcc -funroll-loops -m32 -O3 -fomit-frame-pointer2018090520180818
38067124896refgcc -funroll-loops -m32 -march=pentium -O3 -fomit-frame-pointer2018090620180818
38216491072refgcc -m32 -march=k6 -Os -fomit-frame-pointer2018090520180818
38224967840refgcc -m32 -march=core-avx2 -O -fomit-frame-pointer2018090520180818
38243091200refgcc -m32 -march=k6-3 -Os -fomit-frame-pointer2018090520180818
38267086848refgcc -m32 -march=k6-2 -Os -fomit-frame-pointer2018090520180818
38343752864refgcc -m32 -march=pentium -Os -fomit-frame-pointer2018090520180818
38359865856refgcc -m32 -march=pentium-mmx -Os -fomit-frame-pointer2018090520180818
38427494944refgcc -funroll-loops -m32 -march=athlon -O2 -fomit-frame-pointer2018090520180818
38484031136refgcc -funroll-loops -m32 -O2 -fomit-frame-pointer2018090520180818
38604264608refgcc -funroll-loops -m32 -march=i486 -Os -fomit-frame-pointer2018090620180818
38631043648refgcc -funroll-loops -m32 -march=i386 -Os -fomit-frame-pointer2018090620180818
38791192480refgcc -m32 -march=pentium4 -Os -fomit-frame-pointer2018090520180818
38802504448refgcc -funroll-loops -m32 -march=pentium -O -fomit-frame-pointer2018090620180818
38804902848refgcc -m32 -march=prescott -Os -fomit-frame-pointer2018090520180818
38868733504refgcc -funroll-loops -m32 -march=pentium-mmx -O2 -fomit-frame-pointer2018090620180818
38884512320refgcc -funroll-loops -m32 -march=pentium -O2 -fomit-frame-pointer2018090620180818
39045131168refgcc -funroll-loops -m32 -march=pentium4 -Os -fomit-frame-pointer2018090620180818
39047600640refgcc -funroll-loops -m32 -march=prescott -Os -fomit-frame-pointer2018090620180818
39059135200refgcc -funroll-loops -m32 -march=nocona -Os -fomit-frame-pointer2018090620180818
39194444288refgcc -funroll-loops -m32 -march=barcelona -O2 -fomit-frame-pointer2018090620180818
39273203712refgcc -funroll-loops -m32 -march=k8 -O2 -fomit-frame-pointer2018090520180818
39278969280refgcc -m32 -march=k6-2 -O -fomit-frame-pointer2018090520180818
39296159328refgcc -m32 -march=k6-3 -O -fomit-frame-pointer2018090520180818
39321898656refgcc -m32 -march=k6 -O -fomit-frame-pointer2018090520180818
39389720928refgcc -m32 -march=athlon -O -fomit-frame-pointer2018090520180818
39425201568refgcc -m32 -march=core2 -O -fomit-frame-pointer2018090520180818
39426027168refgcc -m32 -march=corei7-avx -O -fomit-frame-pointer2018090520180818
39441048736refgcc -m32 -march=core2 -msse4 -O -fomit-frame-pointer2018090520180818
39456423328refgcc -m32 -march=core-avx-i -O -fomit-frame-pointer2018090520180818
39458851584refgcc -m32 -march=core2 -msse4.1 -O -fomit-frame-pointer2018090520180818
39490926528refgcc -m32 -O -fomit-frame-pointer2018090520180818
39573578272refgcc -m32 -march=barcelona -O -fomit-frame-pointer2018090520180818
39678122176refgcc -m32 -march=prescott -O -fomit-frame-pointer2018090520180818
39706847712refgcc -m32 -march=nocona -O -fomit-frame-pointer2018090520180818
39714329728refgcc -m32 -march=i486 -Os -fomit-frame-pointer2018090520180818
39718424192refgcc -m32 -march=i386 -Os -fomit-frame-pointer2018090520180818
39779996160refgcc -m32 -march=nocona -Os -fomit-frame-pointer2018090520180818
39832442464refgcc -m32 -march=corei7 -O -fomit-frame-pointer2018090520180818
39832740064refgcc -funroll-loops -m32 -march=barcelona -O3 -fomit-frame-pointer2018090620180818
39936638656refgcc -funroll-loops -m32 -march=pentium-m -Os -fomit-frame-pointer2018090620180818
39957464224refgcc -m32 -march=prescott -O2 -fomit-frame-pointer2018090520180818
39958547008refgcc -m32 -march=nocona -O2 -fomit-frame-pointer2018090520180818
39991096512refgcc -m32 -march=core2 -O2 -fomit-frame-pointer2018090520180818
40026842624refgcc -m32 -march=pentium-m -Os -fomit-frame-pointer2018090520180818
40047493216refgcc -m32 -march=pentium3 -Os -fomit-frame-pointer2018090520180818
40070473312refgcc -funroll-loops -m32 -march=pentium4 -O -fomit-frame-pointer2018090620180818
40073655520refgcc -m32 -march=core2 -msse4 -O2 -fomit-frame-pointer2018090520180818
40128513792refgcc -m32 -march=i486 -O2 -fomit-frame-pointer2018090520180818
40160354752refgcc -m32 -march=corei7-avx -O2 -fomit-frame-pointer2018090520180818
40172943584refgcc -m32 -march=core2 -msse4.1 -O2 -fomit-frame-pointer2018090520180818
40198844672refgcc -funroll-loops -m32 -march=pentium2 -Os -fomit-frame-pointer2018090620180818
40205471104refgcc -funroll-loops -m32 -march=pentiumpro -Os -fomit-frame-pointer2018090620180818
40247367520refgcc -m32 -march=pentium-mmx -O -fomit-frame-pointer2018090520180818
40274314368refgcc -m32 -march=pentium -O -fomit-frame-pointer2018090520180818
40274442048refgcc -m32 -march=k8 -O -fomit-frame-pointer2018090520180818
40286934432refgcc -m32 -march=core-avx-i -O2 -fomit-frame-pointer2018090520180818
40354022208refgcc -m32 -march=i386 -O2 -fomit-frame-pointer2018090520180818
40474705536refgcc -funroll-loops -m32 -march=pentium2 -O -fomit-frame-pointer2018090620180818
40475448384refgcc -m32 -march=corei7 -O2 -fomit-frame-pointer2018090520180818
40477469792refgcc -funroll-loops -m32 -march=pentium4 -O2 -fomit-frame-pointer2018090620180818
40478989024refgcc -funroll-loops -m32 -march=pentium-m -O -fomit-frame-pointer2018090620180818
40484985536refgcc -funroll-loops -m32 -march=pentiumpro -O -fomit-frame-pointer2018090620180818
40546539328refgcc -m32 -march=pentium2 -Os -fomit-frame-pointer2018090520180818
40575907968refgcc -m32 -march=k6-2 -O2 -fomit-frame-pointer2018090520180818
40585966016refgcc -m32 -march=k6-3 -O2 -fomit-frame-pointer2018090520180818
40603326208refgcc -m32 -march=k6 -O2 -fomit-frame-pointer2018090520180818
40613080032refgcc -m32 -march=pentiumpro -Os -fomit-frame-pointer2018090520180818
40622651488refgcc -m32 -march=i486 -O -fomit-frame-pointer2018090520180818
40625860224refgcc -funroll-loops -m32 -march=pentium3 -O -fomit-frame-pointer2018090620180818
40772983488refgcc -m32 -march=pentium -O2 -fomit-frame-pointer2018090520180818
40804581408refgcc -m32 -march=pentium-mmx -O2 -fomit-frame-pointer2018090520180818
40873746112refgcc -funroll-loops -m32 -march=pentium3 -Os -fomit-frame-pointer2018090620180818
40895888640refgcc -funroll-loops -m32 -march=pentium3 -O2 -fomit-frame-pointer2018090620180818
40925827776refgcc -funroll-loops -m32 -march=pentiumpro -O2 -fomit-frame-pointer2018090620180818
40929393152refgcc -funroll-loops -m32 -march=pentium2 -O2 -fomit-frame-pointer2018090620180818
41009600672refgcc -funroll-loops -m32 -march=pentium-m -O2 -fomit-frame-pointer2018090620180818
41038362720refgcc -m32 -march=i386 -O -fomit-frame-pointer2018090520180818
41473419680refgcc -m32 -march=core-avx2 -Os -fomit-frame-pointer2018090520180818
41495446464refgcc -m32 -march=corei7 -Os -fomit-frame-pointer2018090520180818
42079630272refgcc -m32 -march=corei7-avx -Os -fomit-frame-pointer2018090520180818
42638414144refgcc -m32 -march=core-avx-i -Os -fomit-frame-pointer2018090520180818
42729476768refgcc -m32 -march=pentium2 -O -fomit-frame-pointer2018090520180818
42744208192refgcc -m32 -march=pentiumpro -O -fomit-frame-pointer2018090520180818
42790705376refgcc -m32 -march=core2 -Os -fomit-frame-pointer2018090520180818
42795532448refgcc -m32 -march=pentium3 -O -fomit-frame-pointer2018090520180818
42837446368refgcc -m32 -march=core2 -msse4.1 -Os -fomit-frame-pointer2018090520180818
42881616864refgcc -m32 -march=pentium4 -O -fomit-frame-pointer2018090520180818
42957498400refgcc -m32 -march=pentium-m -O -fomit-frame-pointer2018090520180818
43069635360refgcc -m32 -march=k8 -O2 -fomit-frame-pointer2018090520180818
43325725600refgcc -m32 -march=pentium4 -O2 -fomit-frame-pointer2018090520180818
43451672288refgcc -m32 -march=athlon -O2 -fomit-frame-pointer2018090520180818
43651354592refgcc -m32 -O2 -fomit-frame-pointer2018090520180818
43893887584refgcc -m32 -march=core2 -msse4 -Os -fomit-frame-pointer2018090520180818
43894600000refgcc -m32 -march=pentium-m -O2 -fomit-frame-pointer2018090520180818
43914496480refgcc -m32 -march=pentium3 -O2 -fomit-frame-pointer2018090520180818
43928735104refgcc -m32 -march=pentium2 -O2 -fomit-frame-pointer2018090520180818
43929801888refgcc -m32 -march=pentiumpro -O2 -fomit-frame-pointer2018090520180818
43984558592refgcc -m32 -march=barcelona -O2 -fomit-frame-pointer2018090520180818
44553350752refgcc -m32 -Os -fomit-frame-pointer2018090520180818
44821421312refgcc -m32 -march=k8 -Os -fomit-frame-pointer2018090520180818
45428646208refgcc -m32 -march=barcelona -Os -fomit-frame-pointer2018090520180818
48739845056refgcc -m32 -march=athlon -Os -fomit-frame-pointer2018090520180818

Compiler output

Implementation: crypto_sign/sphincss128shake256/avx2
Compiler: gcc -funroll-loops -m32 -O2 -fomit-frame-pointer
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:40: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: error: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 156, namely:
Compiler Implementations
gcc -funroll-loops -m32 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=athlon -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=athlon -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=athlon -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=athlon -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=barcelona -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=barcelona -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=barcelona -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=barcelona -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i386 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i386 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i386 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i386 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i486 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i486 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i486 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=i486 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-2 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-2 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-2 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-2 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-3 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-3 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-3 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6-3 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k6 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k8 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k8 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k8 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=k8 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=nocona -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=nocona -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=nocona -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=nocona -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-m -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-m -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-m -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-m -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-mmx -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-mmx -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-mmx -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium-mmx -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium2 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium2 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium2 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium2 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium3 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium3 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium3 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium3 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium4 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium4 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium4 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium4 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentium -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentiumpro -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentiumpro -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentiumpro -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=pentiumpro -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=prescott -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=prescott -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=prescott -O -fomit-frame-pointer avx2
gcc -funroll-loops -m32 -march=prescott -Os -fomit-frame-pointer avx2
gcc -m32 -O2 -fomit-frame-pointer avx2
gcc -m32 -O3 -fomit-frame-pointer avx2
gcc -m32 -O -fomit-frame-pointer avx2
gcc -m32 -Os -fomit-frame-pointer avx2
gcc -m32 -march=athlon -O2 -fomit-frame-pointer avx2
gcc -m32 -march=athlon -O3 -fomit-frame-pointer avx2
gcc -m32 -march=athlon -O -fomit-frame-pointer avx2
gcc -m32 -march=athlon -Os -fomit-frame-pointer avx2
gcc -m32 -march=core2 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -O -fomit-frame-pointer avx2
gcc -m32 -march=core2 -Os -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4.1 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4.1 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4.1 -O -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4.1 -Os -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4 -O -fomit-frame-pointer avx2
gcc -m32 -march=core2 -msse4 -Os -fomit-frame-pointer avx2
gcc -m32 -march=corei7 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=corei7 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=corei7 -O -fomit-frame-pointer avx2
gcc -m32 -march=corei7 -Os -fomit-frame-pointer avx2
gcc -m32 -march=i386 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=i386 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=i386 -O -fomit-frame-pointer avx2
gcc -m32 -march=i386 -Os -fomit-frame-pointer avx2
gcc -m32 -march=i486 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=i486 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=i486 -O -fomit-frame-pointer avx2
gcc -m32 -march=i486 -Os -fomit-frame-pointer avx2
gcc -m32 -march=k6-2 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=k6-2 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=k6-2 -O -fomit-frame-pointer avx2
gcc -m32 -march=k6-2 -Os -fomit-frame-pointer avx2
gcc -m32 -march=k6-3 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=k6-3 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=k6-3 -O -fomit-frame-pointer avx2
gcc -m32 -march=k6-3 -Os -fomit-frame-pointer avx2
gcc -m32 -march=k6 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=k6 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=k6 -O -fomit-frame-pointer avx2
gcc -m32 -march=k6 -Os -fomit-frame-pointer avx2
gcc -m32 -march=k8 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=k8 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=k8 -O -fomit-frame-pointer avx2
gcc -m32 -march=k8 -Os -fomit-frame-pointer avx2
gcc -m32 -march=nocona -O2 -fomit-frame-pointer avx2
gcc -m32 -march=nocona -O3 -fomit-frame-pointer avx2
gcc -m32 -march=nocona -O -fomit-frame-pointer avx2
gcc -m32 -march=nocona -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium-m -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium-m -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium-m -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium-m -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium-mmx -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium-mmx -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium-mmx -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium-mmx -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium2 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium2 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium2 -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium2 -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium3 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium3 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium3 -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium3 -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium4 -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium4 -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium4 -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium4 -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentium -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentium -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentium -O -fomit-frame-pointer avx2
gcc -m32 -march=pentium -Os -fomit-frame-pointer avx2
gcc -m32 -march=pentiumpro -O2 -fomit-frame-pointer avx2
gcc -m32 -march=pentiumpro -O3 -fomit-frame-pointer avx2
gcc -m32 -march=pentiumpro -O -fomit-frame-pointer avx2
gcc -m32 -march=pentiumpro -Os -fomit-frame-pointer avx2
gcc -m32 -march=prescott -O2 -fomit-frame-pointer avx2
gcc -m32 -march=prescott -O3 -fomit-frame-pointer avx2
gcc -m32 -march=prescott -O -fomit-frame-pointer avx2
gcc -m32 -march=prescott -Os -fomit-frame-pointer avx2

Compiler output

Implementation: crypto_sign/sphincss128shake256/avx2
Compiler: gcc -m32 -march=barcelona -O2 -fomit-frame-pointer
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:40: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: error: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ...
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:40: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: error: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler Implementations
gcc -m32 -march=barcelona -O2 -fomit-frame-pointer avx2
gcc -m32 -march=barcelona -O3 -fomit-frame-pointer avx2
gcc -m32 -march=barcelona -O -fomit-frame-pointer avx2
gcc -m32 -march=barcelona -Os -fomit-frame-pointer avx2

Compiler output

Implementation: crypto_sign/sphincss128shake256/avx2
Compiler: gcc -m32 -march=core-avx-i -O2 -fomit-frame-pointer
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: error: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: error: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:142:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+2], lanes2 ),\
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 8, namely:
Compiler Implementations
gcc -m32 -march=core-avx-i -O2 -fomit-frame-pointer avx2
gcc -m32 -march=core-avx-i -O3 -fomit-frame-pointer avx2
gcc -m32 -march=core-avx-i -O -fomit-frame-pointer avx2
gcc -m32 -march=core-avx-i -Os -fomit-frame-pointer avx2
gcc -m32 -march=corei7-avx -O2 -fomit-frame-pointer avx2
gcc -m32 -march=corei7-avx -O3 -fomit-frame-pointer avx2
gcc -m32 -march=corei7-avx -O -fomit-frame-pointer avx2
gcc -m32 -march=corei7-avx -Os -fomit-frame-pointer avx2