Implementation notes: amd64, rumba5, crypto_sign/sphincss128shake256

Computer: rumba5
Architecture: amd64
CPU ID: AuthenticAMD-00800f11-178bfbff
SUPERCOP version: 20181216
Operation: crypto_sign
Primitive: sphincss128shake256
Time | Implementation | Compiler | Benchmark date | SUPERCOP version
6001558400avx2clang -O3 -fwrapv -march=native -fomit-frame-pointer -Qunused-arguments2018090520180818
6015603200avx2clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2018090520180818
6018042752avx2clang -O3 -fwrapv -march=x86-64 -mcpu=core-avx2 -mavx2 -maes -mpclmul -fomit-frame-pointer -Qunused-arguments2018090520180818
6031627520avx2clang -O3 -fwrapv -mavx2 -fomit-frame-pointer -Qunused-arguments2018090520180818
6084255776avx2gcc -m64 -march=native -mtune=native -O3 -fomit-frame-pointer2018090520180818
6084399968avx2gcc -m64 -march=core-avx2 -O3 -fomit-frame-pointer2018090520180818
6126086592avx2gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv2018090520180818
6254593632refgcc -m64 -march=native -mtune=native -O3 -fomit-frame-pointer2018090520180818
6258461472refclang -O3 -fwrapv -march=native -fomit-frame-pointer -Qunused-arguments2018090520180818
6276161568refclang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2018090520180818
6280890432refgcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv2018090520180818
6314257600refgcc -m64 -march=core-avx2 -O3 -fomit-frame-pointer2018090520180818
7197094240refgcc -funroll-loops -m64 -march=k8 -O3 -fomit-frame-pointer2018090520180818
7208675840refgcc -funroll-loops -march=k8 -O3 -fomit-frame-pointer2018090520180818
7224283552refgcc -funroll-loops -m64 -march=barcelona -O3 -fomit-frame-pointer2018090520180818
7227025024refgcc -funroll-loops -march=barcelona -O3 -fomit-frame-pointer2018090520180818
7229714912refclang -mcpu=cortex-a8 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2018090520180818
7241311616refclang -mcpu=cortex-a9 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2018090520180818
7244731712refclang -mcpu=native -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2018090520180818
7245824128refclang -O3 -fomit-frame-pointer -Qunused-arguments2018090520180818
7300931360refgcc -m64 -march=core-avx-i -O3 -fomit-frame-pointer2018090520180818
7363411904refgcc -m64 -march=corei7-avx -O3 -fomit-frame-pointer2018090520180818
7368440576refgcc -m64 -march=k8 -O3 -fomit-frame-pointer2018090520180818
7375266752refgcc -march=k8 -O3 -fomit-frame-pointer2018090520180818
7379693600refgcc -m64 -march=corei7 -O3 -fomit-frame-pointer2018090520180818
7390978464refgcc -m64 -march=barcelona -O3 -fomit-frame-pointer2018090520180818
7397828896refgcc -march=barcelona -O3 -fomit-frame-pointer2018090520180818
7408113248refclang -O3 -fwrapv -mavx -maes -mpclmul -fomit-frame-pointer -Qunused-arguments2018090520180818
7409444192refclang -O3 -fwrapv -mavx -fomit-frame-pointer -Qunused-arguments2018090520180818
7412941760refgcc -m64 -march=core2 -msse4 -O3 -fomit-frame-pointer2018090520180818
7414026080refgcc -funroll-loops -fno-schedule-insns -O3 -fomit-frame-pointer2018090520180818
7425526336refgcc -m64 -march=core2 -O3 -fomit-frame-pointer2018090520180818
7438349344refclang -O3 -fwrapv -mavx2 -fomit-frame-pointer -Qunused-arguments2018090520180818
7439038624refgcc -funroll-loops -m64 -O3 -fomit-frame-pointer2018090520180818
7442844928refgcc -funroll-loops -O3 -fomit-frame-pointer2018090520180818
7459187616refclang -O3 -fwrapv -march=x86-64 -mcpu=core-avx2 -mavx2 -maes -mpclmul -fomit-frame-pointer -Qunused-arguments2018090520180818
7512490016refgcc -O3 -fomit-frame-pointer2018090520180818
7520860640refgcc -fno-schedule-insns -O3 -fomit-frame-pointer2018090520180818
7527431072refgcc -m64 -O3 -fomit-frame-pointer2018090520180818
7531046656refgcc -m64 -march=core2 -msse4.1 -O3 -fomit-frame-pointer2018090520180818
7623596448refgcc -funroll-loops -m64 -march=nocona -O3 -fomit-frame-pointer2018090520180818
7635718752refgcc -funroll-loops -march=nocona -O3 -fomit-frame-pointer2018090520180818
7760420704refgcc -m64 -march=nocona -O3 -fomit-frame-pointer2018090520180818
7772963808refgcc -march=nocona -O3 -fomit-frame-pointer2018090520180818
7824806784avx2gcc -m64 -march=native -mtune=native -O2 -fomit-frame-pointer2018090520180818
7858478880refgcc -funroll-loops -fno-schedule-insns -O2 -fomit-frame-pointer2018090520180818
7895066592refgcc -funroll-loops -O2 -fomit-frame-pointer2018090520180818
7911808032refgcc -funroll-loops -m64 -O2 -fomit-frame-pointer2018090520180818
7914219424refgcc -funroll-loops -m64 -march=barcelona -O2 -fomit-frame-pointer2018090520180818
7917356160refgcc -funroll-loops -march=barcelona -O2 -fomit-frame-pointer2018090520180818
7937116448avx2gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv2018090520180818
7939537536refgcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer2018090520180818
7949110080refgcc -funroll-loops -march=k8 -O2 -fomit-frame-pointer2018090520180818
7966434496refgcc -funroll-loops -m64 -march=nocona -O2 -fomit-frame-pointer2018090520180818
7967052032refgcc -funroll-loops -march=nocona -O2 -fomit-frame-pointer2018090520180818
8030868544avx2gcc -m64 -march=core-avx2 -O2 -fomit-frame-pointer2018090520180818
8213112864refgcc -funroll-loops -march=nocona -O -fomit-frame-pointer2018090520180818
8233059680refgcc -funroll-loops -fno-schedule-insns -O -fomit-frame-pointer2018090520180818
8241864032refgcc -funroll-loops -O -fomit-frame-pointer2018090520180818
8245006944refgcc -funroll-loops -m64 -march=nocona -O -fomit-frame-pointer2018090520180818
8247791840refgcc -funroll-loops -m64 -O -fomit-frame-pointer2018090520180818
8264168064refgcc -funroll-loops -m64 -march=barcelona -O -fomit-frame-pointer2018090520180818
8267051328refgcc -funroll-loops -march=barcelona -O -fomit-frame-pointer2018090520180818
8279293376refgcc -funroll-loops -march=k8 -O -fomit-frame-pointer2018090520180818
8306217120refgcc -funroll-loops -m64 -march=k8 -O -fomit-frame-pointer2018090520180818
8320695232refgcc -m64 -march=core-avx2 -O2 -fomit-frame-pointer2018090520180818
8360501856refgcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv2018090520180818
8393282656refgcc -m64 -march=native -mtune=native -O2 -fomit-frame-pointer2018090520180818
8652641696avx2gcc -m64 -march=native -mtune=native -O -fomit-frame-pointer2018090520180818
8660733728avx2gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv2018090520180818
8682230432refgcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv2018090520180818
8694114592avx2gcc -m64 -march=native -mtune=native -Os -fomit-frame-pointer2018090520180818
8712403744avx2gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv2018090520180818
8763537952avx2gcc -m64 -march=core-avx2 -Os -fomit-frame-pointer2018090520180818
8784871328avx2gcc -m64 -march=core-avx2 -O -fomit-frame-pointer2018090520180818
8825937056refgcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv2018090520180818
8914436416refgcc -m64 -march=native -mtune=native -Os -fomit-frame-pointer2018090520180818
8995527360refgcc -m64 -march=core-avx2 -Os -fomit-frame-pointer2018090520180818
9291704256refgcc -m64 -march=core-avx2 -O -fomit-frame-pointer2018090520180818
9328304128refgcc -m64 -march=native -mtune=native -O -fomit-frame-pointer2018090520180818
9364382752refgcc -m64 -O2 -fomit-frame-pointer2018090520180818
9368967296refgcc -fno-schedule-insns -O2 -fomit-frame-pointer2018090520180818
9376934016refgcc -O2 -fomit-frame-pointer2018090520180818
9402181632refgcc -march=barcelona -O2 -fomit-frame-pointer2018090520180818
9411020224refgcc -m64 -march=barcelona -O2 -fomit-frame-pointer2018090520180818
9444890944refgcc -m64 -march=core2 -O2 -fomit-frame-pointer2018090520180818
9448873856refgcc -m64 -march=core2 -msse4.1 -O2 -fomit-frame-pointer2018090520180818
9450167520refgcc -m64 -march=corei7 -O2 -fomit-frame-pointer2018090520180818
9450414496refgcc -m64 -march=core-avx-i -O2 -fomit-frame-pointer2018090520180818
9450856704refgcc -m64 -march=corei7-avx -O2 -fomit-frame-pointer2018090520180818
9451153952refgcc -m64 -march=core2 -msse4 -O2 -fomit-frame-pointer2018090520180818
9640537088refgcc -m64 -march=nocona -O2 -fomit-frame-pointer2018090520180818
9692864736refgcc -m64 -march=core2 -msse4.1 -O -fomit-frame-pointer2018090520180818
9702691296refgcc -march=nocona -O2 -fomit-frame-pointer2018090520180818
9710136736refgcc -m64 -march=corei7 -O -fomit-frame-pointer2018090520180818
9717517888refgcc -m64 -march=core-avx-i -O -fomit-frame-pointer2018090520180818
9718262720refgcc -m64 -march=core2 -O -fomit-frame-pointer2018090520180818
9720673344refgcc -m64 -march=corei7-avx -O -fomit-frame-pointer2018090520180818
9724967392refgcc -m64 -march=k8 -Os -fomit-frame-pointer2018090520180818
9726443104refgcc -m64 -march=core2 -msse4 -O -fomit-frame-pointer2018090520180818
9731269920refgcc -march=k8 -Os -fomit-frame-pointer2018090520180818
9738965088refgcc -m64 -march=core2 -msse4 -Os -fomit-frame-pointer2018090520180818
9739058976refgcc -m64 -march=corei7 -Os -fomit-frame-pointer2018090520180818
9742369760refgcc -m64 -march=core2 -msse4.1 -Os -fomit-frame-pointer2018090520180818
9746973120refgcc -m64 -march=core2 -Os -fomit-frame-pointer2018090520180818
9839447200refgcc -march=barcelona -Os -fomit-frame-pointer2018090520180818
9851927264refgcc -m64 -march=barcelona -Os -fomit-frame-pointer2018090520180818
9858376608refgcc -fno-schedule-insns -Os -fomit-frame-pointer2018090520180818
9873124672refgcc -m64 -Os -fomit-frame-pointer2018090520180818
9877543168refgcc -Os -fomit-frame-pointer2018090520180818
9928409280refgcc -m64 -march=corei7-avx -Os -fomit-frame-pointer2018090520180818
9936644896refgcc -m64 -march=core-avx-i -Os -fomit-frame-pointer2018090520180818
10005710944refgcc -m64 -march=barcelona -O -fomit-frame-pointer2018090520180818
10012326848refgcc -march=barcelona -O -fomit-frame-pointer2018090520180818
10073500864refgcc -m64 -march=k8 -O2 -fomit-frame-pointer2018090520180818
10075745728refgcc -march=k8 -O2 -fomit-frame-pointer2018090520180818
10163769824refgcc -funroll-loops -march=nocona -Os -fomit-frame-pointer2018090520180818
10165136064refgcc -funroll-loops -m64 -march=nocona -Os -fomit-frame-pointer2018090520180818
10184620224refgcc -O -fomit-frame-pointer2018090520180818
10185619296refgcc -fno-schedule-insns -O -fomit-frame-pointer2018090520180818
10193703808refgcc -m64 -O -fomit-frame-pointer2018090520180818
10268542272refgcc -funroll-loops -m64 -march=barcelona -Os -fomit-frame-pointer2018090520180818
10269732576refgcc -funroll-loops -march=k8 -Os -fomit-frame-pointer2018090520180818
10284136544refgcc -funroll-loops -m64 -march=k8 -Os -fomit-frame-pointer2018090520180818
10285402656refgcc -funroll-loops -fno-schedule-insns -Os -fomit-frame-pointer2018090520180818
10288001312refgcc -funroll-loops -m64 -Os -fomit-frame-pointer2018090520180818
10292395936refgcc -funroll-loops -march=barcelona -Os -fomit-frame-pointer2018090520180818
10305015712refgcc -funroll-loops -Os -fomit-frame-pointer2018090520180818
10362131904refgcc -march=k8 -O -fomit-frame-pointer2018090520180818
10368736544refgcc -m64 -march=k8 -O -fomit-frame-pointer2018090520180818
10832050080refgcc -march=nocona -Os -fomit-frame-pointer2018090520180818
10836049248refgcc -m64 -march=nocona -Os -fomit-frame-pointer2018090520180818
10963278336refgcc -march=nocona -O -fomit-frame-pointer2018090520180818
10968383264refgcc -m64 -march=nocona -O -fomit-frame-pointer2018090520180818
34158154432refgcc -funroll-loops2018090520180818
34159618560refgcc2018090520180818
34194449632refcc2018090520180818

Compiler output

Implementation: crypto_sign/sphincss128shake256/avx2
Compiler: cc
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:40: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: error: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 87, namely:
Compiler | Implementations
cc avx2
gcc avx2
gcc -O2 -fomit-frame-pointer avx2
gcc -O3 -fomit-frame-pointer avx2
gcc -O -fomit-frame-pointer avx2
gcc -Os -fomit-frame-pointer avx2
gcc -fno-schedule-insns -O2 -fomit-frame-pointer avx2
gcc -fno-schedule-insns -O3 -fomit-frame-pointer avx2
gcc -fno-schedule-insns -O -fomit-frame-pointer avx2
gcc -fno-schedule-insns -Os -fomit-frame-pointer avx2
gcc -funroll-loops avx2
gcc -funroll-loops -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -O -fomit-frame-pointer avx2
gcc -funroll-loops -Os -fomit-frame-pointer avx2
gcc -funroll-loops -fno-schedule-insns -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -fno-schedule-insns -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -fno-schedule-insns -O -fomit-frame-pointer avx2
gcc -funroll-loops -fno-schedule-insns -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -march=barcelona -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -march=barcelona -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -march=barcelona -O -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -march=barcelona -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -march=k8 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -march=k8 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -march=k8 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -march=nocona -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -march=nocona -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -march=nocona -O -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -march=nocona -Os -fomit-frame-pointer avx2
gcc -funroll-loops -march=barcelona -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -march=barcelona -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -march=barcelona -O -fomit-frame-pointer avx2
gcc -funroll-loops -march=barcelona -Os -fomit-frame-pointer avx2
gcc -funroll-loops -march=k8 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -march=k8 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -march=k8 -O -fomit-frame-pointer avx2
gcc -funroll-loops -march=k8 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -march=nocona -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -march=nocona -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -march=nocona -O -fomit-frame-pointer avx2
gcc -funroll-loops -march=nocona -Os -fomit-frame-pointer avx2
gcc -m64 -O2 -fomit-frame-pointer avx2
gcc -m64 -O3 -fomit-frame-pointer avx2
gcc -m64 -O -fomit-frame-pointer avx2
gcc -m64 -Os -fomit-frame-pointer avx2
gcc -m64 -march=core2 -O2 -fomit-frame-pointer avx2
gcc -m64 -march=core2 -O3 -fomit-frame-pointer avx2
gcc -m64 -march=core2 -O -fomit-frame-pointer avx2
gcc -m64 -march=core2 -Os -fomit-frame-pointer avx2
gcc -m64 -march=core2 -msse4.1 -O2 -fomit-frame-pointer avx2
gcc -m64 -march=core2 -msse4.1 -O3 -fomit-frame-pointer avx2
gcc -m64 -march=core2 -msse4.1 -O -fomit-frame-pointer avx2
gcc -m64 -march=core2 -msse4.1 -Os -fomit-frame-pointer avx2
gcc -m64 -march=core2 -msse4 -O2 -fomit-frame-pointer avx2
gcc -m64 -march=core2 -msse4 -O3 -fomit-frame-pointer avx2
gcc -m64 -march=core2 -msse4 -O -fomit-frame-pointer avx2
gcc -m64 -march=core2 -msse4 -Os -fomit-frame-pointer avx2
gcc -m64 -march=corei7 -O2 -fomit-frame-pointer avx2
gcc -m64 -march=corei7 -O3 -fomit-frame-pointer avx2
gcc -m64 -march=corei7 -O -fomit-frame-pointer avx2
gcc -m64 -march=corei7 -Os -fomit-frame-pointer avx2
gcc -m64 -march=k8 -O2 -fomit-frame-pointer avx2
gcc -m64 -march=k8 -O3 -fomit-frame-pointer avx2
gcc -m64 -march=k8 -O -fomit-frame-pointer avx2
gcc -m64 -march=k8 -Os -fomit-frame-pointer avx2
gcc -m64 -march=nocona -O2 -fomit-frame-pointer avx2
gcc -m64 -march=nocona -O3 -fomit-frame-pointer avx2
gcc -m64 -march=nocona -O -fomit-frame-pointer avx2
gcc -m64 -march=nocona -Os -fomit-frame-pointer avx2
gcc -march=barcelona -O2 -fomit-frame-pointer avx2
gcc -march=barcelona -O3 -fomit-frame-pointer avx2
gcc -march=barcelona -O -fomit-frame-pointer avx2
gcc -march=barcelona -Os -fomit-frame-pointer avx2
gcc -march=k8 -O2 -fomit-frame-pointer avx2
gcc -march=k8 -O3 -fomit-frame-pointer avx2
gcc -march=k8 -O -fomit-frame-pointer avx2
gcc -march=k8 -Os -fomit-frame-pointer avx2
gcc -march=nocona -O2 -fomit-frame-pointer avx2
gcc -march=nocona -O3 -fomit-frame-pointer avx2
gcc -march=nocona -O -fomit-frame-pointer avx2
gcc -march=nocona -Os -fomit-frame-pointer avx2

Compiler output

Implementation: crypto_sign/sphincss128shake256/avx2
Compiler: clang -O3 -fomit-frame-pointer -Qunused-arguments
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'KeccakP1600times4_AddLanesAll' that is compiled without support for 'sse4.2'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:42: note: expanded from macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:44:37: note: expanded from macro 'LOAD256u'
KeccakP-1600-times4-SIMD256.c: #define LOAD256u(a) _mm256_loadu_si256((const V256 *)&(a))
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'KeccakP1600times4_AddLanesAll' that is compiled without support for 'sse4.2'
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:136:42: note: expanded from macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: lanes1 = LOAD256u( curData1[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:44:37: note: expanded from macro 'LOAD256u'
KeccakP-1600-times4-SIMD256.c: #define LOAD256u(a) _mm256_loadu_si256((const V256 *)&(a))
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'KeccakP1600times4_AddLanesAll' that is compiled without support for 'sse4.2'
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:137:42: note: expanded from macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: lanes2 = LOAD256u( curData2[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:44:37: note: expanded from macro 'LOAD256u'
KeccakP-1600-times4-SIMD256.c: #define LOAD256u(a) _mm256_loadu_si256((const V256 *)&(a))
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'KeccakP1600times4_AddLanesAll' that is compiled without support for 'sse4.2'
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:138:42: note: expanded from macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler | Implementations
clang -O3 -fomit-frame-pointer -Qunused-arguments avx2
clang -mcpu=cortex-a8 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments avx2
clang -mcpu=cortex-a9 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments avx2
clang -mcpu=native -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments avx2

Compiler output

Implementation: crypto_sign/sphincss128shake256/avx2
Compiler: clang -O3 -fwrapv -mavx -fomit-frame-pointer -Qunused-arguments
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: error: always_inline function '_mm256_xor_si256' requires target feature 'avx2', but would be inlined into function 'KeccakP1600times4_AddLanesAll' that is compiled without support for 'avx2'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:140:33: note: expanded from macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+0], lanes0 ),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: note: expanded from macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: error: always_inline function '_mm256_xor_si256' requires target feature 'avx2', but would be inlined into function 'KeccakP1600times4_AddLanesAll' that is compiled without support for 'avx2'
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:141:33: note: expanded from macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+1], lanes1 ),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: note: expanded from macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: error: always_inline function '_mm256_xor_si256' requires target feature 'avx2', but would be inlined into function 'KeccakP1600times4_AddLanesAll' that is compiled without support for 'avx2'
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:142:33: note: expanded from macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+2], lanes2 ),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: note: expanded from macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: error: always_inline function '_mm256_xor_si256' requires target feature 'avx2', but would be inlined into function 'KeccakP1600times4_AddLanesAll' that is compiled without support for 'avx2'
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: expanded from macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 2, namely:
Compiler | Implementations
clang -O3 -fwrapv -mavx -fomit-frame-pointer -Qunused-arguments avx2
clang -O3 -fwrapv -mavx -maes -mpclmul -fomit-frame-pointer -Qunused-arguments avx2

Compiler output

Implementation: crypto_sign/sphincss128shake256/avx2
Compiler: gcc -m64 -march=barcelona -O2 -fomit-frame-pointer
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:40: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: error: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ...
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:40: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: error: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler | Implementations
gcc -m64 -march=barcelona -O2 -fomit-frame-pointer avx2
gcc -m64 -march=barcelona -O3 -fomit-frame-pointer avx2
gcc -m64 -march=barcelona -O -fomit-frame-pointer avx2
gcc -m64 -march=barcelona -Os -fomit-frame-pointer avx2

Compiler output

Implementation: crypto_sign/sphincss128shake256/avx2
Compiler: gcc -m64 -march=core-avx-i -O2 -fomit-frame-pointer
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: error: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: error: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:142:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+2], lanes2 ),\
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 8, namely:
Compiler | Implementations
gcc -m64 -march=core-avx-i -O2 -fomit-frame-pointer avx2
gcc -m64 -march=core-avx-i -O3 -fomit-frame-pointer avx2
gcc -m64 -march=core-avx-i -O -fomit-frame-pointer avx2
gcc -m64 -march=core-avx-i -Os -fomit-frame-pointer avx2
gcc -m64 -march=corei7-avx -O2 -fomit-frame-pointer avx2
gcc -m64 -march=corei7-avx -O3 -fomit-frame-pointer avx2
gcc -m64 -march=corei7-avx -O -fomit-frame-pointer avx2
gcc -m64 -march=corei7-avx -Os -fomit-frame-pointer avx2