Implementation notes: amd64, bolero, crypto_kem/kyber1024

Computer: bolero
Architecture: amd64
CPU ID: GenuineIntel-000406f1-bfebfbff
SUPERCOP version: 20181209
Operation: crypto_kem
Primitive: kyber1024
Time | Implementation | Compiler | Benchmark date | SUPERCOP version
302540avx2gcc -m64 -march=core-avx2 -O3 -fomit-frame-pointer2018101720180818
306004avx2gcc -m64 -march=native -mtune=native -O3 -fomit-frame-pointer2018101720180818
306052avx2gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv2018101720180818
311020avx2clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2018101720180818
311048avx2clang -O3 -fwrapv -march=native -fomit-frame-pointer -Qunused-arguments2018101720180818
349512avx2clang -O3 -fwrapv -march=x86-64 -mcpu=core-avx2 -mavx2 -maes -mpclmul -fomit-frame-pointer -Qunused-arguments2018101720180818
349568avx2clang -O3 -fwrapv -mavx2 -fomit-frame-pointer -Qunused-arguments2018101720180818
353508avx2gcc -m64 -march=native -mtune=native -O2 -fomit-frame-pointer2018101720180818
354908avx2gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv2018101720180818
358300avx2gcc -m64 -march=core-avx2 -O2 -fomit-frame-pointer2018101720180818
373876avx2gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv2018101720180818
379736avx2gcc -m64 -march=core-avx2 -O -fomit-frame-pointer2018101720180818
383852avx2gcc -m64 -march=native -mtune=native -O -fomit-frame-pointer2018101720180818
431212avx2gcc -m64 -march=native -mtune=native -Os -fomit-frame-pointer2018101720180818
431324avx2gcc -m64 -march=core-avx2 -Os -fomit-frame-pointer2018101720180818
433600avx2gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv2018101720180818
951916refgcc -m64 -march=native -mtune=native -O3 -fomit-frame-pointer2018101720180818
959812refgcc -m64 -march=core-avx2 -O3 -fomit-frame-pointer2018101720180818
1003016refclang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2018101720180818
1016028refclang -O3 -fwrapv -march=native -fomit-frame-pointer -Qunused-arguments2018101720180818
1030648refgcc -m64 -march=core2 -msse4.1 -O3 -fomit-frame-pointer2018101720180818
1041724refgcc -m64 -march=corei7 -O3 -fomit-frame-pointer2018101720180818
1045824refgcc -m64 -march=corei7-avx -O3 -fomit-frame-pointer2018101720180818
1046280refgcc -m64 -march=core-avx-i -O3 -fomit-frame-pointer2018101720180818
1049304refgcc -fno-schedule-insns -O3 -fomit-frame-pointer2018101720180818
1049752refgcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv2018101720180818
1050388refgcc -funroll-loops -fno-schedule-insns -O3 -fomit-frame-pointer2018101720180818
1050700refclang -O3 -fwrapv -mavx -maes -mpclmul -fomit-frame-pointer -Qunused-arguments2018101720180818
1050732refgcc -funroll-loops -m64 -O3 -fomit-frame-pointer2018101720180818
1052160refgcc -funroll-loops -O3 -fomit-frame-pointer2018101720180818
1052944refgcc -funroll-loops -m64 -march=barcelona -O3 -fomit-frame-pointer2018101720180818
1053300refclang -O3 -fwrapv -mavx -fomit-frame-pointer -Qunused-arguments2018101720180818
1055544refgcc -O3 -fomit-frame-pointer2018101720180818
1057720refgcc -funroll-loops -march=barcelona -O3 -fomit-frame-pointer2018101720180818
1060900refgcc -m64 -O3 -fomit-frame-pointer2018101720180818
1061736refgcc -march=barcelona -O3 -fomit-frame-pointer2018101720180818
1063676refgcc -m64 -march=barcelona -O3 -fomit-frame-pointer2018101720180818
1085452refgcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv2018101720180818
1091996refclang -O3 -fomit-frame-pointer -Qunused-arguments2018101720180818
1104056refgcc -m64 -march=core-avx2 -O2 -fomit-frame-pointer2018101720180818
1105012refgcc -m64 -march=native -mtune=native -O2 -fomit-frame-pointer2018101720180818
1108776refclang -mcpu=cortex-a8 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2018101720180818
1112792refclang -mcpu=native -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2018101720180818
1113064refclang -O3 -fwrapv -mavx2 -fomit-frame-pointer -Qunused-arguments2018101720180818
1115116refclang -mcpu=cortex-a9 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2018101720180818
1117444refgcc -funroll-loops -march=barcelona -O2 -fomit-frame-pointer2018101720180818
1122092refgcc -m64 -march=barcelona -O2 -fomit-frame-pointer2018101720180818
1123568refgcc -m64 -march=core2 -O3 -fomit-frame-pointer2018101720180818
1123732refgcc -march=barcelona -O2 -fomit-frame-pointer2018101720180818
1124088refgcc -m64 -march=core2 -msse4 -O3 -fomit-frame-pointer2018101720180818
1124416refgcc -funroll-loops -m64 -march=barcelona -O2 -fomit-frame-pointer2018101720180818
1135744refclang -O3 -fwrapv -march=x86-64 -mcpu=core-avx2 -mavx2 -maes -mpclmul -fomit-frame-pointer -Qunused-arguments2018101720180818
1142216refgcc -funroll-loops -march=k8 -O3 -fomit-frame-pointer2018101720180818
1148804refgcc -funroll-loops -m64 -march=k8 -O3 -fomit-frame-pointer2018101720180818
1151284refgcc -m64 -march=k8 -O3 -fomit-frame-pointer2018101720180818
1153484refgcc -march=k8 -O3 -fomit-frame-pointer2018101720180818
1175208refgcc -funroll-loops -m64 -march=k8 -O -fomit-frame-pointer2018101720180818
1182524refgcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer2018101720180818
1182560refgcc -funroll-loops -march=k8 -O -fomit-frame-pointer2018101720180818
1184300refgcc -m64 -march=native -mtune=native -Os -fomit-frame-pointer2018101720180818
1185576refgcc -funroll-loops -march=barcelona -O -fomit-frame-pointer2018101720180818
1186400refgcc -funroll-loops -O2 -fomit-frame-pointer2018101720180818
1186812refgcc -funroll-loops -m64 -march=barcelona -O -fomit-frame-pointer2018101720180818
1187088refgcc -funroll-loops -fno-schedule-insns -O2 -fomit-frame-pointer2018101720180818
1190516refgcc -funroll-loops -march=k8 -O2 -fomit-frame-pointer2018101720180818
1193620refgcc -m64 -march=core-avx-i -O2 -fomit-frame-pointer2018101720180818
1193644refgcc -m64 -march=corei7 -O2 -fomit-frame-pointer2018101720180818
1193712refgcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv2018101720180818
1194380refgcc -m64 -O2 -fomit-frame-pointer2018101720180818
1194712refgcc -funroll-loops -m64 -O2 -fomit-frame-pointer2018101720180818
1195536refgcc -m64 -march=core2 -O2 -fomit-frame-pointer2018101720180818
1196880refgcc -m64 -march=core2 -msse4.1 -O2 -fomit-frame-pointer2018101720180818
1197604refgcc -m64 -march=core-avx2 -O -fomit-frame-pointer2018101720180818
1203032refgcc -fno-schedule-insns -O2 -fomit-frame-pointer2018101720180818
1204008refgcc -O2 -fomit-frame-pointer2018101720180818
1206516refgcc -m64 -march=corei7-avx -O2 -fomit-frame-pointer2018101720180818
1206892refgcc -m64 -march=core-avx2 -Os -fomit-frame-pointer2018101720180818
1208528refgcc -funroll-loops -m64 -O -fomit-frame-pointer2018101720180818
1214540refgcc -m64 -march=native -mtune=native -O -fomit-frame-pointer2018101720180818
1216532refgcc -m64 -march=k8 -O2 -fomit-frame-pointer2018101720180818
1216652refgcc -march=k8 -O2 -fomit-frame-pointer2018101720180818
1225316refgcc -m64 -march=k8 -O -fomit-frame-pointer2018101720180818
1229396refgcc -march=k8 -O -fomit-frame-pointer2018101720180818
1230152refgcc -m64 -march=core2 -msse4 -O2 -fomit-frame-pointer2018101720180818
1232868refgcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv2018101720180818
1238608refgcc -march=barcelona -O -fomit-frame-pointer2018101720180818
1248808refgcc -funroll-loops -march=nocona -O3 -fomit-frame-pointer2018101720180818
1250216refgcc -funroll-loops -O -fomit-frame-pointer2018101720180818
1253416refgcc -funroll-loops -fno-schedule-insns -O -fomit-frame-pointer2018101720180818
1256268refgcc -m64 -march=core2 -msse4.1 -O -fomit-frame-pointer2018101720180818
1257676refgcc -m64 -march=core-avx-i -O -fomit-frame-pointer2018101720180818
1258320refgcc -m64 -march=corei7-avx -O -fomit-frame-pointer2018101720180818
1259960refgcc -funroll-loops -m64 -march=nocona -O3 -fomit-frame-pointer2018101720180818
1260320refgcc -march=nocona -O3 -fomit-frame-pointer2018101720180818
1262768refgcc -m64 -march=core-avx-i -Os -fomit-frame-pointer2018101720180818
1263476refgcc -m64 -march=nocona -O3 -fomit-frame-pointer2018101720180818
1267580refgcc -m64 -march=corei7 -O -fomit-frame-pointer2018101720180818
1269148refgcc -m64 -march=core2 -O -fomit-frame-pointer2018101720180818
1271388refgcc -O -fomit-frame-pointer2018101720180818
1273000refgcc -m64 -march=core2 -msse4 -O -fomit-frame-pointer2018101720180818
1274548refgcc -m64 -O -fomit-frame-pointer2018101720180818
1279376refgcc -funroll-loops -m64 -march=nocona -O2 -fomit-frame-pointer2018101720180818
1281340refgcc -m64 -march=barcelona -O -fomit-frame-pointer2018101720180818
1282904refgcc -m64 -march=nocona -Os -fomit-frame-pointer2018101720180818
1285092refgcc -fno-schedule-insns -O -fomit-frame-pointer2018101720180818
1287548refgcc -funroll-loops -march=nocona -O2 -fomit-frame-pointer2018101720180818
1290388refgcc -funroll-loops -m64 -Os -fomit-frame-pointer2018101720180818
1292156refgcc -m64 -march=corei7-avx -Os -fomit-frame-pointer2018101720180818
1294548refgcc -m64 -march=corei7 -Os -fomit-frame-pointer2018101720180818
1294608refgcc -funroll-loops -m64 -march=k8 -Os -fomit-frame-pointer2018101720180818
1299604refgcc -funroll-loops -fno-schedule-insns -Os -fomit-frame-pointer2018101720180818
1302440refgcc -funroll-loops -march=k8 -Os -fomit-frame-pointer2018101720180818
1304552refgcc -march=nocona -Os -fomit-frame-pointer2018101720180818
1308608refgcc -Os -fomit-frame-pointer2018101720180818
1308612refgcc -m64 -march=barcelona -Os -fomit-frame-pointer2018101720180818
1308660refgcc -m64 -march=k8 -Os -fomit-frame-pointer2018101720180818
1309940refgcc -m64 -Os -fomit-frame-pointer2018101720180818
1310788refgcc -march=barcelona -Os -fomit-frame-pointer2018101720180818
1313596refgcc -m64 -march=core2 -Os -fomit-frame-pointer2018101720180818
1314560refgcc -m64 -march=core2 -msse4.1 -Os -fomit-frame-pointer2018101720180818
1315076refgcc -m64 -march=core2 -msse4 -Os -fomit-frame-pointer2018101720180818
1315172refgcc -funroll-loops -m64 -march=barcelona -Os -fomit-frame-pointer2018101720180818
1319716refgcc -march=k8 -Os -fomit-frame-pointer2018101720180818
1319756refgcc -funroll-loops -march=barcelona -Os -fomit-frame-pointer2018101720180818
1320864refgcc -fno-schedule-insns -Os -fomit-frame-pointer2018101720180818
1322884refgcc -funroll-loops -m64 -march=nocona -Os -fomit-frame-pointer2018101720180818
1327412refgcc -march=nocona -O2 -fomit-frame-pointer2018101720180818
1330288refgcc -funroll-loops -march=nocona -Os -fomit-frame-pointer2018101720180818
1339436refgcc -m64 -march=nocona -O2 -fomit-frame-pointer2018101720180818
1346288refgcc -funroll-loops -Os -fomit-frame-pointer2018101720180818
1398532refgcc -funroll-loops -march=nocona -O -fomit-frame-pointer2018101720180818
1413744refgcc -funroll-loops -m64 -march=nocona -O -fomit-frame-pointer2018101720180818
1450412refgcc -m64 -march=nocona -O -fomit-frame-pointer2018101720180818
1457604refgcc -march=nocona -O -fomit-frame-pointer2018101720180818
3986956refgcc2018101720180818
4025112refgcc -funroll-loops2018101720180818
4034956refcc2018101720180818

Compiler output

Implementation: crypto_kem/kyber1024/avx2
Compiler: cc
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:40: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: error: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 87, namely:
Compiler | Implementations
cc avx2
gcc avx2
gcc -O2 -fomit-frame-pointer avx2
gcc -O3 -fomit-frame-pointer avx2
gcc -O -fomit-frame-pointer avx2
gcc -Os -fomit-frame-pointer avx2
gcc -fno-schedule-insns -O2 -fomit-frame-pointer avx2
gcc -fno-schedule-insns -O3 -fomit-frame-pointer avx2
gcc -fno-schedule-insns -O -fomit-frame-pointer avx2
gcc -fno-schedule-insns -Os -fomit-frame-pointer avx2
gcc -funroll-loops avx2
gcc -funroll-loops -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -O -fomit-frame-pointer avx2
gcc -funroll-loops -Os -fomit-frame-pointer avx2
gcc -funroll-loops -fno-schedule-insns -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -fno-schedule-insns -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -fno-schedule-insns -O -fomit-frame-pointer avx2
gcc -funroll-loops -fno-schedule-insns -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -march=barcelona -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -march=barcelona -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -march=barcelona -O -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -march=barcelona -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -march=k8 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -march=k8 -O -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -march=k8 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -march=nocona -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -march=nocona -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -march=nocona -O -fomit-frame-pointer avx2
gcc -funroll-loops -m64 -march=nocona -Os -fomit-frame-pointer avx2
gcc -funroll-loops -march=barcelona -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -march=barcelona -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -march=barcelona -O -fomit-frame-pointer avx2
gcc -funroll-loops -march=barcelona -Os -fomit-frame-pointer avx2
gcc -funroll-loops -march=k8 -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -march=k8 -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -march=k8 -O -fomit-frame-pointer avx2
gcc -funroll-loops -march=k8 -Os -fomit-frame-pointer avx2
gcc -funroll-loops -march=nocona -O2 -fomit-frame-pointer avx2
gcc -funroll-loops -march=nocona -O3 -fomit-frame-pointer avx2
gcc -funroll-loops -march=nocona -O -fomit-frame-pointer avx2
gcc -funroll-loops -march=nocona -Os -fomit-frame-pointer avx2
gcc -m64 -O2 -fomit-frame-pointer avx2
gcc -m64 -O3 -fomit-frame-pointer avx2
gcc -m64 -O -fomit-frame-pointer avx2
gcc -m64 -Os -fomit-frame-pointer avx2
gcc -m64 -march=core2 -O2 -fomit-frame-pointer avx2
gcc -m64 -march=core2 -O3 -fomit-frame-pointer avx2
gcc -m64 -march=core2 -O -fomit-frame-pointer avx2
gcc -m64 -march=core2 -Os -fomit-frame-pointer avx2
gcc -m64 -march=core2 -msse4.1 -O2 -fomit-frame-pointer avx2
gcc -m64 -march=core2 -msse4.1 -O3 -fomit-frame-pointer avx2
gcc -m64 -march=core2 -msse4.1 -O -fomit-frame-pointer avx2
gcc -m64 -march=core2 -msse4.1 -Os -fomit-frame-pointer avx2
gcc -m64 -march=core2 -msse4 -O2 -fomit-frame-pointer avx2
gcc -m64 -march=core2 -msse4 -O3 -fomit-frame-pointer avx2
gcc -m64 -march=core2 -msse4 -O -fomit-frame-pointer avx2
gcc -m64 -march=core2 -msse4 -Os -fomit-frame-pointer avx2
gcc -m64 -march=corei7 -O2 -fomit-frame-pointer avx2
gcc -m64 -march=corei7 -O3 -fomit-frame-pointer avx2
gcc -m64 -march=corei7 -O -fomit-frame-pointer avx2
gcc -m64 -march=corei7 -Os -fomit-frame-pointer avx2
gcc -m64 -march=k8 -O2 -fomit-frame-pointer avx2
gcc -m64 -march=k8 -O3 -fomit-frame-pointer avx2
gcc -m64 -march=k8 -O -fomit-frame-pointer avx2
gcc -m64 -march=k8 -Os -fomit-frame-pointer avx2
gcc -m64 -march=nocona -O2 -fomit-frame-pointer avx2
gcc -m64 -march=nocona -O3 -fomit-frame-pointer avx2
gcc -m64 -march=nocona -O -fomit-frame-pointer avx2
gcc -m64 -march=nocona -Os -fomit-frame-pointer avx2
gcc -march=barcelona -O2 -fomit-frame-pointer avx2
gcc -march=barcelona -O3 -fomit-frame-pointer avx2
gcc -march=barcelona -O -fomit-frame-pointer avx2
gcc -march=barcelona -Os -fomit-frame-pointer avx2
gcc -march=k8 -O2 -fomit-frame-pointer avx2
gcc -march=k8 -O3 -fomit-frame-pointer avx2
gcc -march=k8 -O -fomit-frame-pointer avx2
gcc -march=k8 -Os -fomit-frame-pointer avx2
gcc -march=nocona -O2 -fomit-frame-pointer avx2
gcc -march=nocona -O3 -fomit-frame-pointer avx2
gcc -march=nocona -O -fomit-frame-pointer avx2
gcc -march=nocona -Os -fomit-frame-pointer avx2

Compiler output

Implementation: crypto_kem/kyber1024/avx2
Compiler: clang -O3 -fomit-frame-pointer -Qunused-arguments
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'KeccakP1600times4_AddLanesAll' that is compiled without support for 'sse4.2'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:42: note: expanded from macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:44:37: note: expanded from macro 'LOAD256u'
KeccakP-1600-times4-SIMD256.c: #define LOAD256u(a) _mm256_loadu_si256((const V256 *)&(a))
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'KeccakP1600times4_AddLanesAll' that is compiled without support for 'sse4.2'
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:136:42: note: expanded from macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: lanes1 = LOAD256u( curData1[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:44:37: note: expanded from macro 'LOAD256u'
KeccakP-1600-times4-SIMD256.c: #define LOAD256u(a) _mm256_loadu_si256((const V256 *)&(a))
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'KeccakP1600times4_AddLanesAll' that is compiled without support for 'sse4.2'
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:137:42: note: expanded from macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: lanes2 = LOAD256u( curData2[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:44:37: note: expanded from macro 'LOAD256u'
KeccakP-1600-times4-SIMD256.c: #define LOAD256u(a) _mm256_loadu_si256((const V256 *)&(a))
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'KeccakP1600times4_AddLanesAll' that is compiled without support for 'sse4.2'
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:138:42: note: expanded from macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler | Implementations
clang -O3 -fomit-frame-pointer -Qunused-arguments avx2
clang -mcpu=cortex-a8 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments avx2
clang -mcpu=cortex-a9 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments avx2
clang -mcpu=native -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments avx2

Compiler output

Implementation: crypto_kem/kyber1024/avx2
Compiler: clang -O3 -fwrapv -mavx -fomit-frame-pointer -Qunused-arguments
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: error: always_inline function '_mm256_xor_si256' requires target feature 'avx2', but would be inlined into function 'KeccakP1600times4_AddLanesAll' that is compiled without support for 'avx2'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:140:33: note: expanded from macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+0], lanes0 ),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: note: expanded from macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: error: always_inline function '_mm256_xor_si256' requires target feature 'avx2', but would be inlined into function 'KeccakP1600times4_AddLanesAll' that is compiled without support for 'avx2'
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:141:33: note: expanded from macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+1], lanes1 ),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: note: expanded from macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: error: always_inline function '_mm256_xor_si256' requires target feature 'avx2', but would be inlined into function 'KeccakP1600times4_AddLanesAll' that is compiled without support for 'avx2'
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:142:33: note: expanded from macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+2], lanes2 ),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: note: expanded from macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: error: always_inline function '_mm256_xor_si256' requires target feature 'avx2', but would be inlined into function 'KeccakP1600times4_AddLanesAll' that is compiled without support for 'avx2'
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: expanded from macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 2, namely:
Compiler | Implementations
clang -O3 -fwrapv -mavx -fomit-frame-pointer -Qunused-arguments avx2
clang -O3 -fwrapv -mavx -maes -mpclmul -fomit-frame-pointer -Qunused-arguments avx2

Compiler output

Implementation: crypto_kem/kyber1024/avx2
Compiler: gcc -m64 -march=barcelona -O2 -fomit-frame-pointer
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:40: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: error: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ...
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:40: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
KeccakP-1600-times4-SIMD256.c: #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: error: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler | Implementations
gcc -m64 -march=barcelona -O2 -fomit-frame-pointer avx2
gcc -m64 -march=barcelona -O3 -fomit-frame-pointer avx2
gcc -m64 -march=barcelona -O -fomit-frame-pointer avx2
gcc -m64 -march=barcelona -Os -fomit-frame-pointer avx2

Compiler output

Implementation: crypto_kem/kyber1024/avx2
Compiler: gcc -m64 -march=core-avx-i -O2 -fomit-frame-pointer
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c: In function 'KeccakP1600times4_AddLanesAll':
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: error: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:143:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+3], lanes3 )
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:149:9: note: in expansion of macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: Xor_In4( 12 );
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
KeccakP-1600-times4-SIMD256.c: from KeccakP-1600-times4-SIMD256.c:21:
KeccakP-1600-times4-SIMD256.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:913:1: error: inlining failed in call to always_inline '_mm256_xor_si256': target specific option mismatch
KeccakP-1600-times4-SIMD256.c: _mm256_xor_si256 (__m256i __A, __m256i __B)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:55:41: error: called from here
KeccakP-1600-times4-SIMD256.c: #define XOReq256(a, b) a = _mm256_xor_si256(a, b)
KeccakP-1600-times4-SIMD256.c: ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:142:33: note: in expansion of macro 'XOReq256'
KeccakP-1600-times4-SIMD256.c: XOReq256( stateAsLanes[argIndex+2], lanes2 ),\
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (compiler,implementation) pairs: 8, namely:
Compiler | Implementations
gcc -m64 -march=core-avx-i -O2 -fomit-frame-pointer avx2
gcc -m64 -march=core-avx-i -O3 -fomit-frame-pointer avx2
gcc -m64 -march=core-avx-i -O -fomit-frame-pointer avx2
gcc -m64 -march=core-avx-i -Os -fomit-frame-pointer avx2
gcc -m64 -march=corei7-avx -O2 -fomit-frame-pointer avx2
gcc -m64 -march=corei7-avx -O3 -fomit-frame-pointer avx2
gcc -m64 -march=corei7-avx -O -fomit-frame-pointer avx2
gcc -m64 -march=corei7-avx -Os -fomit-frame-pointer avx2