Implementation notes: amd64, bolero, crypto_kem/ntrulpr4591761

Computer: bolero
Architecture: amd64
CPU ID: GenuineIntel-000406f1-bfebfbff
SUPERCOP version: 20190110
Operation: crypto_kem
Primitive: ntrulpr4591761
Time | Implementation | Compiler | Benchmark date | SUPERCOP version
194108avxgcc -m64 -march=core-avx2 -O3 -fomit-frame-pointer2018121720181216
195612avxgcc -m64 -march=native -mtune=native -O3 -fomit-frame-pointer2018121720181216
195616avxgcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv2018121720181216
199952avxclang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2018121720181216
200492avxclang -O3 -fwrapv -march=native -fomit-frame-pointer -Qunused-arguments2018121720181216
215284avxgcc -m64 -march=core-avx2 -O2 -fomit-frame-pointer2018121720181216
215536avxgcc -m64 -march=native -mtune=native -O2 -fomit-frame-pointer2018121720181216
216764avxgcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv2018121720181216
218132avxgcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv2018121720181216
219752avxgcc -m64 -march=core-avx2 -O -fomit-frame-pointer2018121720181216
221232avxgcc -m64 -march=native -mtune=native -O -fomit-frame-pointer2018121720181216
221452avxgcc -m64 -march=core-avx2 -Os -fomit-frame-pointer2018121720181216
222704avxgcc -m64 -march=native -mtune=native -Os -fomit-frame-pointer2018121720181216
230496avxgcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv2018121720181216
54841912refgcc -O3 -fomit-frame-pointer2018121720181216
54843428refgcc -m64 -march=barcelona -O3 -fomit-frame-pointer2018121720181216
54843992refgcc -m64 -O3 -fomit-frame-pointer2018121720181216
54849048refgcc -m64 -march=k8 -O3 -fomit-frame-pointer2018121720181216
54850416refgcc -march=k8 -O3 -fomit-frame-pointer2018121720181216
54855436refgcc -fno-schedule-insns -O3 -fomit-frame-pointer2018121720181216
54860940refgcc -m64 -march=barcelona -O2 -fomit-frame-pointer2018121720181216
54861848refgcc -m64 -march=corei7-avx -O3 -fomit-frame-pointer2018121720181216
54861920refgcc -m64 -march=k8 -O2 -fomit-frame-pointer2018121720181216
54862636refgcc -m64 -march=native -mtune=native -O3 -fomit-frame-pointer2018121720181216
54862956refgcc -march=k8 -O2 -fomit-frame-pointer2018121720181216
54863052refgcc -m64 -march=core-avx2 -O3 -fomit-frame-pointer2018121720181216
54863712refgcc -m64 -march=corei7 -O3 -fomit-frame-pointer2018121720181216
54863784refgcc -march=barcelona -O2 -fomit-frame-pointer2018121720181216
54864532refgcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv2018121720181216
54868284refgcc -m64 -march=core-avx-i -O3 -fomit-frame-pointer2018121720181216
54871284refgcc -m64 -march=core2 -msse4 -O3 -fomit-frame-pointer2018121720181216
54873696refgcc -m64 -march=core2 -msse4.1 -O3 -fomit-frame-pointer2018121720181216
54884472refgcc -m64 -march=core-avx2 -O2 -fomit-frame-pointer2018121720181216
54887356refgcc -m64 -march=corei7 -O2 -fomit-frame-pointer2018121720181216
54888168refgcc -m64 -march=corei7-avx -O2 -fomit-frame-pointer2018121720181216
54889596refclang -mcpu=native -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2018121720181216
54889832refgcc -m64 -march=core-avx-i -O2 -fomit-frame-pointer2018121720181216
54894992refgcc -m64 -march=core2 -O3 -fomit-frame-pointer2018121720181216
54897364refgcc -m64 -march=core2 -msse4.1 -O2 -fomit-frame-pointer2018121720181216
54897380refgcc -m64 -march=core2 -O2 -fomit-frame-pointer2018121720181216
54899100refclang -O3 -fomit-frame-pointer -Qunused-arguments2018121720181216
54901184refgcc -m64 -march=k8 -O -fomit-frame-pointer2018121720181216
54902484refgcc -funroll-loops -march=k8 -Os -fomit-frame-pointer2018121720181216
54904128refgcc -m64 -march=core2 -msse4 -O2 -fomit-frame-pointer2018121720181216
54904448refgcc -m64 -march=core-avx-i -O -fomit-frame-pointer2018121720181216
54906272refgcc -m64 -march=core2 -O -fomit-frame-pointer2018121720181216
54907692refgcc -m64 -march=barcelona -O -fomit-frame-pointer2018121720181216
54909276refgcc -m64 -march=corei7-avx -O -fomit-frame-pointer2018121720181216
54911072refgcc -m64 -march=core2 -msse4 -O -fomit-frame-pointer2018121720181216
54911316refgcc -m64 -march=core-avx2 -O -fomit-frame-pointer2018121720181216
54913188refgcc -funroll-loops -O -fomit-frame-pointer2018121720181216
54914428refgcc -m64 -march=core2 -msse4.1 -O -fomit-frame-pointer2018121720181216
54915744refgcc -m64 -march=corei7 -O -fomit-frame-pointer2018121720181216
54925672refclang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2018121720181216
54929864refgcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv2018121720181216
54933152refgcc -m64 -march=corei7 -Os -fomit-frame-pointer2018121720181216
54933512refgcc -m64 -march=nocona -Os -fomit-frame-pointer2018121720181216
54935024refgcc -m64 -march=core2 -msse4 -Os -fomit-frame-pointer2018121720181216
54937768refgcc -m64 -march=core2 -Os -fomit-frame-pointer2018121720181216
54938472refgcc -m64 -march=k8 -Os -fomit-frame-pointer2018121720181216
54939012refgcc -funroll-loops -march=nocona -Os -fomit-frame-pointer2018121720181216
54939224refgcc -m64 -march=native -mtune=native -Os -fomit-frame-pointer2018121720181216
54939776refgcc -m64 -march=core-avx-i -Os -fomit-frame-pointer2018121720181216
54940252refgcc -m64 -march=barcelona -Os -fomit-frame-pointer2018121720181216
54940544refgcc -m64 -Os -fomit-frame-pointer2018121720181216
54941276refgcc -m64 -march=core2 -msse4.1 -Os -fomit-frame-pointer2018121720181216
54942976refgcc -m64 -march=corei7-avx -Os -fomit-frame-pointer2018121720181216
54943948refgcc -march=k8 -Os -fomit-frame-pointer2018121720181216
54945200refgcc -m64 -march=core-avx2 -Os -fomit-frame-pointer2018121720181216
54945772refgcc -m64 -march=native -mtune=native -O -fomit-frame-pointer2018121720181216
54966012refgcc -march=k8 -O -fomit-frame-pointer2018121720181216
54986820refgcc -m64 -march=native -mtune=native -O2 -fomit-frame-pointer2018121720181216
54997168refgcc -march=nocona -Os -fomit-frame-pointer2018121720181216
55055752refgcc -funroll-loops -m64 -march=barcelona -O -fomit-frame-pointer2018121720181216
55152712refgcc -march=barcelona -O3 -fomit-frame-pointer2018121720181216
55158112refgcc -m64 -O2 -fomit-frame-pointer2018121720181216
55176972refgcc -funroll-loops -m64 -march=k8 -O -fomit-frame-pointer2018121720181216
55195404refgcc -funroll-loops -m64 -O -fomit-frame-pointer2018121720181216
55205444refclang -O3 -fwrapv -mavx -maes -mpclmul -fomit-frame-pointer -Qunused-arguments2018121720181216
55209748refgcc -march=barcelona -O -fomit-frame-pointer2018121720181216
55210240refgcc -O2 -fomit-frame-pointer2018121720181216
55225928refclang -mcpu=cortex-a8 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2018121720181216
55232436refgcc -funroll-loops -march=barcelona -Os -fomit-frame-pointer2018121720181216
55237576refgcc -funroll-loops -fno-schedule-insns -Os -fomit-frame-pointer2018121720181216
55239780refgcc -O -fomit-frame-pointer2018121720181216
55253652refgcc -Os -fomit-frame-pointer2018121720181216
55255288refclang -O3 -fwrapv -march=native -fomit-frame-pointer -Qunused-arguments2018121720181216
55266880refgcc -fno-schedule-insns -Os -fomit-frame-pointer2018121720181216
55277364refgcc -fno-schedule-insns -O2 -fomit-frame-pointer2018121720181216
55296240refgcc -funroll-loops -m64 -Os -fomit-frame-pointer2018121720181216
55321756refgcc -funroll-loops -m64 -march=nocona -Os -fomit-frame-pointer2018121720181216
55324328refclang -O3 -fwrapv -mavx -fomit-frame-pointer -Qunused-arguments2018121720181216
55327312refgcc -m64 -O -fomit-frame-pointer2018121720181216
55346508refclang -mcpu=cortex-a9 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2018121720181216
55350632refgcc -funroll-loops -m64 -march=k8 -Os -fomit-frame-pointer2018121720181216
55376212refgcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv2018121720181216
55377288refgcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv2018121720181216
55466116refgcc -march=barcelona -Os -fomit-frame-pointer2018121720181216
55468500refclang -O3 -fwrapv -march=x86-64 -mcpu=core-avx2 -mavx2 -maes -mpclmul -fomit-frame-pointer -Qunused-arguments2018121720181216
55524732refgcc -funroll-loops -fno-schedule-insns -O -fomit-frame-pointer2018121720181216
55543936refclang -O3 -fwrapv -mavx2 -fomit-frame-pointer -Qunused-arguments2018121720181216
55563460refgcc -fno-schedule-insns -O -fomit-frame-pointer2018121720181216
55585452refgcc -funroll-loops -march=barcelona -O -fomit-frame-pointer2018121720181216
55586672refgcc -funroll-loops -fno-schedule-insns -O3 -fomit-frame-pointer2018121720181216
55705184refgcc -funroll-loops -m64 -march=barcelona -Os -fomit-frame-pointer2018121720181216
55737072refgcc -funroll-loops -march=k8 -O -fomit-frame-pointer2018121720181216
55747568refgcc -funroll-loops -march=barcelona -O3 -fomit-frame-pointer2018121720181216
55778272refgcc -funroll-loops -Os -fomit-frame-pointer2018121720181216
55876936refgcc -funroll-loops -m64 -march=barcelona -O3 -fomit-frame-pointer2018121720181216
55942756refgcc -funroll-loops -m64 -O3 -fomit-frame-pointer2018121720181216
56005528refgcc -funroll-loops -m64 -march=k8 -O3 -fomit-frame-pointer2018121720181216
56035440refgcc -funroll-loops -O3 -fomit-frame-pointer2018121720181216
56143636refgcc -funroll-loops -march=k8 -O3 -fomit-frame-pointer2018121720181216
56308272refgcc -funroll-loops -O2 -fomit-frame-pointer2018121720181216
56313276refgcc -funroll-loops -m64 -march=barcelona -O2 -fomit-frame-pointer2018121720181216
56315120refgcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer2018121720181216
56317240refgcc -funroll-loops -m64 -O2 -fomit-frame-pointer2018121720181216
56572404refgcc -funroll-loops -march=k8 -O2 -fomit-frame-pointer2018121720181216
56693448refgcc -funroll-loops -march=barcelona -O2 -fomit-frame-pointer2018121720181216
56836100refgcc -funroll-loops -fno-schedule-insns -O2 -fomit-frame-pointer2018121720181216
81390596refgcc -march=nocona -O2 -fomit-frame-pointer2018121720181216
81818708refgcc -m64 -march=nocona -O3 -fomit-frame-pointer2018121720181216
81878480refgcc -m64 -march=nocona -O2 -fomit-frame-pointer2018121720181216
82025860refgcc -m64 -march=nocona -O -fomit-frame-pointer2018121720181216
82289676refgcc -march=nocona -O3 -fomit-frame-pointer2018121720181216
82310016refgcc -march=nocona -O -fomit-frame-pointer2018121720181216
84658164refgcc -funroll-loops -m64 -march=nocona -O2 -fomit-frame-pointer2018121720181216
85104564refgcc -funroll-loops -march=nocona -O3 -fomit-frame-pointer2018121720181216
85127088refgcc -funroll-loops -march=nocona -O2 -fomit-frame-pointer2018121720181216
85185848refgcc -funroll-loops -m64 -march=nocona -O3 -fomit-frame-pointer2018121720181216
87338696refgcc -funroll-loops -m64 -march=nocona -O -fomit-frame-pointer2018121720181216
88291404refgcc -funroll-loops -march=nocona -O -fomit-frame-pointer2018121720181216
139396468refgcc -funroll-loops2018121720181216
139397020refgcc2018121720181216
141614120refcc2018121720181216

Compiler output

Implementation: crypto_kem/ntrulpr4591761/avx
Compiler: cc
mult.c: mult.c: In function 'add':
mult.c: mult.c:82:1: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
mult.c: {
mult.c: ^
mult.c: mult.c: In function 'fastsub':
mult.c: mult.c:91:22: note: The ABI for passing parameters with 32-byte alignment has changed in GCC 4.6
mult.c: static inline __m256 fastsub(__m256 x,__m256 y)
mult.c: ^
mult.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:79:0,
mult.c: from mult.c:2:
mult.c: mult.c: In function 'fastadd':
mult.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/fmaintrin.h:63:1: error: inlining failed in call to always_inline '_mm256_fmadd_ps': target specific option mismatch
mult.c: _mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
mult.c: ^
mult.c: mult.c:88:10: error: called from here
mult.c: return _mm256_fmadd_ps(y,v1_float,x);
mult.c: ^
mult.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:41:0,
mult.c: from mult.c:2:
mult.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avxintrin.h:1285:1: error: inlining failed in call to always_inline '_mm256_set1_ps': target specific option mismatch
mult.c: _mm256_set1_ps (float __A)
mult.c: ^
mult.c: mult.c:88:10: error: called from here
mult.c: return _mm256_fmadd_ps(y,v1_float,x);
mult.c: ^

Number of similar (compiler,implementation) pairs: 3, namely:
Compiler Implementations
cc avx
gcc avx
gcc -funroll-loops avx

Compiler output

Implementation: crypto_kem/ntrulpr4591761/avx
Compiler: clang -O3 -fomit-frame-pointer -Qunused-arguments
mult.c: mult.c:147:22: error: invalid output size for constraint '=&x'
mult.c: MULSTEP_fromzero(0,h0,h1,h2,h3,h4)
mult.c: ^
mult.c: mult.c:149:26: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 1,h1,h2,h3,h4,h0)
mult.c: ^
mult.c: mult.c:150:26: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 2,h2,h3,h4,h0,h1)
mult.c: ^
mult.c: mult.c:151:26: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 3,h3,h4,h0,h1,h2)
mult.c: ^
mult.c: mult.c:152:26: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 4,h4,h0,h1,h2,h3)
mult.c: ^
mult.c: mult.c:153:26: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 5,h0,h1,h2,h3,h4)
mult.c: ^
mult.c: mult.c:155:24: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 1,h1,h2,h3,h4,h0)
mult.c: ^
mult.c: mult.c:156:24: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 2,h2,h3,h4,h0,h1)
mult.c: ^
mult.c: mult.c:157:24: error: invalid output size for constraint '+x'
mult.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler Implementations
clang -O3 -fomit-frame-pointer -Qunused-arguments avx
clang -mcpu=cortex-a8 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments avx
clang -mcpu=cortex-a9 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments avx
clang -mcpu=native -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments avx

Compiler output

Implementation: crypto_kem/ntrulpr4591761/avx
Compiler: clang -O3 -fwrapv -march=x86-64 -mcpu=core-avx2 -mavx2 -maes -mpclmul -fomit-frame-pointer -Qunused-arguments
mult.c: mult.c:93:10: error: always_inline function '_mm256_fmadd_ps' requires target feature 'fma', but would be inlined into function 'fastsub' that is compiled without support for 'fma'
mult.c: return _mm256_fmadd_ps(y,vm1_float,x);
mult.c: ^
mult.c: mult.c:100:10: error: always_inline function '_mm256_fmadd_ps' requires target feature 'fma', but would be inlined into function 'reduce' that is compiled without support for 'fma'
mult.c: return _mm256_fmadd_ps(q,vm4591_float,x);
mult.c: ^
mult.c: mult.c:88:10: error: always_inline function '_mm256_fmadd_ps' requires target feature 'fma', but would be inlined into function 'fastadd' that is compiled without support for 'fma'
mult.c: return _mm256_fmadd_ps(y,v1_float,x);
mult.c: ^
mult.c: fatal error: error in backend: Cannot select: 0x30ae930: v8f32 = X86ISD::FMADD 0x3176140, 0x30bc660, 0x31a4530
mult.c: 0x3176140: v8f32,ch = loadgt;)>gt; 0x300bbb0, 0x319c390, undef:i64
mult.c: 0x319c390: i64 = add 0x3185ac0, Constant:i64gt;
mult.c: 0x3185ac0: i64 = add FrameIndex:i64gt;, 0x31a4b20
mult.c: 0x2fae340: i64 = FrameIndexgt;
mult.c: 0x31a4b20: i64,ch = CopyFromReg 0x300bbb0, Register:i64 %vreg78
mult.c: 0x319baa0: i64 = Register %vreg78
mult.c: 0x3175390: i64 = Constantgt;
mult.c: 0x318f3f0: i64 = undef
mult.c: 0x30bc660: v8f32 = X86ISD::VBROADCAST 0x3179280
mult.c: 0x3179280: f32,ch = loadgt; 0x300bbb0, 0x30afcd0, undef:i64
mult.c: 0x30afcd0: i64 = X86ISD::Wrapper TargetConstantPool:i64gt; 0
mult.c: 0x3196920: i64 = TargetConstantPoolgt; 0
mult.c: 0x318f3f0: i64 = undef
mult.c: 0x31a4530: v8f32,ch = loadgt;)>gt; 0x300bbb0, 0x30ac320, undef:i64
mult.c: 0x30ac320: i64 = add 0x3185990, Constant:i64gt;
mult.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler Implementations
clang -O3 -fwrapv -march=x86-64 -mcpu=core-avx2 -mavx2 -maes -mpclmul -fomit-frame-pointer -Qunused-arguments avx

Compiler output

Implementation: crypto_kem/ntrulpr4591761/avx
Compiler: clang -O3 -fwrapv -mavx2 -fomit-frame-pointer -Qunused-arguments
mult.c: mult.c:93:10: error: always_inline function '_mm256_fmadd_ps' requires target feature 'fma', but would be inlined into function 'fastsub' that is compiled without support for 'fma'
mult.c: return _mm256_fmadd_ps(y,vm1_float,x);
mult.c: ^
mult.c: mult.c:100:10: error: always_inline function '_mm256_fmadd_ps' requires target feature 'fma', but would be inlined into function 'reduce' that is compiled without support for 'fma'
mult.c: return _mm256_fmadd_ps(q,vm4591_float,x);
mult.c: ^
mult.c: mult.c:88:10: error: always_inline function '_mm256_fmadd_ps' requires target feature 'fma', but would be inlined into function 'fastadd' that is compiled without support for 'fma'
mult.c: return _mm256_fmadd_ps(y,v1_float,x);
mult.c: ^
mult.c: fatal error: error in backend: Cannot select: 0x2789f80: v8f32 = X86ISD::FMADD 0x27c3be0, 0x2745dd0, 0x27f5fb0
mult.c: 0x27c3be0: v8f32,ch = loadgt;)>gt; 0x265a8b0, 0x27ede10, undef:i64
mult.c: 0x27ede10: i64 = add 0x27d0140, Constant:i64gt;
mult.c: 0x27d0140: i64 = add FrameIndex:i64gt;, 0x27f65a0
mult.c: 0x25d42a0: i64 = FrameIndexgt;
mult.c: 0x27f65a0: i64,ch = CopyFromReg 0x265a8b0, Register:i64 %vreg78
mult.c: 0x27ed520: i64 = Register %vreg78
mult.c: 0x27d8f20: i64 = Constantgt;
mult.c: 0x27e0e70: i64 = undef
mult.c: 0x2745dd0: v8f32 = X86ISD::VBROADCAST 0x2798020
mult.c: 0x2798020: f32,ch = loadgt; 0x265a8b0, 0x278b320, undef:i64
mult.c: 0x278b320: i64 = X86ISD::Wrapper TargetConstantPool:i64gt; 0
mult.c: 0x27e83a0: i64 = TargetConstantPoolgt; 0
mult.c: 0x27e0e70: i64 = undef
mult.c: 0x27f5fb0: v8f32,ch = loadgt;)>gt; 0x265a8b0, 0x270a880, undef:i64
mult.c: 0x270a880: i64 = add 0x27d0010, Constant:i64gt;
mult.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler Implementations
clang -O3 -fwrapv -mavx2 -fomit-frame-pointer -Qunused-arguments avx

Compiler output

Implementation: crypto_kem/ntrulpr4591761/avx
Compiler: clang -O3 -fwrapv -mavx -fomit-frame-pointer -Qunused-arguments
mult.c: mult.c:732:17: error: always_inline function '_mm256_add_epi16' requires target feature 'avx2', but would be inlined into function 'rq_mult' that is compiled without support for 'avx2'
mult.c: __m256i x = _mm256_add_epi16(fgi,_mm256_add_epi16(fgip,fgip1));
mult.c: ^
mult.c: mult.c:732:38: error: always_inline function '_mm256_add_epi16' requires target feature 'avx2', but would be inlined into function 'rq_mult' that is compiled without support for 'avx2'
mult.c: __m256i x = _mm256_add_epi16(fgi,_mm256_add_epi16(fgip,fgip1));
mult.c: ^
mult.c: 2 errors generated.

Number of similar (compiler,implementation) pairs: 2, namely:
Compiler Implementations
clang -O3 -fwrapv -mavx -fomit-frame-pointer -Qunused-arguments avx
clang -O3 -fwrapv -mavx -maes -mpclmul -fomit-frame-pointer -Qunused-arguments avx

Compiler output

Implementation: crypto_kem/ntrulpr4591761/avx
Compiler: gcc -O2 -fomit-frame-pointer
mult.c: mult.c: In function 'add':
mult.c: mult.c:82:1: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
mult.c: {
mult.c: ^
mult.c: mult.c: In function 'squeezeadd16':
mult.c: mult.c:110:23: note: The ABI for passing parameters with 32-byte alignment has changed in GCC 4.6
mult.c: static inline __m256i squeezeadd16(__m256i x,__m256i y)
mult.c: ^
mult.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:79:0,
mult.c: from mult.c:2:
mult.c: mult.c: In function 'fastadd':
mult.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/fmaintrin.h:63:1: error: inlining failed in call to always_inline '_mm256_fmadd_ps': target specific option mismatch
mult.c: _mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
mult.c: ^
mult.c: mult.c:88:10: error: called from here
mult.c: return _mm256_fmadd_ps(y,v1_float,x);
mult.c: ^
mult.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:41:0,
mult.c: from mult.c:2:
mult.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avxintrin.h:1285:1: error: inlining failed in call to always_inline '_mm256_set1_ps': target specific option mismatch
mult.c: _mm256_set1_ps (float __A)
mult.c: ^
mult.c: mult.c:88:10: error: called from here
mult.c: return _mm256_fmadd_ps(y,v1_float,x);
mult.c: ^

Number of similar (compiler,implementation) pairs: 84, namely:
Compiler Implementations
gcc -O2 -fomit-frame-pointer avx
gcc -O3 -fomit-frame-pointer avx
gcc -O -fomit-frame-pointer avx
gcc -Os -fomit-frame-pointer avx
gcc -fno-schedule-insns -O2 -fomit-frame-pointer avx
gcc -fno-schedule-insns -O3 -fomit-frame-pointer avx
gcc -fno-schedule-insns -O -fomit-frame-pointer avx
gcc -fno-schedule-insns -Os -fomit-frame-pointer avx
gcc -funroll-loops -O2 -fomit-frame-pointer avx
gcc -funroll-loops -O3 -fomit-frame-pointer avx
gcc -funroll-loops -O -fomit-frame-pointer avx
gcc -funroll-loops -Os -fomit-frame-pointer avx
gcc -funroll-loops -fno-schedule-insns -O2 -fomit-frame-pointer avx
gcc -funroll-loops -fno-schedule-insns -O3 -fomit-frame-pointer avx
gcc -funroll-loops -fno-schedule-insns -O -fomit-frame-pointer avx
gcc -funroll-loops -fno-schedule-insns -Os -fomit-frame-pointer avx
gcc -funroll-loops -m64 -O2 -fomit-frame-pointer avx
gcc -funroll-loops -m64 -O3 -fomit-frame-pointer avx
gcc -funroll-loops -m64 -O -fomit-frame-pointer avx
gcc -funroll-loops -m64 -Os -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=barcelona -O2 -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=barcelona -O3 -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=barcelona -O -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=barcelona -Os -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=k8 -O3 -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=k8 -O -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=k8 -Os -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=nocona -O2 -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=nocona -O3 -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=nocona -O -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=nocona -Os -fomit-frame-pointer avx
gcc -funroll-loops -march=barcelona -O2 -fomit-frame-pointer avx
gcc -funroll-loops -march=barcelona -O3 -fomit-frame-pointer avx
gcc -funroll-loops -march=barcelona -O -fomit-frame-pointer avx
gcc -funroll-loops -march=barcelona -Os -fomit-frame-pointer avx
gcc -funroll-loops -march=k8 -O2 -fomit-frame-pointer avx
gcc -funroll-loops -march=k8 -O3 -fomit-frame-pointer avx
gcc -funroll-loops -march=k8 -O -fomit-frame-pointer avx
gcc -funroll-loops -march=k8 -Os -fomit-frame-pointer avx
gcc -funroll-loops -march=nocona -O2 -fomit-frame-pointer avx
gcc -funroll-loops -march=nocona -O3 -fomit-frame-pointer avx
gcc -funroll-loops -march=nocona -O -fomit-frame-pointer avx
gcc -funroll-loops -march=nocona -Os -fomit-frame-pointer avx
gcc -m64 -O2 -fomit-frame-pointer avx
gcc -m64 -O3 -fomit-frame-pointer avx
gcc -m64 -O -fomit-frame-pointer avx
gcc -m64 -Os -fomit-frame-pointer avx
gcc -m64 -march=core2 -O2 -fomit-frame-pointer avx
gcc -m64 -march=core2 -O3 -fomit-frame-pointer avx
gcc -m64 -march=core2 -O -fomit-frame-pointer avx
gcc -m64 -march=core2 -Os -fomit-frame-pointer avx
gcc -m64 -march=core2 -msse4.1 -O2 -fomit-frame-pointer avx
gcc -m64 -march=core2 -msse4.1 -O3 -fomit-frame-pointer avx
gcc -m64 -march=core2 -msse4.1 -O -fomit-frame-pointer avx
gcc -m64 -march=core2 -msse4.1 -Os -fomit-frame-pointer avx
gcc -m64 -march=core2 -msse4 -O2 -fomit-frame-pointer avx
gcc -m64 -march=core2 -msse4 -O3 -fomit-frame-pointer avx
gcc -m64 -march=core2 -msse4 -O -fomit-frame-pointer avx
gcc -m64 -march=core2 -msse4 -Os -fomit-frame-pointer avx
gcc -m64 -march=corei7 -O2 -fomit-frame-pointer avx
gcc -m64 -march=corei7 -O3 -fomit-frame-pointer avx
gcc -m64 -march=corei7 -O -fomit-frame-pointer avx
gcc -m64 -march=corei7 -Os -fomit-frame-pointer avx
gcc -m64 -march=k8 -O2 -fomit-frame-pointer avx
gcc -m64 -march=k8 -O3 -fomit-frame-pointer avx
gcc -m64 -march=k8 -O -fomit-frame-pointer avx
gcc -m64 -march=k8 -Os -fomit-frame-pointer avx
gcc -m64 -march=nocona -O2 -fomit-frame-pointer avx
gcc -m64 -march=nocona -O3 -fomit-frame-pointer avx
gcc -m64 -march=nocona -O -fomit-frame-pointer avx
gcc -m64 -march=nocona -Os -fomit-frame-pointer avx
gcc -march=barcelona -O2 -fomit-frame-pointer avx
gcc -march=barcelona -O3 -fomit-frame-pointer avx
gcc -march=barcelona -O -fomit-frame-pointer avx
gcc -march=barcelona -Os -fomit-frame-pointer avx
gcc -march=k8 -O2 -fomit-frame-pointer avx
gcc -march=k8 -O3 -fomit-frame-pointer avx
gcc -march=k8 -O -fomit-frame-pointer avx
gcc -march=k8 -Os -fomit-frame-pointer avx
gcc -march=nocona -O2 -fomit-frame-pointer avx
gcc -march=nocona -O3 -fomit-frame-pointer avx
gcc -march=nocona -O -fomit-frame-pointer avx
gcc -march=nocona -Os -fomit-frame-pointer avx

Compiler output

Implementation: crypto_kem/ntrulpr4591761/avx
Compiler: gcc -m64 -march=barcelona -O2 -fomit-frame-pointer
mult.c: mult.c: In function 'add':
mult.c: mult.c:82:1: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
mult.c: {
mult.c: ^
mult.c: mult.c: In function 'squeezeadd16':
mult.c: mult.c:110:23: note: The ABI for passing parameters with 32-byte alignment has changed in GCC 4.6
mult.c: static inline __m256i squeezeadd16(__m256i x,__m256i y)
mult.c: ^
mult.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:79:0,
mult.c: from mult.c:2:
mult.c: mult.c: In function 'fastadd':
mult.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/fmaintrin.h:63:1: error: inlining failed in call to always_inline '_mm256_fmadd_ps': target specific option mismatch
mult.c: _mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
mult.c: ^
mult.c: mult.c:88:10: error: called from here
mult.c: return _mm256_fmadd_ps(y,v1_float,x);
mult.c: ^
mult.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:41:0,
mult.c: from mult.c:2:
mult.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avxintrin.h:1285:1: error: inlining failed in call to always_inline '_mm256_set1_ps': target specific option mismatch
mult.c: _mm256_set1_ps (float __A)
mult.c: ^
mult.c: mult.c:88:10: error: called from here
mult.c: return _mm256_fmadd_ps(y,v1_float,x);
mult.c: ^
mult.c: mult.c: In function 'add':
mult.c: mult.c:82:1: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
mult.c: {
mult.c: ^
mult.c: mult.c: In function 'squeezeadd16':
mult.c: mult.c:110:23: note: The ABI for passing parameters with 32-byte alignment has changed in GCC 4.6
mult.c: static inline __m256i squeezeadd16(__m256i x,__m256i y)
mult.c: ^
mult.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:79:0,
mult.c: from mult.c:2:
mult.c: mult.c: In function 'fastadd':
mult.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/fmaintrin.h:63:1: error: inlining failed in call to always_inline '_mm256_fmadd_ps': target specific option mismatch
mult.c: _mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
mult.c: ^
mult.c: mult.c:88:10: error: called from here
mult.c: return _mm256_fmadd_ps(y,v1_float,x);
mult.c: ^
mult.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:41:0,
mult.c: from mult.c:2:
mult.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avxintrin.h:1285:1: error: inlining failed in call to always_inline '_mm256_set1_ps': target specific option mismatch
mult.c: _mm256_set1_ps (float __A)
mult.c: ^
mult.c: mult.c:88:10: error: called from here
mult.c: return _mm256_fmadd_ps(y,v1_float,x);
mult.c: ^

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler Implementations
gcc -m64 -march=barcelona -O2 -fomit-frame-pointer avx
gcc -m64 -march=barcelona -O3 -fomit-frame-pointer avx
gcc -m64 -march=barcelona -O -fomit-frame-pointer avx
gcc -m64 -march=barcelona -Os -fomit-frame-pointer avx

Compiler output

Implementation: crypto_kem/ntrulpr4591761/avx
Compiler: gcc -m64 -march=core-avx-i -O2 -fomit-frame-pointer
mult.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:79:0,
mult.c: from mult.c:2:
mult.c: mult.c: In function 'fastadd':
mult.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/fmaintrin.h:63:1: error: inlining failed in call to always_inline '_mm256_fmadd_ps': target specific option mismatch
mult.c: _mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
mult.c: ^
mult.c: mult.c:88:10: error: called from here
mult.c: return _mm256_fmadd_ps(y,v1_float,x);
mult.c: ^
mult.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:79:0,
mult.c: from mult.c:2:
mult.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/fmaintrin.h:63:1: error: inlining failed in call to always_inline '_mm256_fmadd_ps': target specific option mismatch
mult.c: _mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
mult.c: ^
mult.c: mult.c:88:10: error: called from here
mult.c: return _mm256_fmadd_ps(y,v1_float,x);
mult.c: ^

Number of similar (compiler,implementation) pairs: 8, namely:
Compiler Implementations
gcc -m64 -march=core-avx-i -O2 -fomit-frame-pointer avx
gcc -m64 -march=core-avx-i -O3 -fomit-frame-pointer avx
gcc -m64 -march=core-avx-i -O -fomit-frame-pointer avx
gcc -m64 -march=core-avx-i -Os -fomit-frame-pointer avx
gcc -m64 -march=corei7-avx -O2 -fomit-frame-pointer avx
gcc -m64 -march=corei7-avx -O3 -fomit-frame-pointer avx
gcc -m64 -march=corei7-avx -O -fomit-frame-pointer avx
gcc -m64 -march=corei7-avx -Os -fomit-frame-pointer avx