Implementation notes: amd64, bolero, crypto_kem/ntrulpr4591761

Computer: bolero
Architecture: amd64
CPU ID: GenuineIntel-000406f1-bfebfbff
SUPERCOP version: 20181209
Operation: crypto_kem
Primitive: ntrulpr4591761
Time | Implementation | Compiler | Benchmark date | SUPERCOP version
194052avxgcc -m64 -march=core-avx2 -O3 -fomit-frame-pointer2018102020180818
197392avxgcc -m64 -march=native -mtune=native -O3 -fomit-frame-pointer2018102020180818
198844avxclang -O3 -fwrapv -march=native -fomit-frame-pointer -Qunused-arguments2018102020180818
200272avxclang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2018102020180818
200708avxgcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv2018102020180818
214900avxgcc -m64 -march=native -mtune=native -O2 -fomit-frame-pointer2018102020180818
215488avxgcc -m64 -march=core-avx2 -O2 -fomit-frame-pointer2018102020180818
219444avxgcc -m64 -march=native -mtune=native -O -fomit-frame-pointer2018102020180818
220152avxgcc -m64 -march=core-avx2 -O -fomit-frame-pointer2018102020180818
220376avxgcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv2018102020180818
221276avxgcc -m64 -march=native -mtune=native -Os -fomit-frame-pointer2018102020180818
222040avxgcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv2018102020180818
224312avxgcc -m64 -march=core-avx2 -Os -fomit-frame-pointer2018102020180818
228072avxgcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv2018102020180818
54845880refgcc -m64 -march=k8 -O3 -fomit-frame-pointer2018102020180818
54845900refgcc -m64 -O3 -fomit-frame-pointer2018102020180818
54847300refgcc -O3 -fomit-frame-pointer2018102020180818
54852652refgcc -march=k8 -O3 -fomit-frame-pointer2018102020180818
54854912refgcc -m64 -march=barcelona -O3 -fomit-frame-pointer2018102020180818
54858088refgcc -m64 -march=barcelona -O2 -fomit-frame-pointer2018102020180818
54863872refgcc -m64 -march=core-avx-i -O3 -fomit-frame-pointer2018102020180818
54864024refgcc -fno-schedule-insns -O3 -fomit-frame-pointer2018102020180818
54866128refgcc -m64 -march=k8 -O2 -fomit-frame-pointer2018102020180818
54866688refgcc -m64 -march=corei7 -O3 -fomit-frame-pointer2018102020180818
54867116refgcc -m64 -march=corei7-avx -O3 -fomit-frame-pointer2018102020180818
54867332refgcc -march=barcelona -O2 -fomit-frame-pointer2018102020180818
54868444refgcc -m64 -march=core-avx2 -O3 -fomit-frame-pointer2018102020180818
54869780refgcc -m64 -march=native -mtune=native -O3 -fomit-frame-pointer2018102020180818
54870276refgcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv2018102020180818
54872988refgcc -m64 -O2 -fomit-frame-pointer2018102020180818
54875608refgcc -march=k8 -O2 -fomit-frame-pointer2018102020180818
54877064refgcc -m64 -march=core2 -msse4 -O3 -fomit-frame-pointer2018102020180818
54878400refgcc -m64 -march=core2 -msse4.1 -O3 -fomit-frame-pointer2018102020180818
54889496refgcc -m64 -march=native -mtune=native -O2 -fomit-frame-pointer2018102020180818
54890208refgcc -m64 -march=corei7 -O2 -fomit-frame-pointer2018102020180818
54890988refgcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv2018102020180818
54891088refgcc -m64 -march=core-avx-i -O2 -fomit-frame-pointer2018102020180818
54893196refgcc -m64 -march=core-avx2 -O2 -fomit-frame-pointer2018102020180818
54893800refclang -mcpu=cortex-a9 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2018102020180818
54895784refgcc -m64 -march=core2 -O3 -fomit-frame-pointer2018102020180818
54898336refgcc -m64 -march=corei7-avx -O2 -fomit-frame-pointer2018102020180818
54899844refclang -mcpu=native -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2018102020180818
54901572refclang -mcpu=cortex-a8 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2018102020180818
54902616refclang -O3 -fomit-frame-pointer -Qunused-arguments2018102020180818
54903452refgcc -m64 -march=core2 -O2 -fomit-frame-pointer2018102020180818
54904332refgcc -m64 -march=core-avx2 -O -fomit-frame-pointer2018102020180818
54905268refgcc -m64 -march=core2 -msse4 -O2 -fomit-frame-pointer2018102020180818
54906088refgcc -m64 -march=core2 -msse4.1 -O2 -fomit-frame-pointer2018102020180818
54906780refgcc -m64 -march=native -mtune=native -O -fomit-frame-pointer2018102020180818
54908100refgcc -funroll-loops -O -fomit-frame-pointer2018102020180818
54908872refgcc -m64 -march=barcelona -O -fomit-frame-pointer2018102020180818
54909936refgcc -m64 -march=k8 -O -fomit-frame-pointer2018102020180818
54910820refgcc -funroll-loops -m64 -march=barcelona -O -fomit-frame-pointer2018102020180818
54910924refgcc -m64 -march=corei7-avx -O -fomit-frame-pointer2018102020180818
54911472refgcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv2018102020180818
54911724refgcc -m64 -march=core2 -O -fomit-frame-pointer2018102020180818
54911952refgcc -m64 -march=core2 -msse4.1 -O -fomit-frame-pointer2018102020180818
54912280refgcc -m64 -O -fomit-frame-pointer2018102020180818
54913528refgcc -funroll-loops -march=k8 -Os -fomit-frame-pointer2018102020180818
54914360refgcc -m64 -march=core-avx-i -O -fomit-frame-pointer2018102020180818
54917552refgcc -m64 -march=core2 -msse4 -O -fomit-frame-pointer2018102020180818
54919072refgcc -m64 -march=corei7 -O -fomit-frame-pointer2018102020180818
54923032refgcc -march=k8 -O -fomit-frame-pointer2018102020180818
54931124refclang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2018102020180818
54932728refgcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv2018102020180818
54936944refgcc -m64 -march=core2 -Os -fomit-frame-pointer2018102020180818
54937200refgcc -m64 -march=core2 -msse4 -Os -fomit-frame-pointer2018102020180818
54938300refgcc -m64 -march=nocona -Os -fomit-frame-pointer2018102020180818
54940420refgcc -m64 -march=corei7-avx -Os -fomit-frame-pointer2018102020180818
54941256refgcc -m64 -march=barcelona -Os -fomit-frame-pointer2018102020180818
54941768refgcc -m64 -Os -fomit-frame-pointer2018102020180818
54942532refgcc -march=k8 -Os -fomit-frame-pointer2018102020180818
54942896refgcc -m64 -march=corei7 -Os -fomit-frame-pointer2018102020180818
54946716refgcc -funroll-loops -march=nocona -Os -fomit-frame-pointer2018102020180818
54949608refgcc -m64 -march=core-avx-i -Os -fomit-frame-pointer2018102020180818
54950640refgcc -m64 -march=native -mtune=native -Os -fomit-frame-pointer2018102020180818
54950992refgcc -m64 -march=k8 -Os -fomit-frame-pointer2018102020180818
54954448refgcc -m64 -march=core2 -msse4.1 -Os -fomit-frame-pointer2018102020180818
54955124refgcc -m64 -march=core-avx2 -Os -fomit-frame-pointer2018102020180818
54957476refgcc -march=nocona -Os -fomit-frame-pointer2018102020180818
54983112refgcc -funroll-loops -m64 -march=k8 -O -fomit-frame-pointer2018102020180818
55013816refgcc -funroll-loops -m64 -march=nocona -Os -fomit-frame-pointer2018102020180818
55143168refgcc -funroll-loops -m64 -O -fomit-frame-pointer2018102020180818
55175008refgcc -march=barcelona -O3 -fomit-frame-pointer2018102020180818
55178016refgcc -O2 -fomit-frame-pointer2018102020180818
55204060refclang -O3 -fwrapv -mavx -maes -mpclmul -fomit-frame-pointer -Qunused-arguments2018102020180818
55214928refgcc -march=barcelona -O -fomit-frame-pointer2018102020180818
55228176refgcc -funroll-loops -march=barcelona -Os -fomit-frame-pointer2018102020180818
55233484refgcc -O -fomit-frame-pointer2018102020180818
55248776refgcc -funroll-loops -fno-schedule-insns -Os -fomit-frame-pointer2018102020180818
55256524refclang -O3 -fwrapv -march=native -fomit-frame-pointer -Qunused-arguments2018102020180818
55259388refgcc -Os -fomit-frame-pointer2018102020180818
55268888refgcc -fno-schedule-insns -Os -fomit-frame-pointer2018102020180818
55299112refgcc -funroll-loops -m64 -Os -fomit-frame-pointer2018102020180818
55331792refgcc -fno-schedule-insns -O2 -fomit-frame-pointer2018102020180818
55339732refgcc -funroll-loops -m64 -march=k8 -Os -fomit-frame-pointer2018102020180818
55360972refclang -O3 -fwrapv -mavx -fomit-frame-pointer -Qunused-arguments2018102020180818
55419372refgcc -march=barcelona -Os -fomit-frame-pointer2018102020180818
55486884refgcc -funroll-loops -march=barcelona -O -fomit-frame-pointer2018102020180818
55511100refgcc -funroll-loops -fno-schedule-insns -O -fomit-frame-pointer2018102020180818
55511816refgcc -fno-schedule-insns -O -fomit-frame-pointer2018102020180818
55531640refclang -O3 -fwrapv -mavx2 -fomit-frame-pointer -Qunused-arguments2018102020180818
55546712refclang -O3 -fwrapv -march=x86-64 -mcpu=core-avx2 -mavx2 -maes -mpclmul -fomit-frame-pointer -Qunused-arguments2018102020180818
55594056refgcc -funroll-loops -fno-schedule-insns -O3 -fomit-frame-pointer2018102020180818
55681460refgcc -funroll-loops -m64 -march=barcelona -Os -fomit-frame-pointer2018102020180818
55692528refgcc -funroll-loops -march=barcelona -O3 -fomit-frame-pointer2018102020180818
55741896refgcc -funroll-loops -march=k8 -O -fomit-frame-pointer2018102020180818
55763444refgcc -funroll-loops -Os -fomit-frame-pointer2018102020180818
55938324refgcc -funroll-loops -m64 -O3 -fomit-frame-pointer2018102020180818
55968940refgcc -funroll-loops -m64 -march=k8 -O3 -fomit-frame-pointer2018102020180818
56024216refgcc -funroll-loops -m64 -march=barcelona -O3 -fomit-frame-pointer2018102020180818
56139928refgcc -funroll-loops -march=k8 -O3 -fomit-frame-pointer2018102020180818
56195748refgcc -funroll-loops -O3 -fomit-frame-pointer2018102020180818
56320776refgcc -funroll-loops -O2 -fomit-frame-pointer2018102020180818
56322788refgcc -funroll-loops -m64 -O2 -fomit-frame-pointer2018102020180818
56323836refgcc -funroll-loops -m64 -march=barcelona -O2 -fomit-frame-pointer2018102020180818
56327452refgcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer2018102020180818
56333036refgcc -funroll-loops -march=k8 -O2 -fomit-frame-pointer2018102020180818
56690876refgcc -funroll-loops -march=barcelona -O2 -fomit-frame-pointer2018102020180818
56854952refgcc -funroll-loops -fno-schedule-insns -O2 -fomit-frame-pointer2018102020180818
81399184refgcc -march=nocona -O2 -fomit-frame-pointer2018102020180818
81844040refgcc -m64 -march=nocona -O3 -fomit-frame-pointer2018102020180818
81875704refgcc -m64 -march=nocona -O2 -fomit-frame-pointer2018102020180818
82001240refgcc -m64 -march=nocona -O -fomit-frame-pointer2018102020180818
82205868refgcc -march=nocona -O3 -fomit-frame-pointer2018102020180818
82336496refgcc -march=nocona -O -fomit-frame-pointer2018102020180818
84672716refgcc -funroll-loops -m64 -march=nocona -O2 -fomit-frame-pointer2018102020180818
85111856refgcc -funroll-loops -march=nocona -O3 -fomit-frame-pointer2018102020180818
85168908refgcc -funroll-loops -m64 -march=nocona -O3 -fomit-frame-pointer2018102020180818
85190392refgcc -funroll-loops -march=nocona -O2 -fomit-frame-pointer2018102020180818
87274232refgcc -funroll-loops -m64 -march=nocona -O -fomit-frame-pointer2018102020180818
88216572refgcc -funroll-loops -march=nocona -O -fomit-frame-pointer2018102020180818
139399336refgcc -funroll-loops2018102020180818
139406516refgcc2018102020180818
141745764refcc2018102020180818

Compiler output

Implementation: crypto_kem/ntrulpr4591761/avx
Compiler: cc
mult.c: mult.c: In function 'add':
mult.c: mult.c:82:1: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
mult.c: {
mult.c: ^
mult.c: mult.c: In function 'fastsub':
mult.c: mult.c:91:22: note: The ABI for passing parameters with 32-byte alignment has changed in GCC 4.6
mult.c: static inline __m256 fastsub(__m256 x,__m256 y)
mult.c: ^
mult.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:79:0,
mult.c: from mult.c:2:
mult.c: mult.c: In function 'fastadd':
mult.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/fmaintrin.h:63:1: error: inlining failed in call to always_inline '_mm256_fmadd_ps': target specific option mismatch
mult.c: _mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
mult.c: ^
mult.c: mult.c:88:10: error: called from here
mult.c: return _mm256_fmadd_ps(y,v1_float,x);
mult.c: ^
mult.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:41:0,
mult.c: from mult.c:2:
mult.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avxintrin.h:1285:1: error: inlining failed in call to always_inline '_mm256_set1_ps': target specific option mismatch
mult.c: _mm256_set1_ps (float __A)
mult.c: ^
mult.c: mult.c:88:10: error: called from here
mult.c: return _mm256_fmadd_ps(y,v1_float,x);
mult.c: ^

Number of similar (compiler,implementation) pairs: 3, namely:
Compiler Implementations
cc avx
gcc avx
gcc -funroll-loops avx

Compiler output

Implementation: crypto_kem/ntrulpr4591761/avx
Compiler: clang -O3 -fomit-frame-pointer -Qunused-arguments
mult.c: mult.c:147:22: error: invalid output size for constraint '=&x'
mult.c: MULSTEP_fromzero(0,h0,h1,h2,h3,h4)
mult.c: ^
mult.c: mult.c:149:26: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 1,h1,h2,h3,h4,h0)
mult.c: ^
mult.c: mult.c:150:26: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 2,h2,h3,h4,h0,h1)
mult.c: ^
mult.c: mult.c:151:26: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 3,h3,h4,h0,h1,h2)
mult.c: ^
mult.c: mult.c:152:26: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 4,h4,h0,h1,h2,h3)
mult.c: ^
mult.c: mult.c:153:26: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 5,h0,h1,h2,h3,h4)
mult.c: ^
mult.c: mult.c:155:24: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 1,h1,h2,h3,h4,h0)
mult.c: ^
mult.c: mult.c:156:24: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 2,h2,h3,h4,h0,h1)
mult.c: ^
mult.c: mult.c:157:24: error: invalid output size for constraint '+x'
mult.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler Implementations
clang -O3 -fomit-frame-pointer -Qunused-arguments avx
clang -mcpu=cortex-a8 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments avx
clang -mcpu=cortex-a9 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments avx
clang -mcpu=native -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments avx

Compiler output

Implementation: crypto_kem/ntrulpr4591761/avx
Compiler: clang -O3 -fwrapv -march=x86-64 -mcpu=core-avx2 -mavx2 -maes -mpclmul -fomit-frame-pointer -Qunused-arguments
mult.c: mult.c:93:10: error: always_inline function '_mm256_fmadd_ps' requires target feature 'fma', but would be inlined into function 'fastsub' that is compiled without support for 'fma'
mult.c: return _mm256_fmadd_ps(y,vm1_float,x);
mult.c: ^
mult.c: mult.c:100:10: error: always_inline function '_mm256_fmadd_ps' requires target feature 'fma', but would be inlined into function 'reduce' that is compiled without support for 'fma'
mult.c: return _mm256_fmadd_ps(q,vm4591_float,x);
mult.c: ^
mult.c: mult.c:88:10: error: always_inline function '_mm256_fmadd_ps' requires target feature 'fma', but would be inlined into function 'fastadd' that is compiled without support for 'fma'
mult.c: return _mm256_fmadd_ps(y,v1_float,x);
mult.c: ^
mult.c: fatal error: error in backend: Cannot select: 0x350a290: v8f32 = X86ISD::FMADD 0x35b8a20, 0x35acab0, 0x35ece10
mult.c: 0x35b8a20: v8f32,ch = load<...> 0x3455ba0, 0x35e4c70, undef:i64
mult.c: 0x35e4c70: i64 = add 0x35c4f80, Constant:i64<...>
mult.c: 0x35c4f80: i64 = add FrameIndex:i64<...>, 0x35ed400
mult.c: 0x3586f00: i64 = FrameIndex<...>
mult.c: 0x35ed400: i64,ch = CopyFromReg 0x3455ba0, Register:i64 %vreg78
mult.c: 0x35e4380: i64 = Register %vreg78
mult.c: 0x35cdd60: i64 = Constant<...>
mult.c: 0x35d7cd0: i64 = undef
mult.c: 0x35acab0: v8f32 = X86ISD::VBROADCAST 0x3576300
mult.c: 0x3576300: f32,ch = load<...> 0x3455ba0, 0x358ce90, undef:i64
mult.c: 0x358ce90: i64 = X86ISD::Wrapper TargetConstantPool:i64<...> 0
mult.c: 0x35df200: i64 = TargetConstantPool<...> 0
mult.c: 0x35d7cd0: i64 = undef
mult.c: 0x35ece10: v8f32,ch = load<...> 0x3455ba0, 0x3507c80, undef:i64
mult.c: 0x3507c80: i64 = add 0x35c4e50, Constant:i64<...>
mult.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler Implementations
clang -O3 -fwrapv -march=x86-64 -mcpu=core-avx2 -mavx2 -maes -mpclmul -fomit-frame-pointer -Qunused-arguments avx

Compiler output

Implementation: crypto_kem/ntrulpr4591761/avx
Compiler: clang -O3 -fwrapv -mavx2 -fomit-frame-pointer -Qunused-arguments
mult.c: mult.c:93:10: error: always_inline function '_mm256_fmadd_ps' requires target feature 'fma', but would be inlined into function 'fastsub' that is compiled without support for 'fma'
mult.c: return _mm256_fmadd_ps(y,vm1_float,x);
mult.c: ^
mult.c: mult.c:100:10: error: always_inline function '_mm256_fmadd_ps' requires target feature 'fma', but would be inlined into function 'reduce' that is compiled without support for 'fma'
mult.c: return _mm256_fmadd_ps(q,vm4591_float,x);
mult.c: ^
mult.c: mult.c:88:10: error: always_inline function '_mm256_fmadd_ps' requires target feature 'fma', but would be inlined into function 'fastadd' that is compiled without support for 'fma'
mult.c: return _mm256_fmadd_ps(y,v1_float,x);
mult.c: ^
mult.c: fatal error: error in backend: Cannot select: 0x3894040: v8f32 = X86ISD::FMADD 0x38e1d40, 0x3816090, 0x38ff830
mult.c: 0x38e1d40: v8f32,ch = load<...> 0x37658e0, 0x38f7690, undef:i64
mult.c: 0x38f7690: i64 = add 0x38d81b0, Constant:i64<...>
mult.c: 0x38d81b0: i64 = add FrameIndex:i64<...>, 0x38ffe20
mult.c: 0x3813250: i64 = FrameIndex<...>
mult.c: 0x38ffe20: i64,ch = CopyFromReg 0x37658e0, Register:i64 %vreg78
mult.c: 0x38f6da0: i64 = Register %vreg78
mult.c: 0x38e0f90: i64 = Constant<...>
mult.c: 0x38ca9e0: i64 = undef
mult.c: 0x3816090: v8f32 = X86ISD::VBROADCAST 0x38ccd70
mult.c: 0x38ccd70: f32,ch = load<...> 0x37658e0, 0x38953e0, undef:i64
mult.c: 0x38953e0: i64 = X86ISD::Wrapper TargetConstantPool:i64<...> 0
mult.c: 0x38f1c20: i64 = TargetConstantPool<...> 0
mult.c: 0x38ca9e0: i64 = undef
mult.c: 0x38ff830: v8f32,ch = load<...> 0x37658e0, 0x3891a30, undef:i64
mult.c: 0x3891a30: i64 = add 0x38d8080, Constant:i64<...>
mult.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler Implementations
clang -O3 -fwrapv -mavx2 -fomit-frame-pointer -Qunused-arguments avx

Compiler output

Implementation: crypto_kem/ntrulpr4591761/avx
Compiler: clang -O3 -fwrapv -mavx -fomit-frame-pointer -Qunused-arguments
mult.c: mult.c:732:17: error: always_inline function '_mm256_add_epi16' requires target feature 'avx2', but would be inlined into function 'rq_mult' that is compiled without support for 'avx2'
mult.c: __m256i x = _mm256_add_epi16(fgi,_mm256_add_epi16(fgip,fgip1));
mult.c: ^
mult.c: mult.c:732:38: error: always_inline function '_mm256_add_epi16' requires target feature 'avx2', but would be inlined into function 'rq_mult' that is compiled without support for 'avx2'
mult.c: __m256i x = _mm256_add_epi16(fgi,_mm256_add_epi16(fgip,fgip1));
mult.c: ^
mult.c: 2 errors generated.

Number of similar (compiler,implementation) pairs: 2, namely:
Compiler Implementations
clang -O3 -fwrapv -mavx -fomit-frame-pointer -Qunused-arguments avx
clang -O3 -fwrapv -mavx -maes -mpclmul -fomit-frame-pointer -Qunused-arguments avx

Compiler output

Implementation: crypto_kem/ntrulpr4591761/avx
Compiler: gcc -O2 -fomit-frame-pointer
mult.c: mult.c: In function 'add':
mult.c: mult.c:82:1: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
mult.c: {
mult.c: ^
mult.c: mult.c: In function 'squeezeadd16':
mult.c: mult.c:110:23: note: The ABI for passing parameters with 32-byte alignment has changed in GCC 4.6
mult.c: static inline __m256i squeezeadd16(__m256i x,__m256i y)
mult.c: ^
mult.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:79:0,
mult.c: from mult.c:2:
mult.c: mult.c: In function 'fastadd':
mult.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/fmaintrin.h:63:1: error: inlining failed in call to always_inline '_mm256_fmadd_ps': target specific option mismatch
mult.c: _mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
mult.c: ^
mult.c: mult.c:88:10: error: called from here
mult.c: return _mm256_fmadd_ps(y,v1_float,x);
mult.c: ^
mult.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:41:0,
mult.c: from mult.c:2:
mult.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avxintrin.h:1285:1: error: inlining failed in call to always_inline '_mm256_set1_ps': target specific option mismatch
mult.c: _mm256_set1_ps (float __A)
mult.c: ^
mult.c: mult.c:88:10: error: called from here
mult.c: return _mm256_fmadd_ps(y,v1_float,x);
mult.c: ^

Number of similar (compiler,implementation) pairs: 84, namely:
Compiler Implementations
gcc -O2 -fomit-frame-pointer avx
gcc -O3 -fomit-frame-pointer avx
gcc -O -fomit-frame-pointer avx
gcc -Os -fomit-frame-pointer avx
gcc -fno-schedule-insns -O2 -fomit-frame-pointer avx
gcc -fno-schedule-insns -O3 -fomit-frame-pointer avx
gcc -fno-schedule-insns -O -fomit-frame-pointer avx
gcc -fno-schedule-insns -Os -fomit-frame-pointer avx
gcc -funroll-loops -O2 -fomit-frame-pointer avx
gcc -funroll-loops -O3 -fomit-frame-pointer avx
gcc -funroll-loops -O -fomit-frame-pointer avx
gcc -funroll-loops -Os -fomit-frame-pointer avx
gcc -funroll-loops -fno-schedule-insns -O2 -fomit-frame-pointer avx
gcc -funroll-loops -fno-schedule-insns -O3 -fomit-frame-pointer avx
gcc -funroll-loops -fno-schedule-insns -O -fomit-frame-pointer avx
gcc -funroll-loops -fno-schedule-insns -Os -fomit-frame-pointer avx
gcc -funroll-loops -m64 -O2 -fomit-frame-pointer avx
gcc -funroll-loops -m64 -O3 -fomit-frame-pointer avx
gcc -funroll-loops -m64 -O -fomit-frame-pointer avx
gcc -funroll-loops -m64 -Os -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=barcelona -O2 -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=barcelona -O3 -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=barcelona -O -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=barcelona -Os -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=k8 -O3 -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=k8 -O -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=k8 -Os -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=nocona -O2 -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=nocona -O3 -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=nocona -O -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=nocona -Os -fomit-frame-pointer avx
gcc -funroll-loops -march=barcelona -O2 -fomit-frame-pointer avx
gcc -funroll-loops -march=barcelona -O3 -fomit-frame-pointer avx
gcc -funroll-loops -march=barcelona -O -fomit-frame-pointer avx
gcc -funroll-loops -march=barcelona -Os -fomit-frame-pointer avx
gcc -funroll-loops -march=k8 -O2 -fomit-frame-pointer avx
gcc -funroll-loops -march=k8 -O3 -fomit-frame-pointer avx
gcc -funroll-loops -march=k8 -O -fomit-frame-pointer avx
gcc -funroll-loops -march=k8 -Os -fomit-frame-pointer avx
gcc -funroll-loops -march=nocona -O2 -fomit-frame-pointer avx
gcc -funroll-loops -march=nocona -O3 -fomit-frame-pointer avx
gcc -funroll-loops -march=nocona -O -fomit-frame-pointer avx
gcc -funroll-loops -march=nocona -Os -fomit-frame-pointer avx
gcc -m64 -O2 -fomit-frame-pointer avx
gcc -m64 -O3 -fomit-frame-pointer avx
gcc -m64 -O -fomit-frame-pointer avx
gcc -m64 -Os -fomit-frame-pointer avx
gcc -m64 -march=core2 -O2 -fomit-frame-pointer avx
gcc -m64 -march=core2 -O3 -fomit-frame-pointer avx
gcc -m64 -march=core2 -O -fomit-frame-pointer avx
gcc -m64 -march=core2 -Os -fomit-frame-pointer avx
gcc -m64 -march=core2 -msse4.1 -O2 -fomit-frame-pointer avx
gcc -m64 -march=core2 -msse4.1 -O3 -fomit-frame-pointer avx
gcc -m64 -march=core2 -msse4.1 -O -fomit-frame-pointer avx
gcc -m64 -march=core2 -msse4.1 -Os -fomit-frame-pointer avx
gcc -m64 -march=core2 -msse4 -O2 -fomit-frame-pointer avx
gcc -m64 -march=core2 -msse4 -O3 -fomit-frame-pointer avx
gcc -m64 -march=core2 -msse4 -O -fomit-frame-pointer avx
gcc -m64 -march=core2 -msse4 -Os -fomit-frame-pointer avx
gcc -m64 -march=corei7 -O2 -fomit-frame-pointer avx
gcc -m64 -march=corei7 -O3 -fomit-frame-pointer avx
gcc -m64 -march=corei7 -O -fomit-frame-pointer avx
gcc -m64 -march=corei7 -Os -fomit-frame-pointer avx
gcc -m64 -march=k8 -O2 -fomit-frame-pointer avx
gcc -m64 -march=k8 -O3 -fomit-frame-pointer avx
gcc -m64 -march=k8 -O -fomit-frame-pointer avx
gcc -m64 -march=k8 -Os -fomit-frame-pointer avx
gcc -m64 -march=nocona -O2 -fomit-frame-pointer avx
gcc -m64 -march=nocona -O3 -fomit-frame-pointer avx
gcc -m64 -march=nocona -O -fomit-frame-pointer avx
gcc -m64 -march=nocona -Os -fomit-frame-pointer avx
gcc -march=barcelona -O2 -fomit-frame-pointer avx
gcc -march=barcelona -O3 -fomit-frame-pointer avx
gcc -march=barcelona -O -fomit-frame-pointer avx
gcc -march=barcelona -Os -fomit-frame-pointer avx
gcc -march=k8 -O2 -fomit-frame-pointer avx
gcc -march=k8 -O3 -fomit-frame-pointer avx
gcc -march=k8 -O -fomit-frame-pointer avx
gcc -march=k8 -Os -fomit-frame-pointer avx
gcc -march=nocona -O2 -fomit-frame-pointer avx
gcc -march=nocona -O3 -fomit-frame-pointer avx
gcc -march=nocona -O -fomit-frame-pointer avx
gcc -march=nocona -Os -fomit-frame-pointer avx

Compiler output

Implementation: crypto_kem/ntrulpr4591761/avx
Compiler: gcc -m64 -march=barcelona -O2 -fomit-frame-pointer
mult.c: mult.c: In function 'add':
mult.c: mult.c:82:1: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
mult.c: {
mult.c: ^
mult.c: mult.c: In function 'squeezeadd16':
mult.c: mult.c:110:23: note: The ABI for passing parameters with 32-byte alignment has changed in GCC 4.6
mult.c: static inline __m256i squeezeadd16(__m256i x,__m256i y)
mult.c: ^
mult.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:79:0,
mult.c: from mult.c:2:
mult.c: mult.c: In function 'fastadd':
mult.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/fmaintrin.h:63:1: error: inlining failed in call to always_inline '_mm256_fmadd_ps': target specific option mismatch
mult.c: _mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
mult.c: ^
mult.c: mult.c:88:10: error: called from here
mult.c: return _mm256_fmadd_ps(y,v1_float,x);
mult.c: ^
mult.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:41:0,
mult.c: from mult.c:2:
mult.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avxintrin.h:1285:1: error: inlining failed in call to always_inline '_mm256_set1_ps': target specific option mismatch
mult.c: _mm256_set1_ps (float __A)
mult.c: ^
mult.c: mult.c:88:10: error: called from here
mult.c: return _mm256_fmadd_ps(y,v1_float,x);
mult.c: ^
mult.c: mult.c: In function 'add':
mult.c: mult.c:82:1: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
mult.c: {
mult.c: ^
mult.c: mult.c: In function 'squeezeadd16':
mult.c: mult.c:110:23: note: The ABI for passing parameters with 32-byte alignment has changed in GCC 4.6
mult.c: static inline __m256i squeezeadd16(__m256i x,__m256i y)
mult.c: ^
mult.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:79:0,
mult.c: from mult.c:2:
mult.c: mult.c: In function 'fastadd':
mult.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/fmaintrin.h:63:1: error: inlining failed in call to always_inline '_mm256_fmadd_ps': target specific option mismatch
mult.c: _mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
mult.c: ^
mult.c: mult.c:88:10: error: called from here
mult.c: return _mm256_fmadd_ps(y,v1_float,x);
mult.c: ^
mult.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:41:0,
mult.c: from mult.c:2:
mult.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avxintrin.h:1285:1: error: inlining failed in call to always_inline '_mm256_set1_ps': target specific option mismatch
mult.c: _mm256_set1_ps (float __A)
mult.c: ^
mult.c: mult.c:88:10: error: called from here
mult.c: return _mm256_fmadd_ps(y,v1_float,x);
mult.c: ^

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler Implementations
gcc -m64 -march=barcelona -O2 -fomit-frame-pointer avx
gcc -m64 -march=barcelona -O3 -fomit-frame-pointer avx
gcc -m64 -march=barcelona -O -fomit-frame-pointer avx
gcc -m64 -march=barcelona -Os -fomit-frame-pointer avx

Compiler output

Implementation: crypto_kem/ntrulpr4591761/avx
Compiler: gcc -m64 -march=core-avx-i -O2 -fomit-frame-pointer
mult.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:79:0,
mult.c: from mult.c:2:
mult.c: mult.c: In function 'fastadd':
mult.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/fmaintrin.h:63:1: error: inlining failed in call to always_inline '_mm256_fmadd_ps': target specific option mismatch
mult.c: _mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
mult.c: ^
mult.c: mult.c:88:10: error: called from here
mult.c: return _mm256_fmadd_ps(y,v1_float,x);
mult.c: ^
mult.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:79:0,
mult.c: from mult.c:2:
mult.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/fmaintrin.h:63:1: error: inlining failed in call to always_inline '_mm256_fmadd_ps': target specific option mismatch
mult.c: _mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
mult.c: ^
mult.c: mult.c:88:10: error: called from here
mult.c: return _mm256_fmadd_ps(y,v1_float,x);
mult.c: ^

Number of similar (compiler,implementation) pairs: 8, namely:
Compiler Implementations
gcc -m64 -march=core-avx-i -O2 -fomit-frame-pointer avx
gcc -m64 -march=core-avx-i -O3 -fomit-frame-pointer avx
gcc -m64 -march=core-avx-i -O -fomit-frame-pointer avx
gcc -m64 -march=core-avx-i -Os -fomit-frame-pointer avx
gcc -m64 -march=corei7-avx -O2 -fomit-frame-pointer avx
gcc -m64 -march=corei7-avx -O3 -fomit-frame-pointer avx
gcc -m64 -march=corei7-avx -O -fomit-frame-pointer avx
gcc -m64 -march=corei7-avx -Os -fomit-frame-pointer avx