Implementation notes: amd64, gpu, crypto_core/multsntrup857

Computer: gpu
Architecture: amd64
CPU ID: GenuineIntel-000206d7-bfebfbff
SUPERCOP version: 20190816
Operation: crypto_core
Primitive: multsntrup857
Time | Object size | Test size | Implementation | Compiler | Benchmark date | SUPERCOP version
5255083767 0 016990 776 832refgcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv2019082220190816
5483003750 0 017070 776 832refgcc_-m64_-march=corei7-avx_-O3_-fomit-frame-pointer2019082220190816
5614323750 0 017070 776 832refgcc_-m64_-march=core-avx-i_-O3_-fomit-frame-pointer2019082220190816
5741763750 0 017070 776 832refgcc_-m64_-march=native_-mtune=native_-O3_-fomit-frame-pointer2019082220190816
6173403894 0 017150 776 832refgcc_-m64_-march=core2_-msse4_-O3_-fomit-frame-pointer2019082220190816
6187443894 0 017086 776 832refgcc_-m64_-march=core2_-msse4.1_-O3_-fomit-frame-pointer2019082220190816
6200434550 0 017790 776 832refgcc_-m64_-march=core2_-O3_-fomit-frame-pointer2019082220190816
6282283894 0 016998 776 832refgcc_-m64_-march=corei7_-O3_-fomit-frame-pointer2019082220190816
8702364358 0 019573 760 1128refclang_-O3_-fwrapv_-mavx_-fomit-frame-pointer_-Qunused-arguments2019082220190816
9103604358 0 019573 760 1128refclang_-O3_-fwrapv_-mavx_-maes_-mpclmul_-fomit-frame-pointer_-Qunused-arguments2019082220190816
9959442867 0 018725 760 1128refclang_-O3_-fwrapv_-march=native_-fomit-frame-pointer_-Qunused-arguments2019082220190816
10331682867 0 018725 760 1128refclang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments2019082220190816
16272542355 0 020597 760 776refclang_-mcpu=cortex-a8_-mfpu=neon_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments2019082220190816
16305212355 0 020597 760 776refclang_-mcpu=cortex-a9_-mfpu=neon_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments2019082220190816
16724122355 0 019269 760 776refclang_-O3_-fomit-frame-pointer_-Qunused-arguments2019082220190816
16942642355 0 020597 760 776refclang_-mcpu=native_-mfpu=neon_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments2019082220190816
18940562079 0 016349 768 832refgcc_-funroll-loops_-fno-schedule-insns_-O2_-fomit-frame-pointer2019082220190816
18991802079 0 016349 768 832refgcc_-funroll-loops_-O2_-fomit-frame-pointer2019082220190816
19075043637 0 019198 776 832refgcc_-funroll-loops_-m64_-march=nocona_-O3_-fomit-frame-pointer2019082220190816
19111843458 0 019126 776 832refgcc_-funroll-loops_-fno-schedule-insns_-O3_-fomit-frame-pointer2019082220190816
19150563458 0 019126 776 832refgcc_-funroll-loops_-m64_-O3_-fomit-frame-pointer2019082220190816
19169962176 0 016357 768 832refgcc_-funroll-loops_-march=k8_-O2_-fomit-frame-pointer2019082220190816
19246962079 0 016349 768 832refgcc_-funroll-loops_-m64_-O2_-fomit-frame-pointer2019082220190816
19354005751 0 022022 776 832refgcc_-funroll-loops_-march=k8_-O3_-fomit-frame-pointer2019082220190816
19412562208 0 016525 768 832refgcc_-funroll-loops_-march=barcelona_-O2_-fomit-frame-pointer2019082220190816
19531645751 0 022022 776 832refgcc_-funroll-loops_-m64_-march=k8_-O3_-fomit-frame-pointer2019082220190816
19538522176 0 016357 768 832refgcc_-funroll-loops_-m64_-march=k8_-O2_-fomit-frame-pointer2019082220190816
19741605823 0 022206 776 832refgcc_-funroll-loops_-m64_-march=barcelona_-O3_-fomit-frame-pointer2019082220190816
19788393637 0 019198 776 832refgcc_-funroll-loops_-march=nocona_-O3_-fomit-frame-pointer2019082220190816
19797085823 0 022206 776 832refgcc_-funroll-loops_-march=barcelona_-O3_-fomit-frame-pointer2019082220190816
20169922208 0 016525 768 832refgcc_-funroll-loops_-m64_-march=barcelona_-O2_-fomit-frame-pointer2019082220190816
20192562084 0 016189 768 832refgcc_-funroll-loops_-m64_-march=nocona_-O2_-fomit-frame-pointer2019082220190816
20390362084 0 016189 768 832refgcc_-funroll-loops_-march=nocona_-O2_-fomit-frame-pointer2019082220190816
20802963458 0 019126 776 832refgcc_-funroll-loops_-O3_-fomit-frame-pointer2019082220190816
23404002053 0 016229 768 832refgcc_-funroll-loops_-m64_-march=barcelona_-O_-fomit-frame-pointer2019082220190816
23848412053 0 016229 768 832refgcc_-funroll-loops_-march=barcelona_-O_-fomit-frame-pointer2019082220190816
24196322072 0 016453 768 832refgcc_-funroll-loops_-fno-schedule-insns_-O_-fomit-frame-pointer2019082220190816
25322082053 0 016429 768 832refgcc_-funroll-loops_-m64_-march=k8_-O_-fomit-frame-pointer2019082220190816
25621202110 0 016613 768 832refgcc_-funroll-loops_-march=nocona_-O_-fomit-frame-pointer2019082220190816
25729922072 0 016453 768 832refgcc_-funroll-loops_-m64_-O_-fomit-frame-pointer2019082220190816
26116842110 0 016613 768 832refgcc_-funroll-loops_-m64_-march=nocona_-O_-fomit-frame-pointer2019082220190816
26256762072 0 016453 768 832refgcc_-funroll-loops_-O_-fomit-frame-pointer2019082220190816
26426522053 0 016429 768 832refgcc_-funroll-loops_-march=k8_-O_-fomit-frame-pointer2019082220190816
2870019536 0 010597 760 800refgcc_-funroll-loops_-fno-schedule-insns_-Os_-fomit-frame-pointer2019082220190816
2992152587 0 011493 768 832refgcc_-march=barcelona_-O_-fomit-frame-pointer2019082220190816
30082001861 0 015214 776 832refgcc_-m64_-O3_-fomit-frame-pointer2019082220190816
30111244331 0 018118 776 832refgcc_-march=barcelona_-O3_-fomit-frame-pointer2019082220190816
30111761861 0 015214 776 832refgcc_-fno-schedule-insns_-O3_-fomit-frame-pointer2019082220190816
3032520715 0 011717 768 832refgcc_-march=k8_-O2_-fomit-frame-pointer2019082220190816
3034128677 0 011605 768 832refgcc_-m64_-march=corei7_-O2_-fomit-frame-pointer2019082220190816
3040936669 0 011653 768 832refgcc_-fno-schedule-insns_-O2_-fomit-frame-pointer2019082220190816
3042287779 0 012045 768 832refgcc_-march=barcelona_-O2_-fomit-frame-pointer2019082220190816
3042796677 0 011605 768 832refgcc_-m64_-march=core-avx-i_-O2_-fomit-frame-pointer2019082220190816
30481231861 0 015214 776 832refgcc_-O3_-fomit-frame-pointer2019082220190816
3050928677 0 011613 768 832refgcc_-m64_-march=core2_-O2_-fomit-frame-pointer2019082220190816
3053936669 0 011653 768 832refgcc_-O2_-fomit-frame-pointer2019082220190816
30544808662 0 018118 776 832refgcc_-m64_-march=barcelona_-O3_-fomit-frame-pointer2019082220190816
3064124677 0 011605 768 832refgcc_-m64_-march=native_-mtune=native_-O2_-fomit-frame-pointer2019082220190816
3067696677 0 011613 768 832refgcc_-m64_-march=core2_-msse4.1_-O2_-fomit-frame-pointer2019082220190816
30769681558 0 012045 768 832refgcc_-m64_-march=barcelona_-O2_-fomit-frame-pointer2019082220190816
3097428669 0 011653 768 832refgcc_-m64_-O2_-fomit-frame-pointer2019082220190816
31045284227 0 017854 776 832refgcc_-march=k8_-O3_-fomit-frame-pointer2019082220190816
3108756677 0 011613 768 832refgcc_-m64_-march=core2_-msse4_-O2_-fomit-frame-pointer2019082220190816
31120564227 0 017854 776 832refgcc_-m64_-march=k8_-O3_-fomit-frame-pointer2019082220190816
3114784860 0 011709 768 832refgcc_-m64_-march=nocona_-O2_-fomit-frame-pointer2019082220190816
3118076677 0 011605 768 832refgcc_-m64_-march=corei7-avx_-O2_-fomit-frame-pointer2019082220190816
3126284715 0 011717 768 832refgcc_-m64_-march=k8_-O2_-fomit-frame-pointer2019082220190816
31847402254 0 015478 776 832refgcc_-m64_-march=nocona_-O3_-fomit-frame-pointer2019082220190816
3187024860 0 011709 768 832refgcc_-march=nocona_-O2_-fomit-frame-pointer2019082220190816
3232400587 0 011549 768 832refgcc_-march=k8_-O_-fomit-frame-pointer2019082220190816
3241212536 0 010597 760 800refgcc_-funroll-loops_-Os_-fomit-frame-pointer2019082220190816
32437042254 0 015478 776 832refgcc_-march=nocona_-O3_-fomit-frame-pointer2019082220190816
3256128592 0 011525 768 832refgcc_-fno-schedule-insns_-O_-fomit-frame-pointer2019082220190816
3262240529 0 010557 760 800refgcc_-march=k8_-Os_-fomit-frame-pointer2019082220190816
3271404661 0 011589 768 832refgcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv2019082220190816
3290292529 0 010557 760 800refgcc_-m64_-march=nocona_-Os_-fomit-frame-pointer2019082220190816
3295688592 0 011445 768 832refgcc_-m64_-march=core-avx-i_-O_-fomit-frame-pointer2019082220190816
3306776592 0 011445 768 832refgcc_-m64_-march=corei7-avx_-O_-fomit-frame-pointer2019082220190816
3309480592 0 011525 768 832refgcc_-m64_-O_-fomit-frame-pointer2019082220190816
3314752536 0 010597 760 800refgcc_-funroll-loops_-march=k8_-Os_-fomit-frame-pointer2019082220190816
33148201058 0 010557 760 800refgcc_-m64_-march=barcelona_-Os_-fomit-frame-pointer2019082220190816
3315090529 0 010557 760 800refgcc_-Os_-fomit-frame-pointer2019082220190816
3320052592 0 011445 768 832refgcc_-m64_-march=native_-mtune=native_-O_-fomit-frame-pointer2019082220190816
3329848529 0 010557 760 800refgcc_-m64_-march=core2_-Os_-fomit-frame-pointer2019082220190816
3332292529 0 010557 760 800refgcc_-m64_-march=core2_-msse4.1_-Os_-fomit-frame-pointer2019082220190816
3335613529 0 010557 760 800refgcc_-m64_-march=k8_-Os_-fomit-frame-pointer2019082220190816
3340020592 0 011525 768 832refgcc_-O_-fomit-frame-pointer2019082220190816
3343032529 0 010573 760 800refgcc_-m64_-march=corei7-avx_-Os_-fomit-frame-pointer2019082220190816
3350200529 0 010557 760 800refgcc_-m64_-Os_-fomit-frame-pointer2019082220190816
3352269529 0 010557 760 800refgcc_-m64_-march=core2_-msse4_-Os_-fomit-frame-pointer2019082220190816
3353540632 0 011749 768 832refgcc_-m64_-march=nocona_-O_-fomit-frame-pointer2019082220190816
3363832536 0 010597 760 800refgcc_-funroll-loops_-march=nocona_-Os_-fomit-frame-pointer2019082220190816
3367412592 0 011445 768 832refgcc_-m64_-march=core2_-msse4.1_-O_-fomit-frame-pointer2019082220190816
3375460529 0 010573 760 800refgcc_-m64_-march=core-avx-i_-Os_-fomit-frame-pointer2019082220190816
3378360529 0 010557 760 800refgcc_-m64_-march=corei7_-Os_-fomit-frame-pointer2019082220190816
3382684529 0 010573 760 800refgcc_-m64_-march=native_-mtune=native_-Os_-fomit-frame-pointer2019082220190816
3384380592 0 011445 768 832refgcc_-m64_-march=core2_-msse4_-O_-fomit-frame-pointer2019082220190816
3390476529 0 010557 760 800refgcc_-march=nocona_-Os_-fomit-frame-pointer2019082220190816
3393624592 0 011445 768 832refgcc_-m64_-march=corei7_-O_-fomit-frame-pointer2019082220190816
3398599592 0 011445 768 832refgcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv2019082220190816
3404460592 0 011445 768 832refgcc_-m64_-march=core2_-O_-fomit-frame-pointer2019082220190816
3411136536 0 010597 760 800refgcc_-funroll-loops_-m64_-march=nocona_-Os_-fomit-frame-pointer2019082220190816
34146481174 0 011493 768 832refgcc_-m64_-march=barcelona_-O_-fomit-frame-pointer2019082220190816
3422868529 0 010557 760 800refgcc_-fno-schedule-insns_-Os_-fomit-frame-pointer2019082220190816
3429748536 0 010597 760 800refgcc_-funroll-loops_-m64_-Os_-fomit-frame-pointer2019082220190816
3431196536 0 010597 760 800refgcc_-funroll-loops_-m64_-march=k8_-Os_-fomit-frame-pointer2019082220190816
3452308536 0 010597 760 800refgcc_-funroll-loops_-march=barcelona_-Os_-fomit-frame-pointer2019082220190816
3452520529 0 010557 760 800refgcc_-march=barcelona_-Os_-fomit-frame-pointer2019082220190816
3453836536 0 010597 760 800refgcc_-funroll-loops_-m64_-march=barcelona_-Os_-fomit-frame-pointer2019082220190816
3468316587 0 011549 768 832refgcc_-m64_-march=k8_-O_-fomit-frame-pointer2019082220190816
3470464632 0 011749 768 832refgcc_-march=nocona_-O_-fomit-frame-pointer2019082220190816
3646539527 0 010557 760 800refgcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv2019082220190816
94076161004 0 013238 760 792refgcc_-funroll-loops2019082220190816
101443961004 0 013238 760 792refcc2019082220190816
105063641004 0 013238 760 792refgcc2019082220190816

Test failure

Implementation: avx
Security model: unknown
Compiler: gcc -m64 -march=core-avx2 -O2 -fomit-frame-pointer
error 111

Number of similar (compiler,implementation) pairs: 6, namely:
Compiler | Implementations
gcc -m64 -march=core-avx2 -O2 -fomit-frame-pointer avx
gcc -m64 -march=core-avx2 -O -fomit-frame-pointer avx
gcc -m64 -march=core-avx2 -Os -fomit-frame-pointer avx
gcc -m64 -march=core-avx2 -O2 -fomit-frame-pointer ref
gcc -m64 -march=core-avx2 -O -fomit-frame-pointer ref
gcc -m64 -march=core-avx2 -Os -fomit-frame-pointer ref

Compiler output

Implementation: avx
Security model: unknown
Compiler: cc
mult1024.c: mult1024.c: In function 'squeeze_5167_x16':
mult1024.c: mult1024.c:19:1: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
mult1024.c: {
mult1024.c: ^
mult1024.c: mult1024.c:18:17: note: The ABI for passing parameters with 32-byte alignment has changed in GCC 4.6
mult1024.c: static int16x16 squeeze_5167_x16(int16x16 x)
mult1024.c: ^~~~~~~~~~~~~~~~
mult1024.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
mult1024.c: from mult1024.c:2:
mult1024.c: mult1024.c: In function 'freeze_5167_x16':
mult1024.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:207:1: error: inlining failed in call to always_inline '_mm256_blendv_epi8': target specific option mismatch
mult1024.c: _mm256_blendv_epi8 (__m256i __X, __m256i __Y, __m256i __M)
mult1024.c: ^~~~~~~~~~~~~~~~~~
mult1024.c: mult1024.c:291:5: note: called from here
mult1024.c: x = _mm256_blendv_epi8(xq,x,mask);
mult1024.c: ~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
mult1024.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
mult1024.c: from mult1024.c:2:
mult1024.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:808:1: error: inlining failed in call to always_inline '_mm256_sub_epi16': target specific option mismatch
mult1024.c: _mm256_sub_epi16 (__m256i __A, __m256i __B)
mult1024.c: ^~~~~~~~~~~~~~~~
mult1024.c: mult1024.c:290:6: note: called from here
mult1024.c: xq = sub_x16(x,const_x16(q));
mult1024.c: ~~~^~~~~~~~~~~~~~~~~~~~~~~~~
mult1024.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:41:0,
mult1024.c: ...

Number of similar (compiler,implementation) pairs: 3, namely:
Compiler | Implementations
cc avx
gcc avx
gcc -funroll-loops avx

Compiler output

Implementation: avx
Security model: unknown
Compiler: clang -O3 -fomit-frame-pointer -Qunused-arguments
mult1024.c: mult1024.c:304:7: error: always_inline function '_mm256_set1_epi16' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_multsntrup857_avx' that is compiled without support for 'sse4.2'
mult1024.c: x = const_x16(0);
mult1024.c: ^
mult1024.c: mult1024.c:10:19: note: expanded from macro 'const_x16'
mult1024.c: #define const_x16 _mm256_set1_epi16
mult1024.c: ^
mult1024.c: mult1024.c:305:36: error: always_inline function '_mm256_storeu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_multsntrup857_avx' that is compiled without support for 'sse4.2'
mult1024.c: for (i = p&~15;i < 1024;i += 16) store_x16(&f[i],x);
mult1024.c: ^
mult1024.c: mult1024.c:9:24: note: expanded from macro 'store_x16'
mult1024.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult1024.c: ^
mult1024.c: mult1024.c:306:36: error: always_inline function '_mm256_storeu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_multsntrup857_avx' that is compiled without support for 'sse4.2'
mult1024.c: for (i = p&~15;i < 1024;i += 16) store_x16(&g[i],x);
mult1024.c: ^
mult1024.c: mult1024.c:9:24: note: expanded from macro 'store_x16'
mult1024.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult1024.c: ^
mult1024.c: mult1024.c:311:9: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_multsntrup857_avx' that is compiled without support for 'sse4.2'
mult1024.c: x = load_x16(&f[i]);
mult1024.c: ^
mult1024.c: mult1024.c:8:21: note: expanded from macro 'load_x16'
mult1024.c: #define load_x16(p) _mm256_loadu_si256((int16x16 *) (p))
mult1024.c: ^
mult1024.c: mult1024.c:313:5: error: always_inline function '_mm256_storeu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_multsntrup857_avx' that is compiled without support for 'sse4.2'
mult1024.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler | Implementations
clang -O3 -fomit-frame-pointer -Qunused-arguments avx
clang -mcpu=cortex-a8 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments avx
clang -mcpu=cortex-a9 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments avx
clang -mcpu=native -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments avx

Compiler output

Implementation: avx
Security model: unknown
Compiler: clang -O3 -fwrapv -march=native -fomit-frame-pointer -Qunused-arguments
mult1024.c: mult1024.c:328:9: error: always_inline function '_mm256_add_epi16' requires target feature 'avx2', but would be inlined into function 'crypto_core_multsntrup857_avx' that is compiled without support for 'avx2'
mult1024.c: x = add_x16(fgi,add_x16(fgip,fgip1));
mult1024.c: ^
mult1024.c: mult1024.c:11:17: note: expanded from macro 'add_x16'
mult1024.c: #define add_x16 _mm256_add_epi16
mult1024.c: ^
mult1024.c: mult1024.c:328:21: error: always_inline function '_mm256_add_epi16' requires target feature 'avx2', but would be inlined into function 'crypto_core_multsntrup857_avx' that is compiled without support for 'avx2'
mult1024.c: x = add_x16(fgi,add_x16(fgip,fgip1));
mult1024.c: ^
mult1024.c: mult1024.c:11:17: note: expanded from macro 'add_x16'
mult1024.c: #define add_x16 _mm256_add_epi16
mult1024.c: ^
mult1024.c: 2 errors generated.

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler | Implementations
clang -O3 -fwrapv -march=native -fomit-frame-pointer -Qunused-arguments avx
clang -O3 -fwrapv -mavx -fomit-frame-pointer -Qunused-arguments avx
clang -O3 -fwrapv -mavx -maes -mpclmul -fomit-frame-pointer -Qunused-arguments avx
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments avx

Compiler output

Implementation: avx
Security model: unknown
Compiler: gcc -O2 -fomit-frame-pointer
mult1024.c: mult1024.c: In function 'squeeze_5167_x16':
mult1024.c: mult1024.c:19:1: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
mult1024.c: {
mult1024.c: ^
mult1024.c: mult1024.c: In function 'squeeze_7681_x16':
mult1024.c: mult1024.c:23:17: note: The ABI for passing parameters with 32-byte alignment has changed in GCC 4.6
mult1024.c: static int16x16 squeeze_7681_x16(int16x16 x)
mult1024.c: ^~~~~~~~~~~~~~~~
mult1024.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
mult1024.c: from mult1024.c:2:
mult1024.c: mult1024.c: In function 'squeeze_5167_x16':
mult1024.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:808:1: error: inlining failed in call to always_inline '_mm256_sub_epi16': target specific option mismatch
mult1024.c: _mm256_sub_epi16 (__m256i __A, __m256i __B)
mult1024.c: ^~~~~~~~~~~~~~~~
mult1024.c: mult1024.c:12:17: note: called from here
mult1024.c: #define sub_x16 _mm256_sub_epi16
mult1024.c: ^
mult1024.c: mult1024.c:20:10: note: in expansion of macro 'sub_x16'
mult1024.c: return sub_x16(x,mullo_x16(mulhrs_x16(x,const_x16(6)),const_x16(5167)));
mult1024.c: ^~~~~~~
mult1024.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
mult1024.c: from mult1024.c:2:
mult1024.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:553:1: error: inlining failed in call to always_inline '_mm256_mullo_epi16': target specific option mismatch
mult1024.c: _mm256_mullo_epi16 (__m256i __A, __m256i __B)
mult1024.c: ^~~~~~~~~~~~~~~~~~
mult1024.c: ...

Number of similar (compiler,implementation) pairs: 84, namely:
Compiler | Implementations
gcc -O2 -fomit-frame-pointer avx
gcc -O3 -fomit-frame-pointer avx
gcc -O -fomit-frame-pointer avx
gcc -Os -fomit-frame-pointer avx
gcc -fno-schedule-insns -O2 -fomit-frame-pointer avx
gcc -fno-schedule-insns -O3 -fomit-frame-pointer avx
gcc -fno-schedule-insns -O -fomit-frame-pointer avx
gcc -fno-schedule-insns -Os -fomit-frame-pointer avx
gcc -funroll-loops -O2 -fomit-frame-pointer avx
gcc -funroll-loops -O3 -fomit-frame-pointer avx
gcc -funroll-loops -O -fomit-frame-pointer avx
gcc -funroll-loops -Os -fomit-frame-pointer avx
gcc -funroll-loops -fno-schedule-insns -O2 -fomit-frame-pointer avx
gcc -funroll-loops -fno-schedule-insns -O3 -fomit-frame-pointer avx
gcc -funroll-loops -fno-schedule-insns -O -fomit-frame-pointer avx
gcc -funroll-loops -fno-schedule-insns -Os -fomit-frame-pointer avx
gcc -funroll-loops -m64 -O2 -fomit-frame-pointer avx
gcc -funroll-loops -m64 -O3 -fomit-frame-pointer avx
gcc -funroll-loops -m64 -O -fomit-frame-pointer avx
gcc -funroll-loops -m64 -Os -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=barcelona -O2 -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=barcelona -O3 -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=barcelona -O -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=barcelona -Os -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=k8 -O3 -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=k8 -O -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=k8 -Os -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=nocona -O2 -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=nocona -O3 -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=nocona -O -fomit-frame-pointer avx
gcc -funroll-loops -m64 -march=nocona -Os -fomit-frame-pointer avx
gcc -funroll-loops -march=barcelona -O2 -fomit-frame-pointer avx
gcc -funroll-loops -march=barcelona -O3 -fomit-frame-pointer avx
gcc -funroll-loops -march=barcelona -O -fomit-frame-pointer avx
gcc -funroll-loops -march=barcelona -Os -fomit-frame-pointer avx
gcc -funroll-loops -march=k8 -O2 -fomit-frame-pointer avx
gcc -funroll-loops -march=k8 -O3 -fomit-frame-pointer avx
gcc -funroll-loops -march=k8 -O -fomit-frame-pointer avx
gcc -funroll-loops -march=k8 -Os -fomit-frame-pointer avx
gcc -funroll-loops -march=nocona -O2 -fomit-frame-pointer avx
gcc -funroll-loops -march=nocona -O3 -fomit-frame-pointer avx
gcc -funroll-loops -march=nocona -O -fomit-frame-pointer avx
gcc -funroll-loops -march=nocona -Os -fomit-frame-pointer avx
gcc -m64 -O2 -fomit-frame-pointer avx
gcc -m64 -O3 -fomit-frame-pointer avx
gcc -m64 -O -fomit-frame-pointer avx
gcc -m64 -Os -fomit-frame-pointer avx
gcc -m64 -march=core2 -O2 -fomit-frame-pointer avx
gcc -m64 -march=core2 -O3 -fomit-frame-pointer avx
gcc -m64 -march=core2 -O -fomit-frame-pointer avx
gcc -m64 -march=core2 -Os -fomit-frame-pointer avx
gcc -m64 -march=core2 -msse4.1 -O2 -fomit-frame-pointer avx
gcc -m64 -march=core2 -msse4.1 -O3 -fomit-frame-pointer avx
gcc -m64 -march=core2 -msse4.1 -O -fomit-frame-pointer avx
gcc -m64 -march=core2 -msse4.1 -Os -fomit-frame-pointer avx
gcc -m64 -march=core2 -msse4 -O2 -fomit-frame-pointer avx
gcc -m64 -march=core2 -msse4 -O3 -fomit-frame-pointer avx
gcc -m64 -march=core2 -msse4 -O -fomit-frame-pointer avx
gcc -m64 -march=core2 -msse4 -Os -fomit-frame-pointer avx
gcc -m64 -march=corei7 -O2 -fomit-frame-pointer avx
gcc -m64 -march=corei7 -O3 -fomit-frame-pointer avx
gcc -m64 -march=corei7 -O -fomit-frame-pointer avx
gcc -m64 -march=corei7 -Os -fomit-frame-pointer avx
gcc -m64 -march=k8 -O2 -fomit-frame-pointer avx
gcc -m64 -march=k8 -O3 -fomit-frame-pointer avx
gcc -m64 -march=k8 -O -fomit-frame-pointer avx
gcc -m64 -march=k8 -Os -fomit-frame-pointer avx
gcc -m64 -march=nocona -O2 -fomit-frame-pointer avx
gcc -m64 -march=nocona -O3 -fomit-frame-pointer avx
gcc -m64 -march=nocona -O -fomit-frame-pointer avx
gcc -m64 -march=nocona -Os -fomit-frame-pointer avx
gcc -march=barcelona -O2 -fomit-frame-pointer avx
gcc -march=barcelona -O3 -fomit-frame-pointer avx
gcc -march=barcelona -O -fomit-frame-pointer avx
gcc -march=barcelona -Os -fomit-frame-pointer avx
gcc -march=k8 -O2 -fomit-frame-pointer avx
gcc -march=k8 -O3 -fomit-frame-pointer avx
gcc -march=k8 -O -fomit-frame-pointer avx
gcc -march=k8 -Os -fomit-frame-pointer avx
gcc -march=nocona -O2 -fomit-frame-pointer avx
gcc -march=nocona -O3 -fomit-frame-pointer avx
gcc -march=nocona -O -fomit-frame-pointer avx
gcc -march=nocona -Os -fomit-frame-pointer avx

Compiler output

Implementation: avx
Security model: unknown
Compiler: gcc -m64 -march=barcelona -O2 -fomit-frame-pointer
mult1024.c: mult1024.c: In function 'squeeze_5167_x16':
mult1024.c: mult1024.c:19:1: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
mult1024.c: {
mult1024.c: ^
mult1024.c: mult1024.c: In function 'squeeze_7681_x16':
mult1024.c: mult1024.c:23:17: note: The ABI for passing parameters with 32-byte alignment has changed in GCC 4.6
mult1024.c: static int16x16 squeeze_7681_x16(int16x16 x)
mult1024.c: ^~~~~~~~~~~~~~~~
mult1024.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
mult1024.c: from mult1024.c:2:
mult1024.c: mult1024.c: In function 'squeeze_5167_x16':
mult1024.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:808:1: error: inlining failed in call to always_inline '_mm256_sub_epi16': target specific option mismatch
mult1024.c: _mm256_sub_epi16 (__m256i __A, __m256i __B)
mult1024.c: ^~~~~~~~~~~~~~~~
mult1024.c: mult1024.c:12:17: note: called from here
mult1024.c: #define sub_x16 _mm256_sub_epi16
mult1024.c: ^
mult1024.c: mult1024.c:20:10: note: in expansion of macro 'sub_x16'
mult1024.c: return sub_x16(x,mullo_x16(mulhrs_x16(x,const_x16(6)),const_x16(5167)));
mult1024.c: ^~~~~~~
mult1024.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
mult1024.c: from mult1024.c:2:
mult1024.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:553:1: error: inlining failed in call to always_inline '_mm256_mullo_epi16': target specific option mismatch
mult1024.c: _mm256_mullo_epi16 (__m256i __A, __m256i __B)
mult1024.c: ^~~~~~~~~~~~~~~~~~
mult1024.c: ...
mult1024.c: mult1024.c: In function 'squeeze_5167_x16':
mult1024.c: mult1024.c:19:1: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
mult1024.c: {
mult1024.c: ^
mult1024.c: mult1024.c: In function 'squeeze_7681_x16':
mult1024.c: mult1024.c:23:17: note: The ABI for passing parameters with 32-byte alignment has changed in GCC 4.6
mult1024.c: static int16x16 squeeze_7681_x16(int16x16 x)
mult1024.c: ^~~~~~~~~~~~~~~~
mult1024.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
mult1024.c: from mult1024.c:2:
mult1024.c: mult1024.c: In function 'squeeze_5167_x16':
mult1024.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:808:1: error: inlining failed in call to always_inline '_mm256_sub_epi16': target specific option mismatch
mult1024.c: _mm256_sub_epi16 (__m256i __A, __m256i __B)
mult1024.c: ^~~~~~~~~~~~~~~~
mult1024.c: mult1024.c:12:17: note: called from here
mult1024.c: #define sub_x16 _mm256_sub_epi16
mult1024.c: ^
mult1024.c: mult1024.c:20:10: note: in expansion of macro 'sub_x16'
mult1024.c: return sub_x16(x,mullo_x16(mulhrs_x16(x,const_x16(6)),const_x16(5167)));
mult1024.c: ^~~~~~~
mult1024.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
mult1024.c: from mult1024.c:2:
mult1024.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:553:1: error: inlining failed in call to always_inline '_mm256_mullo_epi16': target specific option mismatch
mult1024.c: _mm256_mullo_epi16 (__m256i __A, __m256i __B)
mult1024.c: ^~~~~~~~~~~~~~~~~~
mult1024.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler | Implementations
gcc -m64 -march=barcelona -O2 -fomit-frame-pointer avx
gcc -m64 -march=barcelona -O3 -fomit-frame-pointer avx
gcc -m64 -march=barcelona -O -fomit-frame-pointer avx
gcc -m64 -march=barcelona -Os -fomit-frame-pointer avx

Compiler output

Implementation: avx
Security model: unknown
Compiler: gcc -m64 -march=core-avx-i -O2 -fomit-frame-pointer
mult1024.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
mult1024.c: from mult1024.c:2:
mult1024.c: mult1024.c: In function 'squeeze_5167_x16':
mult1024.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:808:1: error: inlining failed in call to always_inline '_mm256_sub_epi16': target specific option mismatch
mult1024.c: _mm256_sub_epi16 (__m256i __A, __m256i __B)
mult1024.c: ^~~~~~~~~~~~~~~~
mult1024.c: mult1024.c:12:17: note: called from here
mult1024.c: #define sub_x16 _mm256_sub_epi16
mult1024.c: ^
mult1024.c: mult1024.c:20:10: note: in expansion of macro 'sub_x16'
mult1024.c: return sub_x16(x,mullo_x16(mulhrs_x16(x,const_x16(6)),const_x16(5167)));
mult1024.c: ^~~~~~~
mult1024.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
mult1024.c: from mult1024.c:2:
mult1024.c: /usr/lib/gcc/x86_64-linux-gnu/7/include/avx2intrin.h:553:1: error: inlining failed in call to always_inline '_mm256_mullo_epi16': target specific option mismatch
mult1024.c: _mm256_mullo_epi16 (__m256i __A, __m256i __B)
mult1024.c: ^~~~~~~~~~~~~~~~~~
mult1024.c: mult1024.c:12:17: note: called from here
mult1024.c: #define sub_x16 _mm256_sub_epi16
mult1024.c: ^
mult1024.c: mult1024.c:20:10: note: in expansion of macro 'sub_x16'
mult1024.c: return sub_x16(x,mullo_x16(mulhrs_x16(x,const_x16(6)),const_x16(5167)));
mult1024.c: ^~~~~~~
mult1024.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/7/include/immintrin.h:43:0,
mult1024.c: from mult1024.c:2:
mult1024.c: ...

Number of similar (compiler,implementation) pairs: 16, namely:
Compiler | Implementations
gcc -m64 -march=core-avx-i -O2 -fomit-frame-pointer avx
gcc -m64 -march=core-avx-i -O3 -fomit-frame-pointer avx
gcc -m64 -march=core-avx-i -O -fomit-frame-pointer avx
gcc -m64 -march=core-avx-i -Os -fomit-frame-pointer avx
gcc -m64 -march=corei7-avx -O2 -fomit-frame-pointer avx
gcc -m64 -march=corei7-avx -O3 -fomit-frame-pointer avx
gcc -m64 -march=corei7-avx -O -fomit-frame-pointer avx
gcc -m64 -march=corei7-avx -Os -fomit-frame-pointer avx
gcc -m64 -march=native -mtune=native -O2 -fomit-frame-pointer avx
gcc -m64 -march=native -mtune=native -O3 -fomit-frame-pointer avx
gcc -m64 -march=native -mtune=native -O -fomit-frame-pointer avx
gcc -m64 -march=native -mtune=native -Os -fomit-frame-pointer avx
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv avx
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv avx
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv avx
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv avx