Implementation notes: amd64, samba, crypto_core/mult3sntrup761

Computer: samba
Architecture: amd64
CPU ID: GenuineIntel-000506e3-bfebfbff
SUPERCOP version: 20211108
Operation: crypto_core
Primitive: mult3sntrup761
TimeObject sizeTest sizeImplementationCompilerBenchmark dateSUPERCOP version
650844402 0 059977 804 976avx2unsignedgcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
677457554 0 072028 804 944avx2unsignedclang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
679232777 0 047100 804 944avx2unsignedclang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
680032777 0 047100 804 944avx2unsignedclang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
695914254 0 029769 804 976avx800gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021052920210529
696914254 0 029769 804 976avxgcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021052920210529
707216444 0 031076 804 944avx800clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021052920210529
708116444 0 031076 804 944avx800clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021052920210529
711417164 0 031796 804 944avx800clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021052920210529
711718798 0 031076 804 944avxclang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021052920210529
712318798 0 031076 804 944avxclang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021052920210529
718519534 0 031812 804 944avxclang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021052920210529
748617442 0 030732 804 944round2clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021052920210529
749416722 0 030012 804 944round2clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021052920210529
749612621 0 028161 804 976round2gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021052920210529
757416722 0 030012 804 944round2clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021052920210529
852021017 0 031834 796 912avx2unsignedclang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
875127138 0 039400 796 976avx2unsignedgcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
928924914 0 037280 796 976avx2unsignedgcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
1048313173 0 024516 780 944avx2unsignedgcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
1108113162 0 024130 796 912avx800clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021052920210529
1109214900 0 023858 796 912avxclang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021052920210529
1150510963 0 021402 796 912round2clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021052920210529
1429010718 0 022944 796 976round2gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021052920210529
1519113636 0 025856 796 976avxgcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021052920210529
1527113469 0 025680 796 976avx800gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021052920210529
1597613406 0 025720 796 976avxgcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021052920210529
1598213406 0 025720 796 976avx800gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021052920210529
1601610813 0 023152 796 976round2gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021052920210529
1745513289 0 024548 780 944avx800gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021052920210529
1755713260 0 024508 780 944avxgcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021052920210529
1985112455 0 023724 780 944round2gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021052920210529
2588014632 0 030177 804 976round1gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
2848210461 0 025204 804 944round1clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
2850312041 0 026796 804 944round1clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
2899310461 0 025204 804 944round1clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
324788029 0 019146 796 912round1clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
348189233 0 021649 804 976round1gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
351208693 0 021017 804 976round1gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
451718998 0 020252 780 944round1gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
172753953 0 013264 796 97632gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
1793891057 0 013272 796 97632gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
1985813303 0 018841 804 97632gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
204538896 0 011954 796 91232clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
2066143377 0 018108 804 94432clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
2066683377 0 018108 804 94432clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
2077583377 0 018108 804 94432clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
2086833066 0 017076 804 91232clang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
224096887 0 012108 780 94432gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
2287362862 0 017580 804 944refclang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
2287972862 0 017580 804 944refclang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
2306922862 0 017580 804 944refclang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
2989932737 0 018281 804 976refgcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
5435562722 0 016732 804 912refclang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
1052524452 0 011514 796 912refclang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
1202562591 0 012896 796 976refgcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
2480526658 0 012872 796 976refgcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
2499982546 0 011764 780 944refgcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114

Compiler output

Implementation: avx
Security model: constbranchindex
Compiler: clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
mult768.c: mult768.c:208:7: error: always_inline function '_mm256_set1_epi16' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: x = const_x16(0);
mult768.c: ^
mult768.c: mult768.c:10:19: note: expanded from macro 'const_x16'
mult768.c: #define const_x16 _mm256_set1_epi16
mult768.c: ^
mult768.c: mult768.c:209:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: for (i = p&~15;i < 768;i += 16) store_x16(&f[i],x);
mult768.c: ^
mult768.c: mult768.c:9:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c: ^
mult768.c: mult768.c:210:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: for (i = p&~15;i < 768;i += 16) store_x16(&g[i],x);
mult768.c: ^
mult768.c: mult768.c:9:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c: ^
mult768.c: mult768.c:227:20: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: int16x16 fgi = load_x16(&fg[i]);
mult768.c: ^
mult768.c: mult768.c:8:21: note: expanded from macro 'load_x16'
mult768.c: #define load_x16(p) _mm256_loadu_si256((int16x16 *) (p))
mult768.c: ^
mult768.c: mult768.c:228:21: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
CompilerImplementations
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE avx

Compiler output

Implementation: avx2unsigned
Security model: constbranchindex
Compiler: clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
mult.c: mult.c:48:5: error: always_inline function '_mm256_store_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx2unsigned_constbranchindex' that is compiled without support for 'sse4.2'
mult.c: _mm256_store_si256( (__m256i*)(a+i) , cvt_to_unsigned(_mm256_load_si256((__m256i*)(a+i))) );
mult.c: ^
mult.c: mult.c:48:59: error: always_inline function '_mm256_load_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx2unsigned_constbranchindex' that is compiled without support for 'sse4.2'
mult.c: _mm256_store_si256( (__m256i*)(a+i) , cvt_to_unsigned(_mm256_load_si256((__m256i*)(a+i))) );
mult.c: ^
mult.c: mult.c:51:5: error: always_inline function '_mm256_store_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx2unsigned_constbranchindex' that is compiled without support for 'sse4.2'
mult.c: _mm256_store_si256( (__m256i*)(b+i) , cvt_to_unsigned(_mm256_load_si256((__m256i*)(b+i))) );
mult.c: ^
mult.c: mult.c:51:59: error: always_inline function '_mm256_load_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx2unsigned_constbranchindex' that is compiled without support for 'sse4.2'
mult.c: _mm256_store_si256( (__m256i*)(b+i) , cvt_to_unsigned(_mm256_load_si256((__m256i*)(b+i))) );
mult.c: ^
mult.c: mult.c:64:5: error: always_inline function '_mm256_store_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx2unsigned_constbranchindex' that is compiled without support for 'sse4.2'
mult.c: _mm256_store_si256( (__m256i*)(c+i) , cvt_to_int(_mm256_load_si256((__m256i*)(c+i))) );
mult.c: ^
mult.c: mult.c:64:54: error: always_inline function '_mm256_load_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx2unsigned_constbranchindex' that is compiled without support for 'sse4.2'
mult.c: _mm256_store_si256( (__m256i*)(c+i) , cvt_to_int(_mm256_load_si256((__m256i*)(c+i))) );
mult.c: ^
mult.c: 6 errors generated.

Number of similar (compiler,implementation) pairs: 1, namely:
CompilerImplementations
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE avx2unsigned

Compiler output

Implementation: avx800
Security model: constbranchindex
Compiler: clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
mult768.c: mult768.c:208:7: error: always_inline function '_mm256_set1_epi16' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx800_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: x = const_x16(0);
mult768.c: ^
mult768.c: mult768.c:10:19: note: expanded from macro 'const_x16'
mult768.c: #define const_x16 _mm256_set1_epi16
mult768.c: ^
mult768.c: mult768.c:209:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx800_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: for (i = p&~15;i < 768;i += 16) store_x16(&f[i],x);
mult768.c: ^
mult768.c: mult768.c:9:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c: ^
mult768.c: mult768.c:210:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx800_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: for (i = p&~15;i < 768;i += 16) store_x16(&g[i],x);
mult768.c: ^
mult768.c: mult768.c:9:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c: ^
mult768.c: mult768.c:227:20: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx800_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: int16x16 fgi = load_x16(&fg[i]);
mult768.c: ^
mult768.c: mult768.c:8:21: note: expanded from macro 'load_x16'
mult768.c: #define load_x16(p) _mm256_loadu_si256((int16x16 *) (p))
mult768.c: ^
mult768.c: mult768.c:228:21: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx800_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
CompilerImplementations
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE avx800

Compiler output

Implementation: round1
Security model: constbranchindex
Compiler: clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
mult.c: mult.c:146:22: error: invalid output size for constraint '=&x'
mult.c: MULSTEP_fromzero(0,h0,h1,h2,h3,h4)
mult.c: ^
mult.c: mult.c:148:26: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 1,h1,h2,h3,h4,h0)
mult.c: ^
mult.c: mult.c:149:26: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 2,h2,h3,h4,h0,h1)
mult.c: ^
mult.c: mult.c:150:26: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 3,h3,h4,h0,h1,h2)
mult.c: ^
mult.c: mult.c:151:26: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 4,h4,h0,h1,h2,h3)
mult.c: ^
mult.c: mult.c:152:26: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 5,h0,h1,h2,h3,h4)
mult.c: ^
mult.c: mult.c:154:24: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 1,h1,h2,h3,h4,h0)
mult.c: ^
mult.c: mult.c:155:24: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 2,h2,h3,h4,h0,h1)
mult.c: ^
mult.c: mult.c:156:24: error: invalid output size for constraint '+x'
mult.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
CompilerImplementations
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE round1

Compiler output

Implementation: round2
Security model: constbranchindex
Compiler: clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
mult768.c: mult768.c:208:7: error: always_inline function '_mm256_set1_epi16' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_round2_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: x = const_x16(0);
mult768.c: ^
mult768.c: mult768.c:10:19: note: expanded from macro 'const_x16'
mult768.c: #define const_x16 _mm256_set1_epi16
mult768.c: ^
mult768.c: mult768.c:209:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_round2_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: for (i = p&~15;i < 768;i += 16) store_x16(&f[i],x);
mult768.c: ^
mult768.c: mult768.c:9:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c: ^
mult768.c: mult768.c:210:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_round2_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: for (i = p&~15;i < 768;i += 16) store_x16(&g[i],x);
mult768.c: ^
mult768.c: mult768.c:9:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c: ^
mult768.c: mult768.c:227:20: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_round2_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: int16x16 fgi = load_x16(&fg[i]);
mult768.c: ^
mult768.c: mult768.c:8:21: note: expanded from macro 'load_x16'
mult768.c: #define load_x16(p) _mm256_loadu_si256((int16x16 *) (p))
mult768.c: ^
mult768.c: mult768.c:228:21: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_round2_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
CompilerImplementations
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE round2