Implementation notes: amd64, titan0, crypto_core/mult3sntrup761

Computer: titan0
Architecture: amd64
CPU ID: GenuineIntel-000306c3-bfebfbff
SUPERCOP version: 20211108
Operation: crypto_core
Primitive: mult3sntrup761
TimeObject sizeTest sizeImplementationCompilerBenchmark dateSUPERCOP version
704144402 0 059977 804 976avx2unsignedgcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
788032841 0 047371 820 944avx2unsignedclang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
809532841 0 047371 820 944avx2unsignedclang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
840257214 0 071899 820 944avx2unsignedclang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
840214254 0 029769 804 976avx800gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021053020210529
845814254 0 029769 804 976avxgcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021053020210529
864918798 0 031283 820 944avxclang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021053020210529
867118798 0 031283 820 944avxclang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021053020210529
868219534 0 032019 820 944avxclang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021053020210529
874716444 0 031283 820 944avx800clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021053020210529
876216444 0 031283 820 944avx800clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021053020210529
880617164 0 032003 820 944avx800clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021053020210529
885012621 0 028161 804 976round2gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021053020210529
895817442 0 030939 820 944round2clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021053020210529
897816722 0 030219 820 944round2clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021053020210529
898616722 0 030219 820 944round2clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021053020210529
950021015 0 031865 812 912avx2unsignedclang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
961024914 0 037280 796 976avx2unsignedgcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
983727138 0 039400 796 976avx2unsignedgcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
1179113173 0 024516 780 944avx2unsignedgcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
1291114900 0 023889 812 912avxclang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021053020210529
1293013162 0 024177 812 912avx800clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021053020210529
1330110966 0 021433 812 912round2clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021053020210529
1366813636 0 025856 796 976avxgcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021053020210529
1380413469 0 025680 796 976avx800gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021053020210529
1445010718 0 022944 796 976round2gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021053020210529
1680313406 0 025720 796 976avx800gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021053020210529
1694013406 0 025720 796 976avxgcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021053020210529
1758810813 0 023152 796 976round2gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021053020210529
1827813289 0 024548 780 944avx800gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021053020210529
1847613260 0 024508 780 944avxgcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021053020210529
2068812455 0 023724 780 944round2gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021053020210529
2852714632 0 030177 804 976round1gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
3226210541 0 025491 820 944round1clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
3248610541 0 025491 820 944round1clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
3249312137 0 027099 820 944round1clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
363498073 0 019225 812 912round1clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
368418693 0 021017 804 976round1gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
376499233 0 021649 804 976round1gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
477938998 0 020252 780 944round1gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
1631773303 0 018841 804 97632gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
176721953 0 013264 796 97632gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
1795751057 0 013272 796 97632gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
204549896 0 011985 812 91232clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
2059413378 0 018315 820 94432clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
2059653378 0 018315 820 94432clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
2061953378 0 018315 820 94432clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
2072523066 0 017283 820 91232clang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
208110887 0 012108 780 94432gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
2401362861 0 017787 820 944refclang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
2402862861 0 017787 820 944refclang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
2473592861 0 017787 820 944refclang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
3357092737 0 018281 804 976refgcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
5432572722 0 016939 820 912refclang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
933226452 0 011545 812 912refclang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021011420210114
1205280591 0 012896 796 976refgcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
2472850658 0 012872 796 976refgcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114
2498682546 0 011764 780 944refgcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021011420210114

Compiler output

Implementation: avx
Security model: constbranchindex
Compiler: clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
mult768.c: mult768.c:208:7: error: always_inline function '_mm256_set1_epi16' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: x = const_x16(0);
mult768.c: ^
mult768.c: mult768.c:10:19: note: expanded from macro 'const_x16'
mult768.c: #define const_x16 _mm256_set1_epi16
mult768.c: ^
mult768.c: mult768.c:209:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: for (i = p&~15;i < 768;i += 16) store_x16(&f[i],x);
mult768.c: ^
mult768.c: mult768.c:9:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c: ^
mult768.c: mult768.c:210:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: for (i = p&~15;i < 768;i += 16) store_x16(&g[i],x);
mult768.c: ^
mult768.c: mult768.c:9:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c: ^
mult768.c: mult768.c:227:20: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: int16x16 fgi = load_x16(&fg[i]);
mult768.c: ^
mult768.c: mult768.c:8:21: note: expanded from macro 'load_x16'
mult768.c: #define load_x16(p) _mm256_loadu_si256((int16x16 *) (p))
mult768.c: ^
mult768.c: mult768.c:228:21: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
CompilerImplementations
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE avx

Compiler output

Implementation: avx2unsigned
Security model: constbranchindex
Compiler: clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
mult.c: mult.c:48:5: error: always_inline function '_mm256_store_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx2unsigned_constbranchindex' that is compiled without support for 'sse4.2'
mult.c: _mm256_store_si256( (__m256i*)(a+i) , cvt_to_unsigned(_mm256_load_si256((__m256i*)(a+i))) );
mult.c: ^
mult.c: mult.c:48:59: error: always_inline function '_mm256_load_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx2unsigned_constbranchindex' that is compiled without support for 'sse4.2'
mult.c: _mm256_store_si256( (__m256i*)(a+i) , cvt_to_unsigned(_mm256_load_si256((__m256i*)(a+i))) );
mult.c: ^
mult.c: mult.c:51:5: error: always_inline function '_mm256_store_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx2unsigned_constbranchindex' that is compiled without support for 'sse4.2'
mult.c: _mm256_store_si256( (__m256i*)(b+i) , cvt_to_unsigned(_mm256_load_si256((__m256i*)(b+i))) );
mult.c: ^
mult.c: mult.c:51:59: error: always_inline function '_mm256_load_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx2unsigned_constbranchindex' that is compiled without support for 'sse4.2'
mult.c: _mm256_store_si256( (__m256i*)(b+i) , cvt_to_unsigned(_mm256_load_si256((__m256i*)(b+i))) );
mult.c: ^
mult.c: mult.c:64:5: error: always_inline function '_mm256_store_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx2unsigned_constbranchindex' that is compiled without support for 'sse4.2'
mult.c: _mm256_store_si256( (__m256i*)(c+i) , cvt_to_int(_mm256_load_si256((__m256i*)(c+i))) );
mult.c: ^
mult.c: mult.c:64:54: error: always_inline function '_mm256_load_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx2unsigned_constbranchindex' that is compiled without support for 'sse4.2'
mult.c: _mm256_store_si256( (__m256i*)(c+i) , cvt_to_int(_mm256_load_si256((__m256i*)(c+i))) );
mult.c: ^
mult.c: 6 errors generated.

Number of similar (compiler,implementation) pairs: 1, namely:
CompilerImplementations
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE avx2unsigned

Compiler output

Implementation: avx800
Security model: constbranchindex
Compiler: clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
mult768.c: mult768.c:208:7: error: always_inline function '_mm256_set1_epi16' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx800_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: x = const_x16(0);
mult768.c: ^
mult768.c: mult768.c:10:19: note: expanded from macro 'const_x16'
mult768.c: #define const_x16 _mm256_set1_epi16
mult768.c: ^
mult768.c: mult768.c:209:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx800_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: for (i = p&~15;i < 768;i += 16) store_x16(&f[i],x);
mult768.c: ^
mult768.c: mult768.c:9:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c: ^
mult768.c: mult768.c:210:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx800_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: for (i = p&~15;i < 768;i += 16) store_x16(&g[i],x);
mult768.c: ^
mult768.c: mult768.c:9:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c: ^
mult768.c: mult768.c:227:20: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx800_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: int16x16 fgi = load_x16(&fg[i]);
mult768.c: ^
mult768.c: mult768.c:8:21: note: expanded from macro 'load_x16'
mult768.c: #define load_x16(p) _mm256_loadu_si256((int16x16 *) (p))
mult768.c: ^
mult768.c: mult768.c:228:21: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx800_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
CompilerImplementations
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE avx800

Compiler output

Implementation: round1
Security model: constbranchindex
Compiler: clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
mult.c: mult.c:146:22: error: invalid output size for constraint '=&x'
mult.c: MULSTEP_fromzero(0,h0,h1,h2,h3,h4)
mult.c: ^
mult.c: mult.c:148:26: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 1,h1,h2,h3,h4,h0)
mult.c: ^
mult.c: mult.c:149:26: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 2,h2,h3,h4,h0,h1)
mult.c: ^
mult.c: mult.c:150:26: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 3,h3,h4,h0,h1,h2)
mult.c: ^
mult.c: mult.c:151:26: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 4,h4,h0,h1,h2,h3)
mult.c: ^
mult.c: mult.c:152:26: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 5,h0,h1,h2,h3,h4)
mult.c: ^
mult.c: mult.c:154:24: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 1,h1,h2,h3,h4,h0)
mult.c: ^
mult.c: mult.c:155:24: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 2,h2,h3,h4,h0,h1)
mult.c: ^
mult.c: mult.c:156:24: error: invalid output size for constraint '+x'
mult.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
CompilerImplementations
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE round1

Compiler output

Implementation: round2
Security model: constbranchindex
Compiler: clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
mult768.c: mult768.c:208:7: error: always_inline function '_mm256_set1_epi16' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_round2_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: x = const_x16(0);
mult768.c: ^
mult768.c: mult768.c:10:19: note: expanded from macro 'const_x16'
mult768.c: #define const_x16 _mm256_set1_epi16
mult768.c: ^
mult768.c: mult768.c:209:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_round2_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: for (i = p&~15;i < 768;i += 16) store_x16(&f[i],x);
mult768.c: ^
mult768.c: mult768.c:9:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c: ^
mult768.c: mult768.c:210:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_round2_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: for (i = p&~15;i < 768;i += 16) store_x16(&g[i],x);
mult768.c: ^
mult768.c: mult768.c:9:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c: ^
mult768.c: mult768.c:227:20: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_round2_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: int16x16 fgi = load_x16(&fg[i]);
mult768.c: ^
mult768.c: mult768.c:8:21: note: expanded from macro 'load_x16'
mult768.c: #define load_x16(p) _mm256_loadu_si256((int16x16 *) (p))
mult768.c: ^
mult768.c: mult768.c:228:21: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_round2_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
CompilerImplementations
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE round2