Implementation notes: amd64, kizomba, crypto_core/mult3sntrup761

Computer: kizomba
Architecture: amd64
CPU ID: GenuineIntel-000906e9-bfebfbff
SUPERCOP version: 20211108
Operation: crypto_core
Primitive: mult3sntrup761
TimeObject sizeTest sizeImplementationCompilerBenchmark dateSUPERCOP version
666746988 0 062358 776 832avx2unsignedgcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2020090720200906
675257554 0 071585 776 776avx2unsignedclang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2020090720200906
680832777 0 046657 776 776avx2unsignedclang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2020090720200906
681132777 0 046657 776 776avx2unsignedclang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2020090720200906
694014302 0 029614 776 832avx800gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021052920210529
701216444 0 030633 776 776avx800clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021052920210529
702216444 0 030633 776 776avx800clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021052920210529
703518798 0 030633 776 776avxclang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021052920210529
705219534 0 031369 776 776avxclang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021052920210529
705918798 0 030633 776 776avxclang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021052920210529
707917164 0 031353 776 776avx800clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021052920210529
746316722 0 029569 776 776round2clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021052920210529
747416722 0 029569 776 776round2clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021052920210529
749817442 0 030289 776 776round2clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021052920210529
780416006 0 031326 776 832avxgcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021052920210529
820921017 0 031391 768 760avx2unsignedclang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2020090720200906
850825446 0 037437 768 832avx2unsignedgcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2020090720200906
888314365 0 029702 776 832round2gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021052920210529
895027844 0 039733 768 832avx2unsignedgcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2020090720200906
1073713175 0 024081 752 800avx2unsignedgcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2020090720200906
1102414900 0 023415 768 760avxclang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021052920210529
1107213162 0 023687 768 760avx800clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021052920210529
1169610963 0 020959 768 760round2clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2021052920210529
1217913768 0 025613 768 832avx800gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021052920210529
1292515665 0 027517 768 832avxgcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021052920210529
1490512292 0 024157 768 832round2gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021052920210529
1535913422 0 025357 768 832avx800gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021052920210529
1618915126 0 027069 768 832avxgcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021052920210529
1727912077 0 024037 768 832round2gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021052920210529
1751013292 0 024105 752 800avx800gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021052920210529
1772013284 0 024097 752 800avxgcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021052920210529
1937112458 0 023297 752 800round2gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2021052920210529
2517314664 0 029998 776 832round1gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2020090720200906
2804412041 0 026353 776 776round1clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2020090720200906
2820610461 0 024761 776 776round1clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2020090720200906
2882510461 0 024761 776 776round1clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2020090720200906
313598029 0 018703 768 760round1clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2020090720200906
324328698 0 020573 768 832round1gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2020090720200906
333249233 0 021205 768 832round1gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2020090720200906
443109000 0 019825 752 800round1gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2020090720200906
1630623374 0 018718 776 83232gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2020090720200906
175139953 0 012885 768 83232gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2020090720200906
1787921057 0 012901 768 83232gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2020090720200906
206056887 0 011665 752 80032gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2020090720200906
2064843377 0 017665 776 77632clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2020090720200906
2066493377 0 017665 776 77632clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2020090720200906
208192896 0 011511 768 76032clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2020090720200906
2084233377 0 017665 776 77632clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2020090720200906
2144903066 0 016633 776 76032clang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2020090720200906
2247502862 0 017137 776 776refclang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2020090720200906
2271372862 0 017137 776 776refclang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2020090720200906
2272702862 0 017137 776 776refclang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2020090720200906
2967442820 0 018158 776 832refgcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2020090720200906
5467122722 0 016289 776 760refclang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2020090720200906
908514452 0 011071 768 760refclang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2020090720200906
1216449591 0 012517 768 832refgcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2020090720200906
2478926658 0 012501 768 832refgcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2020090720200906
2498682546 0 011321 752 800refgcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2020090720200906

Compiler output

Implementation: avx
Security model: constbranchindex
Compiler: clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
mult768.c: mult768.c:208:7: error: always_inline function '_mm256_set1_epi16' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: x = const_x16(0);
mult768.c: ^
mult768.c: mult768.c:10:19: note: expanded from macro 'const_x16'
mult768.c: #define const_x16 _mm256_set1_epi16
mult768.c: ^
mult768.c: mult768.c:209:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: for (i = p&~15;i < 768;i += 16) store_x16(&f[i],x);
mult768.c: ^
mult768.c: mult768.c:9:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c: ^
mult768.c: mult768.c:210:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: for (i = p&~15;i < 768;i += 16) store_x16(&g[i],x);
mult768.c: ^
mult768.c: mult768.c:9:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c: ^
mult768.c: mult768.c:227:20: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: int16x16 fgi = load_x16(&fg[i]);
mult768.c: ^
mult768.c: mult768.c:8:21: note: expanded from macro 'load_x16'
mult768.c: #define load_x16(p) _mm256_loadu_si256((int16x16 *) (p))
mult768.c: ^
mult768.c: mult768.c:228:21: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
CompilerImplementations
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE avx

Compiler output

Implementation: avx2unsigned
Security model: constbranchindex
Compiler: clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
mult.c: mult.c:48:5: error: always_inline function '_mm256_store_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx2unsigned_constbranchindex' that is compiled without support for 'sse4.2'
mult.c: _mm256_store_si256( (__m256i*)(a+i) , cvt_to_unsigned(_mm256_load_si256((__m256i*)(a+i))) );
mult.c: ^
mult.c: mult.c:48:59: error: always_inline function '_mm256_load_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx2unsigned_constbranchindex' that is compiled without support for 'sse4.2'
mult.c: _mm256_store_si256( (__m256i*)(a+i) , cvt_to_unsigned(_mm256_load_si256((__m256i*)(a+i))) );
mult.c: ^
mult.c: mult.c:51:5: error: always_inline function '_mm256_store_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx2unsigned_constbranchindex' that is compiled without support for 'sse4.2'
mult.c: _mm256_store_si256( (__m256i*)(b+i) , cvt_to_unsigned(_mm256_load_si256((__m256i*)(b+i))) );
mult.c: ^
mult.c: mult.c:51:59: error: always_inline function '_mm256_load_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx2unsigned_constbranchindex' that is compiled without support for 'sse4.2'
mult.c: _mm256_store_si256( (__m256i*)(b+i) , cvt_to_unsigned(_mm256_load_si256((__m256i*)(b+i))) );
mult.c: ^
mult.c: mult.c:64:5: error: always_inline function '_mm256_store_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx2unsigned_constbranchindex' that is compiled without support for 'sse4.2'
mult.c: _mm256_store_si256( (__m256i*)(c+i) , cvt_to_int(_mm256_load_si256((__m256i*)(c+i))) );
mult.c: ^
mult.c: mult.c:64:54: error: always_inline function '_mm256_load_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx2unsigned_constbranchindex' that is compiled without support for 'sse4.2'
mult.c: _mm256_store_si256( (__m256i*)(c+i) , cvt_to_int(_mm256_load_si256((__m256i*)(c+i))) );
mult.c: ^
mult.c: 6 errors generated.

Number of similar (compiler,implementation) pairs: 1, namely:
CompilerImplementations
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE avx2unsigned

Compiler output

Implementation: avx800
Security model: constbranchindex
Compiler: clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
mult768.c: mult768.c:208:7: error: always_inline function '_mm256_set1_epi16' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx800_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: x = const_x16(0);
mult768.c: ^
mult768.c: mult768.c:10:19: note: expanded from macro 'const_x16'
mult768.c: #define const_x16 _mm256_set1_epi16
mult768.c: ^
mult768.c: mult768.c:209:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx800_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: for (i = p&~15;i < 768;i += 16) store_x16(&f[i],x);
mult768.c: ^
mult768.c: mult768.c:9:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c: ^
mult768.c: mult768.c:210:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx800_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: for (i = p&~15;i < 768;i += 16) store_x16(&g[i],x);
mult768.c: ^
mult768.c: mult768.c:9:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c: ^
mult768.c: mult768.c:227:20: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx800_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: int16x16 fgi = load_x16(&fg[i]);
mult768.c: ^
mult768.c: mult768.c:8:21: note: expanded from macro 'load_x16'
mult768.c: #define load_x16(p) _mm256_loadu_si256((int16x16 *) (p))
mult768.c: ^
mult768.c: mult768.c:228:21: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_avx800_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
CompilerImplementations
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE avx800

Compiler output

Implementation: round1
Security model: constbranchindex
Compiler: clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
mult.c: mult.c:146:22: error: invalid output size for constraint '=&x'
mult.c: MULSTEP_fromzero(0,h0,h1,h2,h3,h4)
mult.c: ^
mult.c: mult.c:148:26: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 1,h1,h2,h3,h4,h0)
mult.c: ^
mult.c: mult.c:149:26: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 2,h2,h3,h4,h0,h1)
mult.c: ^
mult.c: mult.c:150:26: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 3,h3,h4,h0,h1,h2)
mult.c: ^
mult.c: mult.c:151:26: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 4,h4,h0,h1,h2,h3)
mult.c: ^
mult.c: mult.c:152:26: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 5,h0,h1,h2,h3,h4)
mult.c: ^
mult.c: mult.c:154:24: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 1,h1,h2,h3,h4,h0)
mult.c: ^
mult.c: mult.c:155:24: error: invalid output size for constraint '+x'
mult.c: MULSTEP_noload(j + 2,h2,h3,h4,h0,h1)
mult.c: ^
mult.c: mult.c:156:24: error: invalid output size for constraint '+x'
mult.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
CompilerImplementations
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE round1

Compiler output

Implementation: round2
Security model: constbranchindex
Compiler: clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
mult768.c: mult768.c:208:7: error: always_inline function '_mm256_set1_epi16' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_round2_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: x = const_x16(0);
mult768.c: ^
mult768.c: mult768.c:10:19: note: expanded from macro 'const_x16'
mult768.c: #define const_x16 _mm256_set1_epi16
mult768.c: ^
mult768.c: mult768.c:209:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_round2_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: for (i = p&~15;i < 768;i += 16) store_x16(&f[i],x);
mult768.c: ^
mult768.c: mult768.c:9:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c: ^
mult768.c: mult768.c:210:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_round2_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: for (i = p&~15;i < 768;i += 16) store_x16(&g[i],x);
mult768.c: ^
mult768.c: mult768.c:9:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c: ^
mult768.c: mult768.c:227:20: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_round2_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: int16x16 fgi = load_x16(&fg[i]);
mult768.c: ^
mult768.c: mult768.c:8:21: note: expanded from macro 'load_x16'
mult768.c: #define load_x16(p) _mm256_loadu_si256((int16x16 *) (p))
mult768.c: ^
mult768.c: mult768.c:228:21: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'crypto_core_mult3sntrup761_round2_constbranchindex' that is compiled without support for 'sse4.2'
mult768.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
CompilerImplementations
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE round2