Implementation notes: amd64, comet, crypto_kem/mceliece460896
Computer: comet
Microarchitecture: amd64; Comet Lake (806ec)
Architecture: amd64
CPU ID: GenuineIntel-000806ec-bfebfbff
SUPERCOP version: 20240625
Operation: crypto_kem
Primitive: mceliece460896
Time | Object size | Test size | Implementation | Compiler | Benchmark date | SUPERCOP version |
305273 | 130442 0 0 | 184156 788 1760 | avx | gcc_-march=native_-mtune=native_-O3_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240709 | 20240625 |
309204 | 147612 0 0 | 202625 876 1792 | avx | clang_-march=native_-O3_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240709 | 20240625 |
310905 | 130989 0 0 | 185529 876 1728 | avx | clang_-march=native_-O2_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240709 | 20240625 |
334936 | 86246 0 0 | 138124 788 1760 | avx | gcc_-march=native_-mtune=native_-O2_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240709 | 20240625 |
336338 | 84870 0 0 | 136471 860 1792 | avx | clang_-march=native_-Os_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240709 | 20240625 |
349310 | 88094 0 0 | 139297 868 1728 | avx | clang_-march=native_-O_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240709 | 20240625 |
349476 | 84131 0 0 | 135428 788 1760 | avx | gcc_-march=native_-mtune=native_-O_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240709 | 20240625 |
354667 | 79365 0 0 | 129356 780 1728 | avx | gcc_-march=native_-mtune=native_-Os_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240709 | 20240625 |
495679 | 157416 0 0 | 208180 788 1760 | sse | gcc_-march=native_-mtune=native_-O3_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240709 | 20240625 |
500367 | 117109 0 0 | 167593 860 1728 | sse | clang_-march=native_-O2_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240710 | 20240625 |
503098 | 130383 0 0 | 181169 860 1792 | sse | clang_-march=native_-O3_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240710 | 20240625 |
538514 | 83638 0 0 | 132492 788 1760 | sse | gcc_-march=native_-mtune=native_-O2_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240709 | 20240625 |
554875 | 75896 0 0 | 123991 852 1792 | sse | clang_-march=native_-Os_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240710 | 20240625 |
557226 | 76287 0 0 | 124588 788 1760 | sse | gcc_-march=native_-mtune=native_-O_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240710 | 20240625 |
561360 | 80116 0 0 | 127305 860 1728 | sse | clang_-march=native_-O_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240710 | 20240625 |
570286 | 72213 0 0 | 119204 780 1728 | sse | gcc_-march=native_-mtune=native_-Os_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240709 | 20240625 |
1342122 | 88529 0 0 | 142580 788 1760 | vec | gcc_-march=native_-mtune=native_-O3_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240710 | 20240625 |
1649760 | 104631 0 0 | 158617 860 1728 | vec | clang_-mcpu=native_-O3_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240710 | 20240625 |
1740803 | 85050 0 0 | 140177 860 1728 | vec | clang_-march=native_-O2_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240710 | 20240625 |
1788598 | 111454 0 0 | 165881 860 1792 | vec | clang_-march=native_-O3_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240710 | 20240625 |
2063054 | 48302 0 0 | 100343 860 1792 | vec | clang_-march=native_-Os_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240710 | 20240625 |
2302194 | 51063 0 0 | 103276 788 1760 | vec | gcc_-march=native_-mtune=native_-O2_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240710 | 20240625 |
2304411 | 47094 0 0 | 98897 868 1728 | vec | clang_-march=native_-O_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240710 | 20240625 |
2711938 | 38290 0 0 | 88916 780 1728 | vec | gcc_-march=native_-mtune=native_-Os_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240710 | 20240625 |
2981615 | 48424 0 0 | 100316 788 1760 | vec | gcc_-march=native_-mtune=native_-O_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240710 | 20240625 |
139910525 | 44896 0 0 | 96119 868 1728 | ref | clang_-mcpu=native_-O3_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240709 | 20240625 |
152553270 | 37655 0 0 | 89873 860 1728 | ref | clang_-march=native_-O2_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240709 | 20240625 |
155590151 | 61810 0 0 | 114289 860 1792 | ref | clang_-march=native_-O3_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240709 | 20240625 |
161043751 | 16268 0 0 | 65781 860 1792 | ref | clang_-march=native_-Os_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240709 | 20240625 |
185178164 | 32359 0 0 | 83838 796 1760 | ref | gcc_-march=native_-mtune=native_-O3_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240709 | 20240625 |
194414426 | 14683 0 0 | 63601 860 1728 | ref | clang_-march=native_-O_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240709 | 20240625 |
207197197 | 16213 0 0 | 65822 796 1760 | ref | gcc_-march=native_-mtune=native_-O2_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240709 | 20240625 |
211225454 | 14249 0 0 | 63172 788 1760 | ref | gcc_-march=native_-mtune=native_-O_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240709 | 20240625 |
234990454 | 11238 0 0 | 58910 788 1728 | ref | gcc_-march=native_-mtune=native_-Os_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall | 20240709 | 20240625 |
Compiler output
bm.c: bm.c:78:37: warning: AVX vector return of type 'vec256' (aka '__m256i') without 'avx' enabled changes the ABI [-Wpsabi]
bm.c: for (i = 13; i < 16; i++) buf[i] = vec256_setzero();
bm.c: ^
bm.c: bm.c:80:15: warning: AVX vector return of type 'vec256' (aka '__m256i') without 'avx' enabled changes the ABI [-Wpsabi]
bm.c: mask[0][0] = vec256_set1_16b(0x5555);
bm.c: ^
bm.c: bm.c:81:15: warning: AVX vector return of type 'vec256' (aka '__m256i') without 'avx' enabled changes the ABI [-Wpsabi]
bm.c: mask[0][1] = vec256_set1_16b(0xAAAA);
bm.c: ^
bm.c: bm.c:82:15: warning: AVX vector return of type 'vec256' (aka '__m256i') without 'avx' enabled changes the ABI [-Wpsabi]
bm.c: mask[1][0] = vec256_set1_16b(0x3333);
bm.c: ^
bm.c: bm.c:83:15: warning: AVX vector return of type 'vec256' (aka '__m256i') without 'avx' enabled changes the ABI [-Wpsabi]
bm.c: mask[1][1] = vec256_set1_16b(0xCCCC);
bm.c: ^
bm.c: bm.c:84:15: warning: AVX vector return of type 'vec256' (aka '__m256i') without 'avx' enabled changes the ABI [-Wpsabi]
bm.c: mask[2][0] = vec256_set1_16b(0x0F0F);
bm.c: ^
bm.c: bm.c:85:15: warning: AVX vector return of type 'vec256' (aka '__m256i') without 'avx' enabled changes the ABI [-Wpsabi]
bm.c: mask[2][1] = vec256_set1_16b(0xF0F0);
bm.c: ^
bm.c: bm.c:86:15: warning: AVX vector return of type 'vec256' (aka '__m256i') without 'avx' enabled changes the ABI [-Wpsabi]
bm.c: mask[3][0] = vec256_set1_16b(0x00FF);
bm.c: ^
bm.c: bm.c:87:15: warning: AVX vector return of type 'vec256' (aka '__m256i') without 'avx' enabled changes the ABI [-Wpsabi]
bm.c: ...
Number of similar (implementation,compiler) pairs: 1, namely:
Implementation | Compiler |
avx | clang -mcpu=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_14.0.6) |
Compiler output
pk_gen.c: pk_gen.c:211:12: warning: variable length array folded to constant array as an extension [-Wgnu-folding-constant]
pk_gen.c: uint64_t w[ PK_NROWS ][ nBlocks_I*2 ];
pk_gen.c: ^
pk_gen.c: pk_gen.c:212:12: warning: variable length array folded to constant array as an extension [-Wgnu-folding-constant]
pk_gen.c: vec128 v[ PK_NROWS ][ nBlocks_I ];
pk_gen.c: ^
pk_gen.c: 2 warnings generated.
Number of similar (implementation,compiler) pairs: 4, namely:
Implementation | Compiler |
sse | clang -march=native -O2 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_14.0.6) |
sse | clang -march=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_14.0.6) |
sse | clang -march=native -O -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_14.0.6) |
sse | clang -march=native -Os -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_14.0.6) |
Compiler output
decrypt.c: In file included from decrypt.c:9:
decrypt.c: In file included from ./vec128_gf.h:10:
decrypt.c: ./vec128.h:33:9: error: always_inline function '_mm_testz_si128' requires target feature 'sse4.1', but would be inlined into function 'vec128_testz' that is compiled without support for 'sse4.1'
decrypt.c: return _mm_testz_si128(a, a);
decrypt.c: ^
decrypt.c: 1 error generated.
Number of similar (implementation,compiler) pairs: 1, namely:
Implementation | Compiler |
sse | clang -mcpu=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_14.0.6) |
TIMECOP error (can be valgrind bug)
error 1
VEX temporary storage exhausted.
Pool = TEMP, start 0x597c5528 curr 0x59c72348 end 0x59c8a067 (size 5000000)
vex: the `impossible' happened:
VEX temporary storage exhausted.
Increase N_{TEMPORARY,PERMANENT}_BYTES and recompile.
vex storage: T total 292926600 bytes allocated
vex storage: P total 512 bytes allocated
valgrind: the 'impossible' happened:
LibVEX called failure_exit().
host stacktrace:
at 0x...: show_sched_status_wrk (m_libcassert.c:406)
by 0x...: report_and_quit (m_libcassert.c:477)
by 0x...: panic (m_libcassert.c:553)
by 0x...: vgPlain_core_panic_at (m_libcassert.c:558)
by 0x...: vgPlain_core_panic (m_libcassert.c:563)
by 0x...: failure_exit (m_translate.c:761)
by 0x...: vpanic (main_util.c:253)
by 0x...: private_LibVEX_alloc_OOM (main_util.c:181)
by 0x...: LibVEX_Alloc_inline (main_util.h:176)
by 0x...: addHInstr_SLOW (host_generic_regs.c:332)
by 0x...: emit_instr (host_generic_reg_alloc3.c:301)
by 0x...: doRegisterAllocation_v3 (host_generic_reg_alloc3.c:1320)
by 0x...: libvex_BackEnd (main_main.c:1133)
by 0x...: LibVEX_Translate (main_main.c:1236)
by 0x...: vgPlain_translate (m_translate.c:1831)
by 0x...: handle_chain_me (scheduler.c:1169)
by 0x...: vgPlain_scheduler (scheduler.c:1514)
by 0x...: thread_wrapper (syswrap-linux.c:101)
by 0x...: run_a_thread_NORETURN (syswrap-linux.c:154)
sched status:
running_tid=1
Thread 1: status = VgTs_Runnable (lwpid 2722657)
at 0x...: extract_gf (sk_gen.c:43)
by 0x...: crypto_kem_mceliece460896_avx_constbranchindex_genpoly_gen (sk_gen.c:153)
by 0x...: crypto_kem_mceliece460896_avx_constbranchindex_operation_keypair (kem_keypair.c:57)
by 0x...: crypto_kem_mceliece460896_avx_constbranchindex_keypair (wrap_keypair.c:10)
by 0x...: test (try.c:126)
by 0x...: main (try-anything.c:345)
client stack range: [0x1FFEFDF000 0x1FFF000FFF] client SP: 0x1FFEFE0160
valgrind stack range: [0x10090BB000 0x10091BAFFF] top usage: 18792 of 1048576
Note: see also the FAQ in the source distribution.
It contains workarounds to several common problems.
In particular, if Valgrind aborted or crashed after
identifying problems in your program, there's a good chance
that fixing those problems will prevent Valgrind aborting or
crashing, especially if it happened in m_mallocfree.c.
If that doesn't help, please report this bug to: www.valgrind.org
In the bug report, send all the above text, the valgrind
version, and what OS and version you are using. Thanks.
Number of similar (implementation,compiler) pairs: 1, namely:
Implementation | Compiler |
avx | gcc -march=native -mtune=native -O3 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (12.2.0) |
TIMECOP error (can be valgrind bug)
error 111
Process terminating with default action of signal 4 (SIGILL)
Illegal opcode at address 0x1227E4
at 0x...: vec128_set2x (vec128.h:56)
by 0x...: postprocess (fft_tr.c:283)
by 0x...: crypto_kem_mceliece460896_sse_constbranchindex_fft_tr (fft_tr.c:292)
by 0x...: crypto_kem_mceliece460896_sse_constbranchindex_decrypt (decrypt.c:175)
by 0x...: crypto_kem_mceliece460896_sse_constbranchindex_operation_dec (kem_dec.c:36)
by 0x...: test (try.c:160)
by 0x...: main (try-anything.c:345)
Number of similar (implementation,compiler) pairs: 1, namely:
Implementation | Compiler |
sse | clang -march=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_14.0.6) |
TIMECOP error (can be valgrind bug)
error 1
VEX temporary storage exhausted.
Pool = TEMP, start 0x597c5528 curr 0x59c72348 end 0x59c8a067 (size 5000000)
vex: the `impossible' happened:
VEX temporary storage exhausted.
Increase N_{TEMPORARY,PERMANENT}_BYTES and recompile.
vex storage: T total 294918936 bytes allocated
vex storage: P total 512 bytes allocated
valgrind: the 'impossible' happened:
LibVEX called failure_exit().
host stacktrace:
at 0x...: show_sched_status_wrk (m_libcassert.c:406)
by 0x...: report_and_quit (m_libcassert.c:477)
by 0x...: panic (m_libcassert.c:553)
by 0x...: vgPlain_core_panic_at (m_libcassert.c:558)
by 0x...: vgPlain_core_panic (m_libcassert.c:563)
by 0x...: failure_exit (m_translate.c:761)
by 0x...: vpanic (main_util.c:253)
by 0x...: private_LibVEX_alloc_OOM (main_util.c:181)
by 0x...: LibVEX_Alloc_inline (main_util.h:176)
by 0x...: addHInstr_SLOW (host_generic_regs.c:332)
by 0x...: emit_instr (host_generic_reg_alloc3.c:301)
by 0x...: doRegisterAllocation_v3 (host_generic_reg_alloc3.c:1320)
by 0x...: libvex_BackEnd (main_main.c:1133)
by 0x...: LibVEX_Translate (main_main.c:1236)
by 0x...: vgPlain_translate (m_translate.c:1831)
by 0x...: handle_chain_me (scheduler.c:1169)
by 0x...: vgPlain_scheduler (scheduler.c:1514)
by 0x...: thread_wrapper (syswrap-linux.c:101)
by 0x...: run_a_thread_NORETURN (syswrap-linux.c:154)
sched status:
running_tid=1
Thread 1: status = VgTs_Runnable (lwpid 2735284)
at 0x...: extract_gf (sk_gen.c:43)
by 0x...: crypto_kem_mceliece460896_sse_constbranchindex_genpoly_gen (sk_gen.c:153)
by 0x...: crypto_kem_mceliece460896_sse_constbranchindex_operation_keypair (kem_keypair.c:57)
by 0x...: crypto_kem_mceliece460896_sse_constbranchindex_keypair (wrap_keypair.c:10)
by 0x...: test (try.c:126)
by 0x...: main (try-anything.c:345)
client stack range: [0x1FFEFDF000 0x1FFF000FFF] client SP: 0x1FFEFE0160
valgrind stack range: [0x1008BAE000 0x1008CADFFF] top usage: 18792 of 1048576
Note: see also the FAQ in the source distribution.
It contains workarounds to several common problems.
In particular, if Valgrind aborted or crashed after
identifying problems in your program, there's a good chance
that fixing those problems will prevent Valgrind aborting or
crashing, especially if it happened in m_mallocfree.c.
If that doesn't help, please report this bug to: www.valgrind.org
In the bug report, send all the above text, the valgrind
version, and what OS and version you are using. Thanks.
Number of similar (implementation,compiler) pairs: 1, namely:
Implementation | Compiler |
sse | gcc -march=native -mtune=native -O3 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (12.2.0) |
Passed TIMECOP
TIMECOP iterations: 1
Number of similar (implementation,compiler) pairs: 31, namely:
Implementation | Compiler |
avx | clang -march=native -O2 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_14.0.6) |
avx | clang -march=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_14.0.6) |
avx | clang -march=native -O -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_14.0.6) |
avx | clang -march=native -Os -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_14.0.6) |
avx | gcc -march=native -mtune=native -O2 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (12.2.0) |
avx | gcc -march=native -mtune=native -O -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (12.2.0) |
avx | gcc -march=native -mtune=native -Os -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (12.2.0) |
ref | clang -march=native -O2 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_14.0.6) |
ref | clang -march=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_14.0.6) |
ref | clang -march=native -O -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_14.0.6) |
ref | clang -march=native -Os -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_14.0.6) |
ref | clang -mcpu=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_14.0.6) |
ref | gcc -march=native -mtune=native -O2 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (12.2.0) |
ref | gcc -march=native -mtune=native -O3 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (12.2.0) |
ref | gcc -march=native -mtune=native -O -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (12.2.0) |
ref | gcc -march=native -mtune=native -Os -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (12.2.0) |
sse | clang -march=native -O2 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_14.0.6) |
sse | clang -march=native -O -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_14.0.6) |
sse | clang -march=native -Os -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_14.0.6) |
sse | gcc -march=native -mtune=native -O2 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (12.2.0) |
sse | gcc -march=native -mtune=native -O -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (12.2.0) |
sse | gcc -march=native -mtune=native -Os -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (12.2.0) |
vec | clang -march=native -O2 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_14.0.6) |
vec | clang -march=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_14.0.6) |
vec | clang -march=native -O -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_14.0.6) |
vec | clang -march=native -Os -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_14.0.6) |
vec | clang -mcpu=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_14.0.6) |
vec | gcc -march=native -mtune=native -O2 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (12.2.0) |
vec | gcc -march=native -mtune=native -O3 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (12.2.0) |
vec | gcc -march=native -mtune=native -O -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (12.2.0) |
vec | gcc -march=native -mtune=native -Os -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (12.2.0) |