Implementation notes: amd64, renoir, crypto_kem/kyber768

Computer: renoir
Microarchitecture: amd64; Zen 2 (860f01)
Architecture: amd64
CPU ID: AuthenticAMD-00860f01-178bfbff
SUPERCOP version: 20240625
Operation: crypto_kem
Primitive: kyber768
TimeObject sizeTest sizeImplementationCompilerBenchmark dateSUPERCOP version
97649138706 0 0159296 820 1720avx2clang_-march=native_-O3_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062920240625
97890134651 0 0155064 820 1720avx2clang_-march=native_-O2_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062920240625
101392127108 0 0145094 812 1720avx2clang_-march=native_-Os_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062920240625
104626152177 0 0173496 788 1752avx2gcc_-march=native_-mtune=native_-O3_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024062920240625
108296128274 0 0146894 812 1720avx2clang_-march=native_-O_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062920240625
109872125914 0 0145120 788 1752avx2gcc_-march=native_-mtune=native_-O_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024062920240625
110444128788 0 0148480 788 1752avx2gcc_-march=native_-mtune=native_-O2_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024062920240625
111879125269 0 0143416 780 1720avx2gcc_-march=native_-mtune=native_-Os_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024062920240625
39649175098 0 0104592 820 1720compactclang_-march=native_-O3_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062920240625
41659542834 0 072192 820 1720compactclang_-march=native_-O2_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062920240625
42613365279 0 095472 788 1752compactgcc_-march=native_-mtune=native_-O3_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024062920240625
45946234840 0 055760 820 1720refclang_-march=native_-O3_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062920240625
47060947624 0 077592 820 1720compactclang_-mcpu=native_-O3_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062920240625
50329923056 0 043800 820 1720refclang_-march=native_-O2_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062920240625
50721712222 0 030622 812 1720refclang_-march=native_-Os_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062920240625
50950231379 0 052664 820 1720refclang_-mcpu=native_-O3_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062920240625
5126236677 0 034214 812 1720compactclang_-march=native_-Os_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062920240625
52465040545 0 061808 788 1752refgcc_-march=native_-mtune=native_-O3_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024062920240625
56930712973 0 032688 788 1752refgcc_-march=native_-mtune=native_-O2_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024062920240625
56946311922 0 031080 788 1752refgcc_-march=native_-mtune=native_-O_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024062920240625
5805846171 0 034752 788 1752compactgcc_-march=native_-mtune=native_-O2_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024062920240625
60153911245 0 029360 780 1720refgcc_-march=native_-mtune=native_-Os_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024062920240625
6548315861 0 034032 788 1752compactgcc_-march=native_-mtune=native_-O_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024062920240625
66098913076 0 032030 812 1720refclang_-march=native_-O_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062920240625
7699817608 0 035686 812 1720compactclang_-march=native_-O_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062920240625
8835565247 0 032344 780 1720compactgcc_-march=native_-mtune=native_-Os_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024062920240625

Compiler output


KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: error: always_inline function '_mm256_loadu_si256' requires target feature 'avx', but would be inlined into function 'crypto_kem_kyber768_avx2_constbranchindex_KeccakP1600times4_AddLanesAll' that is compiled without support for 'avx'
KeccakP-1600-times4-SIMD256.c:         Xor_In4( 0 );
KeccakP-1600-times4-SIMD256.c:         ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:42: note: expanded from macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c:     #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c:                                          ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:44:37: note: expanded from macro 'LOAD256u'
KeccakP-1600-times4-SIMD256.c:     #define LOAD256u(a)             _mm256_loadu_si256((const V256 *)&(a))
KeccakP-1600-times4-SIMD256.c:                                     ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: error: AVX vector return of type '__m256i' (vector of 4 'long long' values) without 'avx' enabled changes the ABI
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:135:42: note: expanded from macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c:     #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\
KeccakP-1600-times4-SIMD256.c:                                          ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:44:37: note: expanded from macro 'LOAD256u'
KeccakP-1600-times4-SIMD256.c:     #define LOAD256u(a)             _mm256_loadu_si256((const V256 *)&(a))
KeccakP-1600-times4-SIMD256.c:                                     ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: error: always_inline function '_mm256_loadu_si256' requires target feature 'avx', but would be inlined into function 'crypto_kem_kyber768_avx2_constbranchindex_KeccakP1600times4_AddLanesAll' that is compiled without support for 'avx'
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:136:42: note: expanded from macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c:                                 lanes1 = LOAD256u( curData1[argIndex]),\
KeccakP-1600-times4-SIMD256.c:                                          ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:44:37: note: expanded from macro 'LOAD256u'
KeccakP-1600-times4-SIMD256.c:     #define LOAD256u(a)             _mm256_loadu_si256((const V256 *)&(a))
KeccakP-1600-times4-SIMD256.c:                                     ^
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:146:9: error: AVX vector return of type '__m256i' (vector of 4 'long long' values) without 'avx' enabled changes the ABI
KeccakP-1600-times4-SIMD256.c: KeccakP-1600-times4-SIMD256.c:136:42: note: expanded from macro 'Xor_In4'
KeccakP-1600-times4-SIMD256.c: ...

Number of similar (implementation,compiler) pairs: 1, namely:
ImplementationCompiler
avx2clang -mcpu=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_11.0.1)

TIMECOP error (can be valgrind bug)


Process terminating with default action of signal 4 (SIGILL)
 Illegal opcode at address 0x40890C
   at 0x...: poly_compress (kem.c:126)
   by 0x...: pack_ciphertext (kem.c:311)
   by 0x...: indcpa_enc (kem.c:399)
   by 0x...: crypto_kem_kyber768_compact_constbranchindex_enc (kem.c:444)
   by 0x...: test (try.c:141)
   by 0x...: main (try-anything.c:345)

Number of similar (implementation,compiler) pairs: 1, namely:
ImplementationCompiler
compactclang -march=native -O2 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_11.0.1)

TIMECOP error (can be valgrind bug)


Process terminating with default action of signal 4 (SIGILL)
 Illegal opcode at address 0x40B9F2
   at 0x...: poly_compress (kem.c:126)
   by 0x...: pack_ciphertext (kem.c:311)
   by 0x...: indcpa_enc (kem.c:399)
   by 0x...: crypto_kem_kyber768_compact_constbranchindex_enc (kem.c:444)
   by 0x...: test (try.c:141)
   by 0x...: main (try-anything.c:345)

Number of similar (implementation,compiler) pairs: 1, namely:
ImplementationCompiler
compactclang -march=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_11.0.1)

TIMECOP error (can be valgrind bug)


Process terminating with default action of signal 4 (SIGILL)
 Illegal opcode at address 0x405F41
   at 0x...: crypto_kem_kyber768_ref_constbranchindex_poly_tomsg (poly.c:197)
   by 0x...: crypto_kem_kyber768_ref_constbranchindex_indcpa_dec (indcpa.c:330)
   by 0x...: crypto_kem_kyber768_ref_constbranchindex_dec (kem.c:106)
   by 0x...: test (try.c:160)
   by 0x...: main (try-anything.c:345)

Number of similar (implementation,compiler) pairs: 1, namely:
ImplementationCompiler
refclang -march=native -O2 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_11.0.1)

TIMECOP error (can be valgrind bug)


Process terminating with default action of signal 4 (SIGILL)
 Illegal opcode at address 0x40677B
   at 0x...: crypto_kem_kyber768_ref_constbranchindex_poly_tomsg (poly.c:197)
   by 0x...: crypto_kem_kyber768_ref_constbranchindex_indcpa_dec (indcpa.c:330)
   by 0x...: crypto_kem_kyber768_ref_constbranchindex_dec (kem.c:106)
   by 0x...: test (try.c:160)
   by 0x...: main (try-anything.c:345)

Number of similar (implementation,compiler) pairs: 1, namely:
ImplementationCompiler
refclang -march=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_11.0.1)

Passed TIMECOP


TIMECOP iterations: 1

Number of similar (implementation,compiler) pairs: 22, namely:
ImplementationCompiler
avx2clang -march=native -O2 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_11.0.1)
avx2clang -march=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_11.0.1)
avx2clang -march=native -O -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_11.0.1)
avx2clang -march=native -Os -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_11.0.1)
avx2gcc -march=native -mtune=native -O2 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (10.2.1_20210110)
avx2gcc -march=native -mtune=native -O3 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (10.2.1_20210110)
avx2gcc -march=native -mtune=native -O -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (10.2.1_20210110)
avx2gcc -march=native -mtune=native -Os -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (10.2.1_20210110)
compactclang -march=native -O -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_11.0.1)
compactclang -march=native -Os -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_11.0.1)
compactclang -mcpu=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_11.0.1)
compactgcc -march=native -mtune=native -O2 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (10.2.1_20210110)
compactgcc -march=native -mtune=native -O3 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (10.2.1_20210110)
compactgcc -march=native -mtune=native -O -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (10.2.1_20210110)
compactgcc -march=native -mtune=native -Os -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (10.2.1_20210110)
refclang -march=native -O -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_11.0.1)
refclang -march=native -Os -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_11.0.1)
refclang -mcpu=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_11.0.1)
refgcc -march=native -mtune=native -O2 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (10.2.1_20210110)
refgcc -march=native -mtune=native -O3 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (10.2.1_20210110)
refgcc -march=native -mtune=native -O -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (10.2.1_20210110)
refgcc -march=native -mtune=native -Os -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (10.2.1_20210110)