Implementation notes: amd64, zen3, crypto_kem/bikel1

Computer: zen3
Architecture: amd64
CPU ID: AuthenticAMD-00a20f10-178bfbff
SUPERCOP version: 20211108
Operation: crypto_kem
Primitive: bikel1
TimeObject sizeTest sizeImplementationCompilerBenchmark dateSUPERCOP version
144820786642 72 4107992 924 1756T:ches2021gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022020720211108
1502289296120 72 4323852 948 1724T:ches2021clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022020720211108
1516985400906 72 4428588 948 1724T:ches2021clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022020720211108
172979560141 72 479720 924 1756T:ches2021gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022020720211108
177280048806 72 468106 940 1724T:ches2021clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022020720211108
177663556381 72 476212 948 1724T:ches2021clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022020720211108
184342856835 72 475728 924 1756T:ches2021gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022020720211108
207197645481 64 466968 916 1756T:avx2gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022020720211108
208566944397 72 462324 916 1724T:ches2021gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022020720211108
211507495082 64 4122852 940 1724T:avx2clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022020720211108
2122266144388 64 4172076 940 1724T:avx2clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022020720211108
2741898159105 56 4186772 932 1724T:aes-ni-and-pclmulclang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022020720211108
405024919780 64 439716 940 1724T:avx2clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022020720211108
405292226424 64 445464 916 1756T:avx2gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022020720211108
436243623665 64 441652 908 1724T:avx2gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022020720211108
461460127007 64 446648 916 1756T:avx2gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022020720211108
470320398272 56 4126124 932 1724T:aes-ni-and-pclmulclang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022020720211108
523636117085 64 436498 932 1724T:avx2clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022020720211108
590889967079 56 488568 908 1756T:aes-ni-and-pclmulgcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022020720211108
795027618361 56 438252 932 1724T:aes-ni-and-pclmulclang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022020720211108
796050033093 56 451952 908 1756T:aes-ni-and-pclmulgcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022020720211108
821833834082 56 453616 908 1756T:aes-ni-and-pclmulgcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022020720211108
844628322592 56 440484 900 1724T:aes-ni-and-pclmulgcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022020720211108
895760416333 56 435658 924 1724T:aes-ni-and-pclmulclang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022020720211108
9657398158863 56 4186508 932 1724T:aes-ni-onlyclang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022020720211108
9734360154289 48 4182610 980 1724T:portableclang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022020720211108
1058832132630 48 455858 980 1724T:portableclang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022020720211108
1121680998030 56 4125924 932 1724T:aes-ni-onlyclang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022020720211108
1126271093424 48 4121962 980 1724T:portableclang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022020720211108
1230523165312 48 487482 956 1756T:portablegcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022020720211108
1240494068935 56 490416 908 1756T:aes-ni-onlygcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022020720211108
1520471019914 48 438430 948 1724T:portablegcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022020720211108
1532675422272 56 440116 900 1724T:aes-ni-onlygcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022020720211108
1546840632277 48 452530 956 1756T:portablegcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022020720211108
1547121134978 56 454488 908 1756T:aes-ni-onlygcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022020720211108
1557029031555 48 451122 956 1756T:portablegcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022020720211108
1570326915440 48 436010 980 1724T:portableclang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022020720211108
1573074817755 56 437636 932 1724T:aes-ni-onlyclang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022020720211108
1576132433953 56 452824 908 1756T:aes-ni-onlygcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022020720211108
1603071615882 56 435186 924 1724T:aes-ni-onlyclang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022020720211108
1650445313703 48 433672 972 1724T:portableclang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022020720211108

Compiler output

Implementation: T:aes-ni-and-pclmul
Security model: timingleaks
Compiler: clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
aes.c: aes.c:9:4: error: "This code requries support for AES_NI and SSSE3"
aes.c: # error "This code requries support for AES_NI and SSSE3"
aes.c: ^
aes.c: 1 error generated.

Number of similar (compiler,implementation) pairs: 6, namely:
CompilerImplementations
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE T:aes-ni-and-pclmul T:aes-ni-only T:avx2 T:avx512 T:avx512-vpclmul T:ches2021

Compiler output

Implementation: T:avx512
Security model: timingleaks
Compiler: clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
decode.c: In file included from decode.c:39:
decode.c: ./gf2x.h:22:10: error: always_inline function '_mm512_loadu_si512' requires target feature 'avx512f', but would be inlined into function 'gf2x_mod_add' that is compiled without support for 'avx512f'
decode.c: va = LOAD(&a_qwords[i]);
decode.c: ^
decode.c: ./x86_64_intrinsic.h:40:27: note: expanded from macro 'LOAD'
decode.c: # define LOAD(mem) _mm512_loadu_si512((mem))
decode.c: ^
decode.c: In file included from decode.c:39:
decode.c: ./gf2x.h:22:10: error: AVX vector return of type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled changes the ABI
decode.c: ./x86_64_intrinsic.h:40:27: note: expanded from macro 'LOAD'
decode.c: # define LOAD(mem) _mm512_loadu_si512((mem))
decode.c: ^
decode.c: In file included from decode.c:39:
decode.c: ./gf2x.h:23:10: error: always_inline function '_mm512_loadu_si512' requires target feature 'avx512f', but would be inlined into function 'gf2x_mod_add' that is compiled without support for 'avx512f'
decode.c: vb = LOAD(&b_qwords[i]);
decode.c: ^
decode.c: ./x86_64_intrinsic.h:40:27: note: expanded from macro 'LOAD'
decode.c: # define LOAD(mem) _mm512_loadu_si512((mem))
decode.c: ^
decode.c: In file included from decode.c:39:
decode.c: ./gf2x.h:23:10: error: AVX vector return of type '__m512i' (vector of 8 'long long' values) without 'avx512f' enabled changes the ABI
decode.c: ./x86_64_intrinsic.h:40:27: note: expanded from macro 'LOAD'
decode.c: # define LOAD(mem) _mm512_loadu_si512((mem))
decode.c: ^
decode.c: In file included from decode.c:39:
decode.c: ...

Number of similar (compiler,implementation) pairs: 8, namely:
CompilerImplementations
clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE T:avx512
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE T:avx512
clang -march=native -O -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE T:avx512
clang -march=native -Os -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE T:avx512
clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE T:avx512-vpclmul
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE T:avx512-vpclmul
clang -march=native -O -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE T:avx512-vpclmul
clang -march=native -Os -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE T:avx512-vpclmul

Compiler output

Implementation: T:avx512
Security model: timingleaks
Compiler: gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE
decode.c: In file included from decode.c:39:
decode.c: gf2x.h: In function ‘gf2x_mod_add’:
decode.c: gf2x.h:22:8: warning: AVX512F vector return without AVX512F enabled changes the ABI [-Wpsabi]
decode.c: 22 | va = LOAD(&a_qwords[i]);
decode.c: In file included from /usr/lib/gcc/x86_64-pc-linux-gnu/11.1.0/include/immintrin.h:49,
decode.c: from x86_64_intrinsic.h:20,
decode.c: from defs.h:103,
decode.c: from bike_defs.h:10,
decode.c: from types.h:13,
decode.c: from decode.h:10,
decode.c: from decode.c:37:
decode.c: /usr/lib/gcc/x86_64-pc-linux-gnu/11.1.0/include/avx512fintrin.h:6445:1: error: inlining failed in call to ‘always_inline’ ‘_mm512_storeu_si512’: target specific option mismatch
decode.c: 6445 | _mm512_storeu_si512 (void *__P, __m512i __A)
decode.c: | ^~~~~~~~~~~~~~~~~~~
decode.c: In file included from defs.h:103,
decode.c: from bike_defs.h:10,
decode.c: from types.h:13,
decode.c: from decode.h:10,
decode.c: from decode.c:37:
decode.c: x86_64_intrinsic.h:41:27: note: called from here
decode.c: 41 | # define STORE(mem, reg) _mm512_storeu_si512((mem), (reg))
decode.c: | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
decode.c: gf2x.h:25:5: note: in expansion of macro ‘STORE’
decode.c: 25 | STORE(&c_qwords[i], va ^ vb);
decode.c: | ^~~~~
decode.c: ...

Number of similar (compiler,implementation) pairs: 8, namely:
CompilerImplementations
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE T:avx512
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv -fPIC -fPIE T:avx512
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv -fPIC -fPIE T:avx512
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv -fPIC -fPIE T:avx512
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE T:avx512-vpclmul
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv -fPIC -fPIE T:avx512-vpclmul
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv -fPIC -fPIE T:avx512-vpclmul
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv -fPIC -fPIE T:avx512-vpclmul