Implementation notes: aarch64, supercoplxc, crypto_core/aes256encrypt

Computer: supercoplxc
Architecture: aarch64
CPU ID: 410fd034
SUPERCOP version: 20190816
Operation: crypto_core
Primitive: aes256encrypt
Time | Object size | Test size | Implementation | Compiler | Benchmark date | SUPERCOP version
4001420 0 014758 800 792dolbeau/armv8cryptoclang_-mcpu=native_-mfpu=neon_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments2019082820190816
8802988 0 013784 880 768dolbeau/std-1ftgcc_-O2_-fomit-frame-pointer2019082820190816
8803004 0 015081 888 784dolbeau/std-1ftgcc_-O3_-fomit-frame-pointer2019082820190816
8802988 0 015856 880 768dolbeau/std-1ftgcc_-funroll-loops_-O2_-fomit-frame-pointer2019082820190816
8803004 0 016457 888 784dolbeau/std-1ftgcc_-funroll-loops_-O3_-fomit-frame-pointer2019082820190816
8803984 0 014784 880 768dolbeau/std-2ftgcc_-O2_-fomit-frame-pointer2019082820190816
8804024 0 016097 888 784dolbeau/std-2ftgcc_-O3_-fomit-frame-pointer2019082820190816
8803984 0 016856 880 768dolbeau/std-2ftgcc_-funroll-loops_-O2_-fomit-frame-pointer2019082820190816
8804024 0 017473 888 784dolbeau/std-2ftgcc_-funroll-loops_-O3_-fomit-frame-pointer2019082820190816
8806068 0 016872 880 768dolbeau/std-4ftgcc_-O2_-fomit-frame-pointer2019082820190816
8806068 0 018944 880 768dolbeau/std-4ftgcc_-funroll-loops_-O2_-fomit-frame-pointer2019082820190816
8806088 0 019545 888 784dolbeau/std-4ftgcc_-funroll-loops_-O3_-fomit-frame-pointer2019082820190816
9602776 0 016110 800 792dolbeau/std-1ftclang_-mcpu=native_-mfpu=neon_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments2019082820190816
9603808 0 017142 800 792dolbeau/std-2ftclang_-mcpu=native_-mfpu=neon_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments2019082820190816
9605884 0 019222 800 792dolbeau/std-4ftclang_-mcpu=native_-mfpu=neon_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments2019082820190816
9606088 0 018169 888 784dolbeau/std-4ftgcc_-O3_-fomit-frame-pointer2019082820190816
10402788 0 016070 800 792dolbeau/std-1ftclang_-O3_-fomit-frame-pointer_-Qunused-arguments2019082820190816
10402788 0 016126 800 792dolbeau/std-1ftclang_-O3_-fwrapv_-mavx2_-fomit-frame-pointer_-Qunused-arguments2019082820190816
10402788 0 016126 800 792dolbeau/std-1ftclang_-O3_-fwrapv_-mavx_-fomit-frame-pointer_-Qunused-arguments2019082820190816
10402788 0 016126 800 792dolbeau/std-1ftclang_-O3_-fwrapv_-mavx_-maes_-mpclmul_-fomit-frame-pointer_-Qunused-arguments2019082820190816
10403820 0 017102 800 792dolbeau/std-2ftclang_-O3_-fomit-frame-pointer_-Qunused-arguments2019082820190816
10403820 0 017158 800 792dolbeau/std-2ftclang_-O3_-fwrapv_-mavx2_-fomit-frame-pointer_-Qunused-arguments2019082820190816
10403820 0 017158 800 792dolbeau/std-2ftclang_-O3_-fwrapv_-mavx_-fomit-frame-pointer_-Qunused-arguments2019082820190816
10403820 0 017158 800 792dolbeau/std-2ftclang_-O3_-fwrapv_-mavx_-maes_-mpclmul_-fomit-frame-pointer_-Qunused-arguments2019082820190816
10405884 0 019166 800 792dolbeau/std-4ftclang_-O3_-fomit-frame-pointer_-Qunused-arguments2019082820190816
10405884 0 019222 800 792dolbeau/std-4ftclang_-O3_-fwrapv_-mavx2_-fomit-frame-pointer_-Qunused-arguments2019082820190816
10405884 0 019222 800 792dolbeau/std-4ftclang_-O3_-fwrapv_-mavx_-fomit-frame-pointer_-Qunused-arguments2019082820190816
10405884 0 019222 800 792dolbeau/std-4ftclang_-O3_-fwrapv_-mavx_-maes_-mpclmul_-fomit-frame-pointer_-Qunused-arguments2019082820190816
1120136 0 013838 880 768opensslcc2019082820190816
112080 0 013580 816 792opensslclang_-O3_-fomit-frame-pointer_-Qunused-arguments2019082820190816
112080 0 013628 816 792opensslclang_-O3_-fwrapv_-mavx2_-fomit-frame-pointer_-Qunused-arguments2019082820190816
112080 0 013628 816 792opensslclang_-O3_-fwrapv_-mavx_-fomit-frame-pointer_-Qunused-arguments2019082820190816
112080 0 013628 816 792opensslclang_-O3_-fwrapv_-mavx_-maes_-mpclmul_-fomit-frame-pointer_-Qunused-arguments2019082820190816
112080 0 013628 816 792opensslclang_-mcpu=native_-mfpu=neon_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments2019082820190816
1120136 0 013838 880 768opensslgcc2019082820190816
1120136 0 011134 896 768opensslgcc_-O2_-fomit-frame-pointer2019082820190816
1120136 0 012415 904 784opensslgcc_-O3_-fomit-frame-pointer2019082820190816
1120136 0 011286 896 768opensslgcc_-O_-fomit-frame-pointer2019082820190816
1120136 0 010398 880 760opensslgcc_-Os_-fomit-frame-pointer2019082820190816
1120136 0 011094 896 768opensslgcc_-fno-schedule-insns_-O2_-fomit-frame-pointer2019082820190816
1120136 0 012415 904 784opensslgcc_-fno-schedule-insns_-O3_-fomit-frame-pointer2019082820190816
1120136 0 011286 896 768opensslgcc_-fno-schedule-insns_-O_-fomit-frame-pointer2019082820190816
1120136 0 010398 880 760opensslgcc_-fno-schedule-insns_-Os_-fomit-frame-pointer2019082820190816
1120136 0 013838 880 768opensslgcc_-funroll-loops2019082820190816
1120136 0 013206 896 768opensslgcc_-funroll-loops_-O2_-fomit-frame-pointer2019082820190816
1120136 0 013791 904 784opensslgcc_-funroll-loops_-O3_-fomit-frame-pointer2019082820190816
1120136 0 014510 896 768opensslgcc_-funroll-loops_-O_-fomit-frame-pointer2019082820190816
1120136 0 010462 880 760opensslgcc_-funroll-loops_-Os_-fomit-frame-pointer2019082820190816
1120136 0 013310 896 768opensslgcc_-funroll-loops_-fno-schedule-insns_-O2_-fomit-frame-pointer2019082820190816
1120136 0 013775 904 784opensslgcc_-funroll-loops_-fno-schedule-insns_-O3_-fomit-frame-pointer2019082820190816
1120136 0 014510 896 768opensslgcc_-funroll-loops_-fno-schedule-insns_-O_-fomit-frame-pointer2019082820190816
1120136 0 010462 880 760opensslgcc_-funroll-loops_-fno-schedule-insns_-Os_-fomit-frame-pointer2019082820190816
12003976 0 014104 864 760dolbeau/std-2ftgcc_-funroll-loops_-Os_-fomit-frame-pointer2019082820190816
12003968 0 016936 880 768dolbeau/std-2ftgcc_-funroll-loops_-fno-schedule-insns_-O2_-fomit-frame-pointer2019082820190816
12003980 0 017417 888 784dolbeau/std-2ftgcc_-funroll-loops_-fno-schedule-insns_-O3_-fomit-frame-pointer2019082820190816
12003976 0 014104 864 760dolbeau/std-2ftgcc_-funroll-loops_-fno-schedule-insns_-Os_-fomit-frame-pointer2019082820190816
12006036 0 018113 888 784dolbeau/std-4ftgcc_-fno-schedule-insns_-O3_-fomit-frame-pointer2019082820190816
12006028 0 019000 880 768dolbeau/std-4ftgcc_-funroll-loops_-fno-schedule-insns_-O2_-fomit-frame-pointer2019082820190816
12006036 0 019473 888 784dolbeau/std-4ftgcc_-funroll-loops_-fno-schedule-insns_-O3_-fomit-frame-pointer2019082820190816
12802972 0 016409 888 784dolbeau/std-1ftgcc_-funroll-loops_-fno-schedule-insns_-O3_-fomit-frame-pointer2019082820190816
12806036 0 016096 864 760dolbeau/std-4ftgcc_-Os_-fomit-frame-pointer2019082820190816
12806028 0 016784 880 768dolbeau/std-4ftgcc_-fno-schedule-insns_-O2_-fomit-frame-pointer2019082820190816
12806036 0 016096 864 760dolbeau/std-4ftgcc_-fno-schedule-insns_-Os_-fomit-frame-pointer2019082820190816
12806036 0 016160 864 760dolbeau/std-4ftgcc_-funroll-loops_-Os_-fomit-frame-pointer2019082820190816
12806036 0 016160 864 760dolbeau/std-4ftgcc_-funroll-loops_-fno-schedule-insns_-Os_-fomit-frame-pointer2019082820190816
13602968 0 013088 864 760dolbeau/std-1ftgcc_-funroll-loops_-Os_-fomit-frame-pointer2019082820190816
13602960 0 015928 880 768dolbeau/std-1ftgcc_-funroll-loops_-fno-schedule-insns_-O2_-fomit-frame-pointer2019082820190816
13602968 0 013088 864 760dolbeau/std-1ftgcc_-funroll-loops_-fno-schedule-insns_-Os_-fomit-frame-pointer2019082820190816
13603968 0 014720 880 768dolbeau/std-2ftgcc_-fno-schedule-insns_-O2_-fomit-frame-pointer2019082820190816
14403976 0 014040 864 760dolbeau/std-2ftgcc_-Os_-fomit-frame-pointer2019082820190816
14403980 0 016057 888 784dolbeau/std-2ftgcc_-fno-schedule-insns_-O3_-fomit-frame-pointer2019082820190816
14403976 0 014040 864 760dolbeau/std-2ftgcc_-fno-schedule-insns_-Os_-fomit-frame-pointer2019082820190816
15202968 0 013024 864 760dolbeau/std-1ftgcc_-Os_-fomit-frame-pointer2019082820190816
15202960 0 013712 880 768dolbeau/std-1ftgcc_-fno-schedule-insns_-O2_-fomit-frame-pointer2019082820190816
15202972 0 015049 888 784dolbeau/std-1ftgcc_-fno-schedule-insns_-O3_-fomit-frame-pointer2019082820190816
15202968 0 013024 864 760dolbeau/std-1ftgcc_-fno-schedule-insns_-Os_-fomit-frame-pointer2019082820190816
16806128 0 017072 880 768dolbeau/std-4ftgcc_-O_-fomit-frame-pointer2019082820190816
16806128 0 017072 880 768dolbeau/std-4ftgcc_-fno-schedule-insns_-O_-fomit-frame-pointer2019082820190816
16806124 0 020288 880 768dolbeau/std-4ftgcc_-funroll-loops_-O_-fomit-frame-pointer2019082820190816
16806124 0 020288 880 768dolbeau/std-4ftgcc_-funroll-loops_-fno-schedule-insns_-O_-fomit-frame-pointer2019082820190816
18404064 0 015008 880 768dolbeau/std-2ftgcc_-O_-fomit-frame-pointer2019082820190816
18404064 0 015008 880 768dolbeau/std-2ftgcc_-fno-schedule-insns_-O_-fomit-frame-pointer2019082820190816
18404060 0 018224 880 768dolbeau/std-2ftgcc_-funroll-loops_-O_-fomit-frame-pointer2019082820190816
18404060 0 018224 880 768dolbeau/std-2ftgcc_-funroll-loops_-fno-schedule-insns_-O_-fomit-frame-pointer2019082820190816
20003036 0 013992 880 768dolbeau/std-1ftgcc_-O_-fomit-frame-pointer2019082820190816
20003036 0 013992 880 768dolbeau/std-1ftgcc_-fno-schedule-insns_-O_-fomit-frame-pointer2019082820190816
20003036 0 017208 880 768dolbeau/std-1ftgcc_-funroll-loops_-O_-fomit-frame-pointer2019082820190816
20003036 0 017208 880 768dolbeau/std-1ftgcc_-funroll-loops_-fno-schedule-insns_-O_-fomit-frame-pointer2019082820190816
64003488 14336 017016 15200 768dolbeau/std-4ftcc2019082820190816
64003488 14336 017016 15200 768dolbeau/std-4ftgcc2019082820190816
64003488 14336 017016 15200 768dolbeau/std-4ftgcc_-funroll-loops2019082820190816
80803596 14336 017120 15200 768dolbeau/std-2ftcc2019082820190816
80803596 14336 017120 15200 768dolbeau/std-2ftgcc2019082820190816
80803596 14336 017120 15200 768dolbeau/std-2ftgcc_-funroll-loops2019082820190816
89603644 14336 017168 15200 768dolbeau/std-1ftcc2019082820190816
89603644 14336 017168 15200 768dolbeau/std-1ftgcc2019082820190816
89603644 14336 017168 15200 768dolbeau/std-1ftgcc_-funroll-loops2019082820190816
5091203160 0 015249 888 784refgcc_-O3_-fomit-frame-pointer2019082820190816
5127203392 0 016849 888 784refgcc_-funroll-loops_-O3_-fomit-frame-pointer2019082820190816
5456803436 0 016881 888 784refgcc_-funroll-loops_-fno-schedule-insns_-O3_-fomit-frame-pointer2019082820190816
5620003128 0 016446 800 792refclang_-mcpu=native_-mfpu=neon_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments2019082820190816
5896803172 0 015265 888 784refgcc_-fno-schedule-insns_-O3_-fomit-frame-pointer2019082820190816
8068003380 0 016702 800 792refclang_-O3_-fwrapv_-mavx2_-fomit-frame-pointer_-Qunused-arguments2019082820190816
8068003380 0 016702 800 792refclang_-O3_-fwrapv_-mavx_-maes_-mpclmul_-fomit-frame-pointer_-Qunused-arguments2019082820190816
8147203380 0 016702 800 792refclang_-O3_-fwrapv_-mavx_-fomit-frame-pointer_-Qunused-arguments2019082820190816
8148003380 0 016646 800 792refclang_-O3_-fomit-frame-pointer_-Qunused-arguments2019082820190816
8164003156 0 016024 880 768refgcc_-funroll-loops_-O2_-fomit-frame-pointer2019082820190816
8167203164 0 016136 880 768refgcc_-funroll-loops_-fno-schedule-insns_-O2_-fomit-frame-pointer2019082820190816
11019203432 0 017608 880 768refgcc_-funroll-loops_-O_-fomit-frame-pointer2019082820190816
11019203432 0 017608 880 768refgcc_-funroll-loops_-fno-schedule-insns_-O_-fomit-frame-pointer2019082820190816
29464001740 0 011872 864 760refgcc_-funroll-loops_-fno-schedule-insns_-Os_-fomit-frame-pointer2019082820190816
29586401740 0 011872 864 760refgcc_-funroll-loops_-Os_-fomit-frame-pointer2019082820190816
30567201680 0 011752 864 760refgcc_-fno-schedule-insns_-Os_-fomit-frame-pointer2019082820190816
30568001680 0 011752 864 760refgcc_-Os_-fomit-frame-pointer2019082820190816
34678401808 0 012608 880 768refgcc_-O2_-fomit-frame-pointer2019082820190816
35585601780 0 012536 880 768refgcc_-fno-schedule-insns_-O2_-fomit-frame-pointer2019082820190816
39404801872 0 012824 880 768refgcc_-O_-fomit-frame-pointer2019082820190816
39404801872 0 012824 880 768refgcc_-fno-schedule-insns_-O_-fomit-frame-pointer2019082820190816
165345603840 0 017352 864 768refgcc_-funroll-loops2019082820190816
165376803840 0 017352 864 768refgcc2019082820190816
165404803840 0 017352 864 768refcc2019082820190816

Compiler output

Implementation: dolbeau/armv8crypto
Security model: unknown
Compiler: cc
core.c: In file included from core.c:9:
core.c: core.c: In function 'aes256_armv8_encrypt':
core.c: /usr/lib/gcc/aarch64-linux-gnu/8/include/arm_neon.h:12426:1: error: inlining failed in call to always_inline 'vaeseq_u8': target specific option mismatch
core.c: vaeseq_u8 (uint8x16_t data, uint8x16_t key)
core.c: ^~~~~~~~~
core.c: core.c:122:10: note: called from here
core.c: temp = vaeseq_u8(temp, vld1q_u8((rkeys+208)));
core.c: ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
core.c: In file included from core.c:9:
core.c: /usr/lib/gcc/aarch64-linux-gnu/8/include/arm_neon.h:12426:1: error: inlining failed in call to always_inline 'vaeseq_u8': target specific option mismatch
core.c: vaeseq_u8 (uint8x16_t data, uint8x16_t key)
core.c: ^~~~~~~~~
core.c: core.c:122:10: note: called from here
core.c: temp = vaeseq_u8(temp, vld1q_u8((rkeys+208)));
core.c: ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Number of similar (compiler,implementation) pairs: 19, namely:
Compiler | Implementations
cc dolbeau/armv8crypto
gcc dolbeau/armv8crypto
gcc -O2 -fomit-frame-pointer dolbeau/armv8crypto
gcc -O3 -fomit-frame-pointer dolbeau/armv8crypto
gcc -O -fomit-frame-pointer dolbeau/armv8crypto
gcc -Os -fomit-frame-pointer dolbeau/armv8crypto
gcc -fno-schedule-insns -O2 -fomit-frame-pointer dolbeau/armv8crypto
gcc -fno-schedule-insns -O3 -fomit-frame-pointer dolbeau/armv8crypto
gcc -fno-schedule-insns -O -fomit-frame-pointer dolbeau/armv8crypto
gcc -fno-schedule-insns -Os -fomit-frame-pointer dolbeau/armv8crypto
gcc -funroll-loops dolbeau/armv8crypto
gcc -funroll-loops -O2 -fomit-frame-pointer dolbeau/armv8crypto
gcc -funroll-loops -O3 -fomit-frame-pointer dolbeau/armv8crypto
gcc -funroll-loops -O -fomit-frame-pointer dolbeau/armv8crypto
gcc -funroll-loops -Os -fomit-frame-pointer dolbeau/armv8crypto
gcc -funroll-loops -fno-schedule-insns -O2 -fomit-frame-pointer dolbeau/armv8crypto
gcc -funroll-loops -fno-schedule-insns -O3 -fomit-frame-pointer dolbeau/armv8crypto
gcc -funroll-loops -fno-schedule-insns -O -fomit-frame-pointer dolbeau/armv8crypto
gcc -funroll-loops -fno-schedule-insns -Os -fomit-frame-pointer dolbeau/armv8crypto

Compiler output

Implementation: dolbeau/armv8crypto
Security model: unknown
Compiler: clang -O3 -fomit-frame-pointer -Qunused-arguments
core.c: core.c:47:16: warning: implicit declaration of function 'vaeseq_u8' is invalid in C99 [-Wimplicit-function-declaration]
core.c: temp_lds = armv8_aese_sbox(rotl_aes_edrk);
core.c: ^
core.c: core.c:24:38: note: expanded from macro 'armv8_aese_sbox'
core.c: vgetq_lane_u32(vreinterpretq_u32_u8(vaeseq_u8(vreinterpretq_u8_u32(vdupq_n_u32(input)), vzero)),0)
core.c: ^
core.c: core.c:47:16: error: passing 'int' to parameter of incompatible type 'uint8x16_t' (vector of 16 'uint8_t' values)
core.c: temp_lds = armv8_aese_sbox(rotl_aes_edrk);
core.c: ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
core.c: core.c:24:38: note: expanded from macro 'armv8_aese_sbox'
core.c: vgetq_lane_u32(vreinterpretq_u32_u8(vaeseq_u8(vreinterpretq_u8_u32(vdupq_n_u32(input)), vzero)),0)
core.c: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
core.c: /usr/lib/llvm-7/lib/clang/7.0.1/include/arm_neon.h:6460:21: note: expanded from macro 'vgetq_lane_u32'
core.c: uint32x4_t __s0 = __p0; \
core.c: ^~~~
core.c: /usr/lib/llvm-7/lib/clang/7.0.1/include/arm_neon.h:39628:49: note: passing argument to parameter '__p0' here
core.c: __ai uint32x4_t vreinterpretq_u32_u8(uint8x16_t __p0) {
core.c: ^
core.c: core.c:60:16: warning: implicit declaration of function 'vaeseq_u8' is invalid in C99 [-Wimplicit-function-declaration]
core.c: temp_lds = armv8_aese_sbox(tmp11);
core.c: ^
core.c: core.c:24:38: note: expanded from macro 'armv8_aese_sbox'
core.c: vgetq_lane_u32(vreinterpretq_u32_u8(vaeseq_u8(vreinterpretq_u8_u32(vdupq_n_u32(input)), vzero)),0)
core.c: ^
core.c: core.c:60:16: error: passing 'int' to parameter of incompatible type 'uint8x16_t' (vector of 16 'uint8_t' values)
core.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler | Implementations
clang -O3 -fomit-frame-pointer -Qunused-arguments dolbeau/armv8crypto
clang -O3 -fwrapv -mavx2 -fomit-frame-pointer -Qunused-arguments dolbeau/armv8crypto
clang -O3 -fwrapv -mavx -fomit-frame-pointer -Qunused-arguments dolbeau/armv8crypto
clang -O3 -fwrapv -mavx -maes -mpclmul -fomit-frame-pointer -Qunused-arguments dolbeau/armv8crypto