Implementation notes: aarch64, hikey960, crypto_aead/pi32cipher128v2

Computer: hikey960
Architecture: aarch64
CPU ID: 410fd034
SUPERCOP version: 20190816
Operation: crypto_aead
Primitive: pi32cipher128v2
Time | Object size | Test size | Implementation | Compiler | Benchmark date | SUPERCOP version
53298 | 19491 8 0 | 35362 944 864 | goptv | gcc_-funroll-loops_-fno-schedule-insns_-O_-fomit-frame-pointer | 20191129 | 20190816
62181 | 14731 8 0 | 27402 944 864 | goptv | gcc_-fno-schedule-insns_-O_-fomit-frame-pointer | 20191129 | 20190816
91061 | 12051 8 0 | 27939 952 896 | ref2 | gcc_-funroll-loops_-O3_-fomit-frame-pointer | 20191129 | 20190816
91575 | 17051 8 0 | 32594 944 864 | goptv | gcc_-funroll-loops_-O2_-fomit-frame-pointer | 20191129 | 20190816
95503 | 14699 8 0 | 27210 944 864 | goptv | gcc_-fno-schedule-insns_-O2_-fomit-frame-pointer | 20191129 | 20190816
97713 | 12027 8 0 | 26243 952 896 | ref2 | gcc_-fno-schedule-insns_-O3_-fomit-frame-pointer | 20191129 | 20190816
99900 | 14347 8 0 | 26922 944 864 | goptv | gcc_-O2_-fomit-frame-pointer | 20191129 | 20190816
108225 | 14759 8 0 | 27426 944 864 | goptv | gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv | 20191129 | 20190816
141542 | 19715 8 0 | 35603 952 896 | goptv | gcc_-funroll-loops_-O3_-fomit-frame-pointer | 20191129 | 20190816
145705 | 18495 8 0 | 32771 952 896 | goptv | gcc_-O3_-fomit-frame-pointer | 20191129 | 20190816
149850 | 19971 8 0 | 35811 952 896 | goptv | gcc_-funroll-loops_-fno-schedule-insns_-O3_-fomit-frame-pointer | 20191129 | 20190816
158175 | 18799 8 0 | 33027 952 896 | goptv | gcc_-fno-schedule-insns_-O3_-fomit-frame-pointer | 20191129 | 20190816
158194 | 17739 8 0 | 33314 944 864 | goptv | gcc_-funroll-loops_-fno-schedule-insns_-O2_-fomit-frame-pointer | 20191129 | 20190816
166500 | 14227 8 0 | 26802 944 864 | goptv | gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv | 20191129 | 20190816
166500 | 11763 8 0 | 26019 952 896 | ref2 | gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv | 20191129 | 20190816
174972 | 11795 8 0 | 26067 952 896 | ref2 | gcc_-O3_-fomit-frame-pointer | 20191129 | 20190816
184343 | 6379 8 0 | 19488 824 880 | ref3 | clang_-O3_-fwrapv_-mavx2_-fomit-frame-pointer_-Qunused-arguments | 20191129 | 20190816
186543 | 6379 8 0 | 19488 824 880 | ref3 | clang_-O3_-fwrapv_-mavx_-maes_-mpclmul_-fomit-frame-pointer_-Qunused-arguments | 20191129 | 20190816
186920 | 18659 8 0 | 32915 952 896 | goptv | gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv | 20191129 | 20190816
208125 | 8899 8 0 | 22008 824 880 | ref2 | clang_-O3_-fwrapv_-mavx_-fomit-frame-pointer_-Qunused-arguments | 20191129 | 20190816
208125 | 8899 8 0 | 22008 824 880 | ref2 | clang_-O3_-fwrapv_-mavx_-maes_-mpclmul_-fomit-frame-pointer_-Qunused-arguments | 20191129 | 20190816
208150 | 6455 8 0 | 18286 928 864 | goptv | gcc_-funroll-loops_-Os_-fomit-frame-pointer | 20191129 | 20190816
208150 | 8899 8 0 | 22008 824 880 | ref2 | clang_-O3_-fwrapv_-mavx2_-fomit-frame-pointer_-Qunused-arguments | 20191129 | 20190816
212313 | 6299 8 0 | 18118 928 864 | goptv | gcc_-fno-schedule-insns_-Os_-fomit-frame-pointer | 20191129 | 20190816
213192 | 6899 8 0 | 21171 952 896 | ref3 | gcc_-O3_-fomit-frame-pointer | 20191129 | 20190816
216450 | 14731 8 0 | 27402 944 864 | goptv | gcc_-O_-fomit-frame-pointer | 20191129 | 20190816
216450 | 6235 8 0 | 19328 824 880 | ref3 | clang_-O3_-fomit-frame-pointer_-Qunused-arguments | 20191129 | 20190816
222075 | 7083 8 0 | 21307 952 896 | ref3 | gcc_-fno-schedule-insns_-O3_-fomit-frame-pointer | 20191129 | 20190816
222167 | 19491 8 0 | 35362 944 864 | goptv | gcc_-funroll-loops_-O_-fomit-frame-pointer | 20191129 | 20190816
224775 | 4735 8 0 | 17306 944 864 | ref3 | gcc_-O2_-fomit-frame-pointer | 20191129 | 20190816
244310 | 7787 8 0 | 23354 944 864 | ref3 | gcc_-funroll-loops_-fno-schedule-insns_-O2_-fomit-frame-pointer | 20191129 | 20190816
249750 | 9243 8 0 | 22336 824 880 | ref2 | clang_-O3_-fomit-frame-pointer_-Qunused-arguments | 20191129 | 20190816
249750 | 6955 8 0 | 21211 952 896 | ref3 | gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv | 20191129 | 20190816
283050 | 12267 8 0 | 28115 952 896 | ref2 | gcc_-funroll-loops_-fno-schedule-insns_-O3_-fomit-frame-pointer | 20191129 | 20190816
287178 | 3487 8 0 | 15294 928 864 | ref3 | gcc_-Os_-fomit-frame-pointer | 20191129 | 20190816
291375 | 3499 8 0 | 15334 928 864 | ref3 | gcc_-funroll-loops_-fno-schedule-insns_-Os_-fomit-frame-pointer | 20191129 | 20190816
299700 | 6423 8 0 | 22290 944 864 | ref3 | gcc_-funroll-loops_-fno-schedule-insns_-O_-fomit-frame-pointer | 20191129 | 20190816
355280 | 6423 8 0 | 22290 944 864 | ref3 | gcc_-funroll-loops_-O_-fomit-frame-pointer | 20191129 | 20190816
364203 | 3947 8 0 | 16610 944 864 | ref3 | gcc_-fno-schedule-insns_-O_-fomit-frame-pointer | 20191129 | 20190816
373870 | 6455 8 0 | 18286 928 864 | goptv | gcc_-funroll-loops_-fno-schedule-insns_-Os_-fomit-frame-pointer | 20191129 | 20190816
373870 | 3947 8 0 | 16610 944 864 | ref3 | gcc_-O_-fomit-frame-pointer | 20191129 | 20190816
449550 | 3487 8 0 | 15294 928 864 | ref3 | gcc_-fno-schedule-insns_-Os_-fomit-frame-pointer | 20191129 | 20190816
457930 | 4743 8 0 | 17314 944 864 | ref3 | gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv | 20191129 | 20190816
464063 | 6299 8 0 | 18118 928 864 | goptv | gcc_-Os_-fomit-frame-pointer | 20191129 | 20190816
488565 | 4871 8 0 | 17378 944 864 | ref3 | gcc_-fno-schedule-insns_-O2_-fomit-frame-pointer | 20191129 | 20190816
499560 | 3499 8 0 | 15334 928 864 | ref3 | gcc_-funroll-loops_-Os_-fomit-frame-pointer | 20191129 | 20190816
549516 | 8595 8 0 | 24162 944 864 | ref2 | gcc_-funroll-loops_-fno-schedule-insns_-O2_-fomit-frame-pointer | 20191129 | 20190816
607725 | 11528 8 0 | 28655 936 864 | goptv | gcc | 20191129 | 20190816
649350 | 6379 8 0 | 19488 824 880 | ref3 | clang_-O3_-fwrapv_-mavx_-fomit-frame-pointer_-Qunused-arguments | 20191129 | 20190816
697135 | 3939 8 0 | 16610 944 864 | ref3 | gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv | 20191129 | 20190816
747740 | 7307 8 0 | 22842 944 864 | ref3 | gcc_-funroll-loops_-O2_-fomit-frame-pointer | 20191129 | 20190816
782456 | 7875 8 0 | 23763 952 896 | ref3 | gcc_-funroll-loops_-O3_-fomit-frame-pointer | 20191129 | 20190816
807622 | 8283 8 0 | 23818 944 864 | ref2 | gcc_-funroll-loops_-O2_-fomit-frame-pointer | 20191129 | 20190816
865800 | 6311 8 0 | 18126 928 864 | goptv | gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv | 20191129 | 20190816
888400 | 7295 8 0 | 23162 944 864 | ref2 | gcc_-funroll-loops_-fno-schedule-insns_-O_-fomit-frame-pointer | 20191129 | 20190816
982350 | 3511 8 0 | 15318 928 864 | ref3 | gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv | 20191129 | 20190816
1087449 | 8099 8 0 | 23939 952 896 | ref3 | gcc_-funroll-loops_-fno-schedule-insns_-O3_-fomit-frame-pointer | 20191129 | 20190816
1154790 | 4631 8 0 | 17202 944 864 | ref2 | gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv | 20191129 | 20190816
1298700 | 3859 8 0 | 15670 928 864 | ref2 | gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv | 20191129 | 20190816
1340325 | 3851 8 0 | 15686 928 864 | ref2 | gcc_-funroll-loops_-fno-schedule-insns_-Os_-fomit-frame-pointer | 20191129 | 20190816
1390275 | 4639 8 0 | 17146 944 864 | ref2 | gcc_-fno-schedule-insns_-O2_-fomit-frame-pointer | 20191129 | 20190816
1398768 | 4623 8 0 | 17194 944 864 | ref2 | gcc_-O2_-fomit-frame-pointer | 20191129 | 20190816
1431900 | 4323 8 0 | 16994 944 864 | ref2 | gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv | 20191129 | 20190816
1623375 | 7295 8 0 | 23162 944 864 | ref2 | gcc_-funroll-loops_-O_-fomit-frame-pointer | 20191129 | 20190816
1687770 | 11528 8 0 | 28655 936 864 | goptv | cc | 20191129 | 20190816
1739925 | 11528 8 0 | 28655 936 864 | goptv | gcc_-funroll-loops | 20191129 | 20190816
2006325 | 3851 8 0 | 15686 928 864 | ref2 | gcc_-funroll-loops_-Os_-fomit-frame-pointer | 20191129 | 20190816
2268835 | 4335 8 0 | 17002 944 864 | ref2 | gcc_-fno-schedule-insns_-O_-fomit-frame-pointer | 20191129 | 20190816
2338600 | 6732 8 0 | 23847 936 864 | ref3 | cc | 20191129 | 20190816
2397600 | 3831 8 0 | 15638 928 864 | ref2 | gcc_-fno-schedule-insns_-Os_-fomit-frame-pointer | 20191129 | 20190816
2405925 | 3831 8 0 | 15638 928 864 | ref2 | gcc_-Os_-fomit-frame-pointer | 20191129 | 20190816
2614050 | 6732 8 0 | 23847 936 864 | ref3 | gcc | 20191129 | 20190816
2614050 | 6732 8 0 | 23847 936 864 | ref3 | gcc_-funroll-loops | 20191129 | 20190816
2626853 | 4335 8 0 | 17002 944 864 | ref2 | gcc_-O_-fomit-frame-pointer | 20191129 | 20190816
7144423 | 6992 8 0 | 24103 936 864 | ref2 | cc | 20191129 | 20190816
12420900 | 6992 8 0 | 24103 936 864 | ref2 | gcc | 20191129 | 20190816
16861306 | 6992 8 0 | 24103 936 864 | ref2 | gcc_-funroll-loops | 20191129 | 20190816

Test failure

Implementation: optimized_nonSSE
Security model: unknown
Compiler: cc
error 111
crypto_aead_encrypt returns more than crypto_aead_ABYTES extra bytes

Number of similar (compiler,implementation) pairs: 54, namely:
Compiler | Implementations
cc optimized_nonSSE
clang -O3 -fomit-frame-pointer -Qunused-arguments optimized_nonSSE
clang -O3 -fwrapv -mavx2 -fomit-frame-pointer -Qunused-arguments optimized_nonSSE
clang -O3 -fwrapv -mavx -fomit-frame-pointer -Qunused-arguments optimized_nonSSE
clang -O3 -fwrapv -mavx -maes -mpclmul -fomit-frame-pointer -Qunused-arguments optimized_nonSSE
gcc optimized_nonSSE
gcc -O2 -fomit-frame-pointer optimized_nonSSE
gcc -O3 -fomit-frame-pointer optimized_nonSSE
gcc -O -fomit-frame-pointer optimized_nonSSE
gcc -Os -fomit-frame-pointer optimized_nonSSE
gcc -fno-schedule-insns -O2 -fomit-frame-pointer optimized_nonSSE
gcc -fno-schedule-insns -O3 -fomit-frame-pointer optimized_nonSSE
gcc -fno-schedule-insns -O -fomit-frame-pointer optimized_nonSSE
gcc -fno-schedule-insns -Os -fomit-frame-pointer optimized_nonSSE
gcc -funroll-loops optimized_nonSSE
gcc -funroll-loops -O2 -fomit-frame-pointer optimized_nonSSE
gcc -funroll-loops -O3 -fomit-frame-pointer optimized_nonSSE
gcc -funroll-loops -O -fomit-frame-pointer optimized_nonSSE
gcc -funroll-loops -Os -fomit-frame-pointer optimized_nonSSE
gcc -funroll-loops -fno-schedule-insns -O2 -fomit-frame-pointer optimized_nonSSE
gcc -funroll-loops -fno-schedule-insns -O3 -fomit-frame-pointer optimized_nonSSE
gcc -funroll-loops -fno-schedule-insns -O -fomit-frame-pointer optimized_nonSSE
gcc -funroll-loops -fno-schedule-insns -Os -fomit-frame-pointer optimized_nonSSE
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv optimized_nonSSE
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv optimized_nonSSE
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv optimized_nonSSE
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv optimized_nonSSE
cc ref
clang -O3 -fomit-frame-pointer -Qunused-arguments ref
clang -O3 -fwrapv -mavx2 -fomit-frame-pointer -Qunused-arguments ref
clang -O3 -fwrapv -mavx -fomit-frame-pointer -Qunused-arguments ref
clang -O3 -fwrapv -mavx -maes -mpclmul -fomit-frame-pointer -Qunused-arguments ref
gcc ref
gcc -O2 -fomit-frame-pointer ref
gcc -O3 -fomit-frame-pointer ref
gcc -O -fomit-frame-pointer ref
gcc -Os -fomit-frame-pointer ref
gcc -fno-schedule-insns -O2 -fomit-frame-pointer ref
gcc -fno-schedule-insns -O3 -fomit-frame-pointer ref
gcc -fno-schedule-insns -O -fomit-frame-pointer ref
gcc -fno-schedule-insns -Os -fomit-frame-pointer ref
gcc -funroll-loops ref
gcc -funroll-loops -O2 -fomit-frame-pointer ref
gcc -funroll-loops -O3 -fomit-frame-pointer ref
gcc -funroll-loops -O -fomit-frame-pointer ref
gcc -funroll-loops -Os -fomit-frame-pointer ref
gcc -funroll-loops -fno-schedule-insns -O2 -fomit-frame-pointer ref
gcc -funroll-loops -fno-schedule-insns -O3 -fomit-frame-pointer ref
gcc -funroll-loops -fno-schedule-insns -O -fomit-frame-pointer ref
gcc -funroll-loops -fno-schedule-insns -Os -fomit-frame-pointer ref
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv ref
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv ref
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv ref
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv ref

Compiler output

Implementation: goptv
Security model: unknown
Compiler: clang -O3 -fomit-frame-pointer -Qunused-arguments
pi-cipher.c: pi-cipher.c:273:15: error: cannot convert between vector values of different size ('qword_t' (vector of 4 'word_t' values) and 'uint8_t' (aka 'unsigned char'))
pi-cipher.c: return (x << n) | (x >> ((PI_WORD_SIZE) - n));
pi-cipher.c: ~ ^ ~
pi-cipher.c: pi-cipher.c:273:26: error: cannot convert between vector values of different size ('qword_t' (vector of 4 'word_t' values) and 'int')
pi-cipher.c: return (x << n) | (x >> ((PI_WORD_SIZE) - n));
pi-cipher.c: ~ ^ ~~~~~~~~~~~~~~~~~~~~
pi-cipher.c: pi-cipher.c:286:9: error: use of unknown builtin '__builtin_shuffle' [-Wimplicit-function-declaration]
pi-cipher.c: n_t += __builtin_shuffle(y, g_mask);
pi-cipher.c: ^
pi-cipher.c: pi-cipher.c:286:6: error: cannot convert between vector values of different size ('vchunk_t' (vector of 4 'word_t' values) and 'int')
pi-cipher.c: n_t += __builtin_shuffle(y, g_mask);
pi-cipher.c: ~~~ ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
pi-cipher.c: pi-cipher.c:287:6: error: cannot convert between vector values of different size ('vchunk_t' (vector of 4 'word_t' values) and 'int')
pi-cipher.c: n_t += __builtin_shuffle(y, n_mask);
pi-cipher.c: ~~~ ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
pi-cipher.c: pi-cipher.c:289:8: error: cannot convert between vector values of different size ('unsigned long' and 'vchunk_t' (vector of 4 'word_t' values))
pi-cipher.c: n_t = ROTL(n_t, n_rot);
pi-cipher.c: ^~~~~~~~~~~~~~~~
pi-cipher.c: pi-cipher.c:267:64: note: expanded from macro 'ROTL'
pi-cipher.c: #define ROTL(x, n) (((x) << (n)) | ((x) >> (sizeof(word_t) * 8 - (n))))
pi-cipher.c: ~~~~~~~~~~~~~~~~~~ ^ ~~~
pi-cipher.c: pi-cipher.c:290:6: error: cannot convert between vector values of different size ('vchunk_t' (vector of 4 'word_t' values) and 'int')
pi-cipher.c: n_t ^= __builtin_shuffle(n_t, n_x_1) ^ __builtin_shuffle(n_t, n_x_2);
pi-cipher.c: ~~~ ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
pi-cipher.c: pi-cipher.c:305:6: error: cannot convert between vector values of different size ('vchunk_t' (vector of 4 'word_t' values) and 'int')
pi-cipher.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler | Implementations
clang -O3 -fomit-frame-pointer -Qunused-arguments goptv
clang -O3 -fwrapv -mavx2 -fomit-frame-pointer -Qunused-arguments goptv
clang -O3 -fwrapv -mavx -fomit-frame-pointer -Qunused-arguments goptv
clang -O3 -fwrapv -mavx -maes -mpclmul -fomit-frame-pointer -Qunused-arguments goptv