Implementation notes: aarch64, pmnod145, crypto_kem/ntskem13136

Computer: pmnod145
Architecture: aarch64
CPU ID: 431f0af1
SUPERCOP version: 20191017
Operation: crypto_kem
Primitive: ntskem13136
TimeObject sizeTest sizeImplementationCompilerBenchmark dateSUPERCOP version
4567000116305 84 16148234 992 1584optarmclang_-march=armv8a+crypto_-mcpu=thunderx2t99_-mtune=thunderx2t99_-O2_-fPIC_-fPIE2019101820191017
4603975180533 84 16212722 992 1584optarmclang_-march=armv8a+crypto_-mcpu=thunderx2t99_-mtune=thunderx2t99_-O3_-fPIC_-fPIE2019101820191017
523090064128 84 1691989 968 1584optgcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2019101820191017
524882574840 84 16103781 976 1600optgcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2019101820191017
529872562029 84 1688848 952 1568optgcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2019101820191017
553162562152 84 1689741 968 1584optgcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2019101820191017
3274697560993 76 1693906 984 1584refarmclang_-march=armv8a+crypto_-mcpu=thunderx2t99_-mtune=thunderx2t99_-O3_-fPIC_-fPIE2019101820191017
3286865057381 76 1689538 984 1584refarmclang_-march=armv8a+crypto_-mcpu=thunderx2t99_-mtune=thunderx2t99_-O2_-fPIC_-fPIE2019101820191017
3500882532236 76 1661197 960 1600refgcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2019101820191017
3653897524536 76 1652325 960 1584refgcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2019101820191017
3759907523576 76 1651181 960 1584refgcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2019101820191017
4074262520577 76 1647288 944 1568refgcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2019101820191017

Compiler output

Implementation: crypto_kem/ntskem13136/sse2
Compiler: armclang -march=armv8a+crypto -mcpu=thunderx2t99 -mtune=thunderx2t99 -O2 -fPIC -fPIE
bitslice_bma_128.c: In file included from bitslice_bma_128.c:17:
bitslice_bma_128.c: ./bits.h:47:9: error: unknown type name '__m128i'
bitslice_bma_128.c: typedef __m128i vector;
bitslice_bma_128.c: ^
bitslice_bma_128.c: ./bits.h:98:11: error: unknown type name '__m128i'
bitslice_bma_128.c: const __m128i a_hi = _mm_unpackhi_epi64(a, a);
bitslice_bma_128.c: ^
bitslice_bma_128.c: ./bits.h:98:26: warning: implicit declaration of function '_mm_unpackhi_epi64' is invalid in C99 [-Wimplicit-function-declaration]
bitslice_bma_128.c: const __m128i a_hi = _mm_unpackhi_epi64(a, a);
bitslice_bma_128.c: ^
bitslice_bma_128.c: ./bits.h:99:21: warning: implicit declaration of function '_mm_cvtsi128_si64' is invalid in C99 [-Wimplicit-function-declaration]
bitslice_bma_128.c: return popcount(_mm_cvtsi128_si64(a_hi)) + popcount(_mm_cvtsi128_si64(a));
bitslice_bma_128.c: ^
bitslice_bma_128.c: In file included from bitslice_bma_128.c:18:
bitslice_bma_128.c: In file included from ./bitslice_bma_128.h:18:
bitslice_bma_128.c: In file included from /dibona_home_nfs/rdolbeau/opt/arm/arm-hpc-compiler-19.3_Generic-AArch64_RHEL-7_aarch64-linux/lib/clang/7.1.0/include/immintrin.h:28:
bitslice_bma_128.c: /dibona_home_nfs/rdolbeau/opt/arm/arm-hpc-compiler-19.3_Generic-AArch64_RHEL-7_aarch64-linux/lib/clang/7.1.0/include/mmintrin.h:64:12: error: invalid conversion between vector type '__m64' (vector of 1 'long long' value) and integer type 'int' of different size
bitslice_bma_128.c: return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
bitslice_bma_128.c: ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
bitslice_bma_128.c: /dibona_home_nfs/rdolbeau/opt/arm/arm-hpc-compiler-19.3_Generic-AArch64_RHEL-7_aarch64-linux/lib/clang/7.1.0/include/mmintrin.h:143:12: error: invalid conversion between vector type '__m64' (vector of 1 'long long' value) and integer type 'int' of different size
bitslice_bma_128.c: return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
bitslice_bma_128.c: ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
bitslice_bma_128.c: /dibona_home_nfs/rdolbeau/opt/arm/arm-hpc-compiler-19.3_Generic-AArch64_RHEL-7_aarch64-linux/lib/clang/7.1.0/include/mmintrin.h:173:12: error: invalid conversion between vector type '__m64' (vector of 1 'long long' value) and integer type 'int' of different size
bitslice_bma_128.c: return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
bitslice_bma_128.c: ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
bitslice_bma_128.c: ...

Number of similar (compiler,implementation) pairs: 2, namely:
CompilerImplementations
armclang -march=armv8a+crypto -mcpu=thunderx2t99 -mtune=thunderx2t99 -O2 -fPIC -fPIE sse2
armclang -march=armv8a+crypto -mcpu=thunderx2t99 -mtune=thunderx2t99 -O3 -fPIC -fPIE sse2

Compiler output

Implementation: crypto_kem/ntskem13136/avx2
Compiler: armclang -march=armv8a+crypto -mcpu=thunderx2t99 -mtune=thunderx2t99 -O2 -fPIC -fPIE
bitslice_bma_128.c: In file included from bitslice_bma_128.c:18:
bitslice_bma_128.c: In file included from ./bitslice_bma_128.h:18:
bitslice_bma_128.c: In file included from /dibona_home_nfs/rdolbeau/opt/arm/arm-hpc-compiler-19.3_Generic-AArch64_RHEL-7_aarch64-linux/lib/clang/7.1.0/include/immintrin.h:28:
bitslice_bma_128.c: /dibona_home_nfs/rdolbeau/opt/arm/arm-hpc-compiler-19.3_Generic-AArch64_RHEL-7_aarch64-linux/lib/clang/7.1.0/include/mmintrin.h:64:12: error: invalid conversion between vector type '__m64' (vector of 1 'long long' value) and integer type 'int' of different size
bitslice_bma_128.c: return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
bitslice_bma_128.c: ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
bitslice_bma_128.c: /dibona_home_nfs/rdolbeau/opt/arm/arm-hpc-compiler-19.3_Generic-AArch64_RHEL-7_aarch64-linux/lib/clang/7.1.0/include/mmintrin.h:143:12: error: invalid conversion between vector type '__m64' (vector of 1 'long long' value) and integer type 'int' of different size
bitslice_bma_128.c: return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
bitslice_bma_128.c: ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
bitslice_bma_128.c: /dibona_home_nfs/rdolbeau/opt/arm/arm-hpc-compiler-19.3_Generic-AArch64_RHEL-7_aarch64-linux/lib/clang/7.1.0/include/mmintrin.h:173:12: error: invalid conversion between vector type '__m64' (vector of 1 'long long' value) and integer type 'int' of different size
bitslice_bma_128.c: return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
bitslice_bma_128.c: ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
bitslice_bma_128.c: /dibona_home_nfs/rdolbeau/opt/arm/arm-hpc-compiler-19.3_Generic-AArch64_RHEL-7_aarch64-linux/lib/clang/7.1.0/include/mmintrin.h:203:12: error: invalid conversion between vector type '__m64' (vector of 1 'long long' value) and integer type 'int' of different size
bitslice_bma_128.c: return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
bitslice_bma_128.c: ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
bitslice_bma_128.c: /dibona_home_nfs/rdolbeau/opt/arm/arm-hpc-compiler-19.3_Generic-AArch64_RHEL-7_aarch64-linux/lib/clang/7.1.0/include/mmintrin.h:230:12: error: invalid conversion between vector type '__m64' (vector of 1 'long long' value) and integer type 'int' of different size
bitslice_bma_128.c: return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);
bitslice_bma_128.c: ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
bitslice_bma_128.c: /dibona_home_nfs/rdolbeau/opt/arm/arm-hpc-compiler-19.3_Generic-AArch64_RHEL-7_aarch64-linux/lib/clang/7.1.0/include/mmintrin.h:253:12: error: invalid conversion between vector type '__m64' (vector of 1 'long long' value) and integer type 'int' of different size
bitslice_bma_128.c: return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);
bitslice_bma_128.c: ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
bitslice_bma_128.c: /dibona_home_nfs/rdolbeau/opt/arm/arm-hpc-compiler-19.3_Generic-AArch64_RHEL-7_aarch64-linux/lib/clang/7.1.0/include/mmintrin.h:274:12: error: invalid conversion between vector type '__m64' (vector of 1 'long long' value) and integer type 'int' of different size
bitslice_bma_128.c: return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);
bitslice_bma_128.c: ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
bitslice_bma_128.c: /dibona_home_nfs/rdolbeau/opt/arm/arm-hpc-compiler-19.3_Generic-AArch64_RHEL-7_aarch64-linux/lib/clang/7.1.0/include/mmintrin.h:301:12: error: invalid conversion between vector type '__m64' (vector of 1 'long long' value) and integer type 'int' of different size
bitslice_bma_128.c: ...

Number of similar (compiler,implementation) pairs: 2, namely:
CompilerImplementations
armclang -march=armv8a+crypto -mcpu=thunderx2t99 -mtune=thunderx2t99 -O2 -fPIC -fPIE avx2
armclang -march=armv8a+crypto -mcpu=thunderx2t99 -mtune=thunderx2t99 -O3 -fPIC -fPIE avx2

Compiler output

Implementation: crypto_kem/ntskem13136/sse2
Compiler: gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE
bitslice_bma_128.c: In file included from bitslice_bma_128.c:17:
bitslice_bma_128.c: bits.h:47:9: error: unknown type name '__m128i'
bitslice_bma_128.c: 47 | typedef __m128i vector;
bitslice_bma_128.c: | ^~~~~~~
bitslice_bma_128.c: bits.h: In function 'vector_popcount':
bitslice_bma_128.c: bits.h:98:11: error: unknown type name '__m128i'
bitslice_bma_128.c: 98 | const __m128i a_hi = _mm_unpackhi_epi64(a, a);
bitslice_bma_128.c: | ^~~~~~~
bitslice_bma_128.c: bits.h:98:26: warning: implicit declaration of function '_mm_unpackhi_epi64' [-Wimplicit-function-declaration]
bitslice_bma_128.c: 98 | const __m128i a_hi = _mm_unpackhi_epi64(a, a);
bitslice_bma_128.c: | ^~~~~~~~~~~~~~~~~~
bitslice_bma_128.c: bits.h:99:21: warning: implicit declaration of function '_mm_cvtsi128_si64' [-Wimplicit-function-declaration]
bitslice_bma_128.c: 99 | return popcount(_mm_cvtsi128_si64(a_hi)) + popcount(_mm_cvtsi128_si64(a));
bitslice_bma_128.c: | ^~~~~~~~~~~~~~~~~
bitslice_bma_128.c: bits.h:93:42: note: in definition of macro 'popcount'
bitslice_bma_128.c: 93 | #define popcount(x) __builtin_popcountll(x)
bitslice_bma_128.c: | ^
bitslice_bma_128.c: In file included from bitslice_bma_128.c:18:
bitslice_bma_128.c: bitslice_bma_128.h: At top level:
bitslice_bma_128.c: bitslice_bma_128.h:18:10: fatal error: immintrin.h: No such file or directory
bitslice_bma_128.c: 18 | #include <immintrin.h>
bitslice_bma_128.c: | ^~~~~~~~~~~~~
bitslice_bma_128.c: compilation terminated.

Number of similar (compiler,implementation) pairs: 4, namely:
CompilerImplementations
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE sse2
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv -fPIC -fPIE sse2
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv -fPIC -fPIE sse2
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv -fPIC -fPIE sse2

Compiler output

Implementation: crypto_kem/ntskem13136/avx2
Compiler: gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE
bitslice_bma_128.c: In file included from bitslice_bma_128.c:18:
bitslice_bma_128.c: bitslice_bma_128.h:18:10: fatal error: immintrin.h: No such file or directory
bitslice_bma_128.c: 18 | #include <immintrin.h>
bitslice_bma_128.c: | ^~~~~~~~~~~~~
bitslice_bma_128.c: compilation terminated.

Number of similar (compiler,implementation) pairs: 4, namely:
CompilerImplementations
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE avx2
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv -fPIC -fPIE avx2
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv -fPIC -fPIE avx2
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv -fPIC -fPIE avx2