Implementation notes: amd64, waldorf, crypto_hash/keccakc448

Computer: waldorf
Architecture: amd64
CPU ID: GenuineIntel-000106e5-bfebfbff
SUPERCOP version: 20160715
Operation: crypto_hash
Primitive: keccakc448
Time | Implementation | Compiler | Benchmark date | SUPERCOP version
38276 | opt64lcu6 | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160718 | 20160715
40180 | x86_64_asm | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160718 | 20160715
40836 | inplace | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160718 | 20160715
41008 | inplace | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
41068 | opt64lcu6 | clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20160718 | 20160715
41172 | opt64lcu24shld | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160718 | 20160715
41448 | opt64lcu6 | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160718 | 20160715
41628 | opt64lcu6 | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
41784 | x86_64_asm | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
42184 | opt64lcu24 | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
42796 | opt64lcu24 | clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20160718 | 20160715
42844 | x86_64_asm | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
42932 | opt64lcu24 | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160718 | 20160715
43056 | inplace | clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20160718 | 20160715
43492 | simple | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160718 | 20160715
43496 | opt64lcu24shld | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160718 | 20160715
43584 | x86_64_shld | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160718 | 20160715
43624 | x86_64_shld | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
43696 | sseu2 | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
43716 | x86_64_shld | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160718 | 20160715
43788 | opt64lcu24 | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
44068 | opt64lcu24shld | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
44176 | simple | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160718 | 20160715
44392 | x86_64_asm | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160718 | 20160715
44408 | simple | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
44408 | x86_64_shld | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
44596 | opt64lcu24shld | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
44816 | simple | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
44820 | opt64u6 | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160718 | 20160715
44824 | opt64lcu6 | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
44944 | opt64u6 | clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20160718 | 20160715
45036 | opt64lcu24 | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160718 | 20160715
45268 | sseu2 | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
45288 | sseu2 | clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20160718 | 20160715
45784 | inplace | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
46068 | opt64lcu24shld | clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20160718 | 20160715
46604 | opt64u6 | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160718 | 20160715
47192 | sseu2 | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160718 | 20160715
47236 | opt64u6 | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
47452 | opt64u6 | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
47676 | simple | clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20160718 | 20160715
47856 | inplace | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160718 | 20160715
55844 | sseu2 | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160718 | 20160715
56752 | mmxu1 | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160718 | 20160715
59548 | mmxu1 | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
60156 | mmxu1 | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160718 | 20160715
61828 | mmxu1 | clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20160718 | 20160715
62908 | mmxu1 | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
92028 | opt32bi-rvku2 | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
92224 | opt32bi-s2lcu4 | clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20160718 | 20160715
93832 | opt32bi-s2lcu4 | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160718 | 20160715
94380 | opt32biT-s2lcu4 | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160718 | 20160715
95220 | opt32biT-s2lcu4 | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
95404 | opt32bi-s2lcu4 | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
96436 | opt32biT-s2lcu4 | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160718 | 20160715
97396 | opt32bi-s2lcu4 | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
97736 | compact | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
98012 | opt32biT-s2lcu4 | clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20160718 | 20160715
98828 | opt32bi-rvku2 | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160718 | 20160715
98976 | opt32biT-s2lcu4 | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
99244 | opt32bi-s2lcu4 | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160718 | 20160715
111272 | opt32bi-rvku2 | clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20160718 | 20160715
111848 | compact | clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20160718 | 20160715
112776 | opt32bi-rvku2 | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160718 | 20160715
114320 | opt32bi-rvku2 | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
179136 | compact | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160718 | 20160715
190072 | compact | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160718 | 20160715
193464 | compact | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
365916 | compact8 | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
375764 | compact8 | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160718 | 20160715
413348 | compact8 | clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20160718 | 20160715
427072 | compact8 | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160718 | 20160715
872548 | compact8 | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160718 | 20160715

Compiler output

Implementation: crypto_hash/keccakc448/inplace32bi
Compiler: clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments
Keccak-inplace32BI.c: Keccak-inplace32BI.c:73:6: error: "The output size must be a multiple of the lane size in this simple implementation."
Keccak-inplace32BI.c: #error "The output size must be a multiple of the lane size in this simple implementation."
Keccak-inplace32BI.c: ^
Keccak-inplace32BI.c: 1 error generated.

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler | Implementations
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | inplace32bi

Compiler output

Implementation: crypto_hash/keccakc448/simple32bi
Compiler: clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments
Keccak-simple32BI.c: Keccak-simple32BI.c:73:6: error: "The output size must be a multiple of the lane size in this simple implementation."
Keccak-simple32BI.c: #error "The output size must be a multiple of the lane size in this simple implementation."
Keccak-simple32BI.c: ^
Keccak-simple32BI.c: 1 error generated.

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler | Implementations
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | simple32bi

Compiler output

Implementation: crypto_hash/keccakc448/xopu24
Compiler: clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments
KeccakF-1600-opt64.c: KeccakF-1600-opt64.c:185:5: warning: implicit declaration of function '_mm_roti_epi64' is invalid in C99 [-Wimplicit-function-declaration]
KeccakF-1600-opt64.c: rounds
KeccakF-1600-opt64.c: ^
KeccakF-1600-opt64.c: ./KeccakF-1600-unrolling.macros:17:5: note: expanded from macro 'rounds'
KeccakF-1600-opt64.c: thetaRhoPiChiIotaPrepareTheta( 0, A, E) \
KeccakF-1600-opt64.c: ^
KeccakF-1600-opt64.c: ./KeccakF-1600-xop.macros:53:5: note: expanded from macro 'thetaRhoPiChiIotaPrepareTheta'
KeccakF-1600-opt64.c: computeD \
KeccakF-1600-opt64.c: ^
KeccakF-1600-opt64.c: ./KeccakF-1600-xop.macros:40:23: note: expanded from macro 'computeD'
KeccakF-1600-opt64.c: Dei = XOR128(Cae, ROL6464same(Cio, 1)); \
KeccakF-1600-opt64.c: ^
KeccakF-1600-opt64.c: KeccakF-1600-opt64.c:102:33: note: expanded from macro 'ROL6464same'
KeccakF-1600-opt64.c: #define ROL6464same(a, o) _mm_roti_epi64(a, o)
KeccakF-1600-opt64.c: ^
KeccakF-1600-opt64.c: KeccakF-1600-opt64.c:90:50: note: expanded from macro 'XOR128'
KeccakF-1600-opt64.c: #define XOR128(a, b) _mm_xor_si128(a, b)
KeccakF-1600-opt64.c: ^
KeccakF-1600-opt64.c: KeccakF-1600-opt64.c:185:5: error: passing 'int' to parameter of incompatible type '__m128i' (vector of 2 'long long' values)
KeccakF-1600-opt64.c: rounds
KeccakF-1600-opt64.c: ^~~~~~
KeccakF-1600-opt64.c: ./KeccakF-1600-unrolling.macros:17:5: note: expanded from macro 'rounds'
KeccakF-1600-opt64.c: thetaRhoPiChiIotaPrepareTheta( 0, A, E) \
KeccakF-1600-opt64.c: ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
KeccakF-1600-opt64.c: ./KeccakF-1600-xop.macros:53:5: note: expanded from macro 'thetaRhoPiChiIotaPrepareTheta'
KeccakF-1600-opt64.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler | Implementations
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | xopu24

Compiler output

Implementation: crypto_hash/keccakc448/x86_64_asm
Compiler: clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments
KeccakF-1600-x86-64-gas.s: KeccakF-1600-x86-64-gas.s:50:16: error: unknown token in expression
KeccakF-1600-x86-64-gas.s: .equ apState, %rdi
KeccakF-1600-x86-64-gas.s: ^
KeccakF-1600-x86-64-gas.s: KeccakF-1600-x86-64-gas.s:51:16: error: unknown token in expression
KeccakF-1600-x86-64-gas.s: .equ apInput, %rsi
KeccakF-1600-x86-64-gas.s: ^
KeccakF-1600-x86-64-gas.s: KeccakF-1600-x86-64-gas.s:52:18: error: unknown token in expression
KeccakF-1600-x86-64-gas.s: .equ aNbrWords, %rdx
KeccakF-1600-x86-64-gas.s: ^
KeccakF-1600-x86-64-gas.s: KeccakF-1600-x86-64-gas.s:55:16: error: unknown token in expression
KeccakF-1600-x86-64-gas.s: .equ xpState, %r9
KeccakF-1600-x86-64-gas.s: ^
KeccakF-1600-x86-64-gas.s: KeccakF-1600-x86-64-gas.s:58:12: error: unknown token in expression
KeccakF-1600-x86-64-gas.s: .equ rT1, %rax
KeccakF-1600-x86-64-gas.s: ^
KeccakF-1600-x86-64-gas.s: KeccakF-1600-x86-64-gas.s:59:16: error: unknown token in expression
KeccakF-1600-x86-64-gas.s: .equ rpState, %rdi
KeccakF-1600-x86-64-gas.s: ^
KeccakF-1600-x86-64-gas.s: KeccakF-1600-x86-64-gas.s:60:16: error: unknown token in expression
KeccakF-1600-x86-64-gas.s: .equ rpStack, %rsp
KeccakF-1600-x86-64-gas.s: ^
KeccakF-1600-x86-64-gas.s: KeccakF-1600-x86-64-gas.s:62:12: error: unknown token in expression
KeccakF-1600-x86-64-gas.s: .equ rDa, %rbx
KeccakF-1600-x86-64-gas.s: ^
KeccakF-1600-x86-64-gas.s: KeccakF-1600-x86-64-gas.s:63:12: error: unknown token in expression
KeccakF-1600-x86-64-gas.s: ...

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler | Implementations
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | x86_64_asm

Compiler output

Implementation: crypto_hash/keccakc448/x86_64_shld
Compiler: clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments
KeccakF-1600-x86-64-shld-gas.s: KeccakF-1600-x86-64-shld-gas.s:50:16: error: unknown token in expression
KeccakF-1600-x86-64-shld-gas.s: .equ apState, %rdi
KeccakF-1600-x86-64-shld-gas.s: ^
KeccakF-1600-x86-64-shld-gas.s: KeccakF-1600-x86-64-shld-gas.s:51:16: error: unknown token in expression
KeccakF-1600-x86-64-shld-gas.s: .equ apInput, %rsi
KeccakF-1600-x86-64-shld-gas.s: ^
KeccakF-1600-x86-64-shld-gas.s: KeccakF-1600-x86-64-shld-gas.s:52:18: error: unknown token in expression
KeccakF-1600-x86-64-shld-gas.s: .equ aNbrWords, %rdx
KeccakF-1600-x86-64-shld-gas.s: ^
KeccakF-1600-x86-64-shld-gas.s: KeccakF-1600-x86-64-shld-gas.s:55:16: error: unknown token in expression
KeccakF-1600-x86-64-shld-gas.s: .equ xpState, %r9
KeccakF-1600-x86-64-shld-gas.s: ^
KeccakF-1600-x86-64-shld-gas.s: KeccakF-1600-x86-64-shld-gas.s:58:12: error: unknown token in expression
KeccakF-1600-x86-64-shld-gas.s: .equ rT1, %rax
KeccakF-1600-x86-64-shld-gas.s: ^
KeccakF-1600-x86-64-shld-gas.s: KeccakF-1600-x86-64-shld-gas.s:59:16: error: unknown token in expression
KeccakF-1600-x86-64-shld-gas.s: .equ rpState, %rdi
KeccakF-1600-x86-64-shld-gas.s: ^
KeccakF-1600-x86-64-shld-gas.s: KeccakF-1600-x86-64-shld-gas.s:60:16: error: unknown token in expression
KeccakF-1600-x86-64-shld-gas.s: .equ rpStack, %rsp
KeccakF-1600-x86-64-shld-gas.s: ^
KeccakF-1600-x86-64-shld-gas.s: KeccakF-1600-x86-64-shld-gas.s:62:12: error: unknown token in expression
KeccakF-1600-x86-64-shld-gas.s: .equ rDa, %rbx
KeccakF-1600-x86-64-shld-gas.s: ^
KeccakF-1600-x86-64-shld-gas.s: KeccakF-1600-x86-64-shld-gas.s:63:12: error: unknown token in expression
KeccakF-1600-x86-64-shld-gas.s: ...

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler | Implementations
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | x86_64_shld

Compiler output

Implementation: crypto_hash/keccakc448/inplace32bi
Compiler: gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv
Keccak-inplace32BI.c: Keccak-inplace32BI.c: In function 'crypto_hash_keccakc448_inplace32bi':
Keccak-inplace32BI.c: Keccak-inplace32BI.c:73:6: error: #error "The output size must be a multiple of the lane size in this simple implementation."
Keccak-inplace32BI.c: #error "The output size must be a multiple of the lane size in this simple implementation."
Keccak-inplace32BI.c: ^

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler | Implementations
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | inplace32bi
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | inplace32bi
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | inplace32bi
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | inplace32bi

Compiler output

Implementation: crypto_hash/keccakc448/simple32bi
Compiler: gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv
Keccak-simple32BI.c: Keccak-simple32BI.c: In function 'crypto_hash_keccakc448_simple32bi':
Keccak-simple32BI.c: Keccak-simple32BI.c:73:6: error: #error "The output size must be a multiple of the lane size in this simple implementation."
Keccak-simple32BI.c: #error "The output size must be a multiple of the lane size in this simple implementation."
Keccak-simple32BI.c: ^

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler | Implementations
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | simple32bi
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | simple32bi
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | simple32bi
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | simple32bi

Compiler output

Implementation: crypto_hash/keccakc448/xopu24
Compiler: gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv
KeccakF-1600-opt64.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/4.9/include/x86intrin.h:52:0,
KeccakF-1600-opt64.c: from KeccakF-1600-opt64.c:74:
KeccakF-1600-opt64.c: KeccakF-1600-opt64.c: In function 'KeccakPermutationOnWords':
KeccakF-1600-opt64.c: /usr/lib/gcc/x86_64-linux-gnu/4.9/include/xopintrin.h:266:1: error: inlining failed in call to always_inline '_mm_roti_epi64': target specific option mismatch
KeccakF-1600-opt64.c: _mm_roti_epi64(__m128i __A, const int __B)
KeccakF-1600-opt64.c: ^
KeccakF-1600-opt64.c: KeccakF-1600-opt64.c:90:33: error: called from here
KeccakF-1600-opt64.c: #define XOR128(a, b) _mm_xor_si128(a, b)
KeccakF-1600-opt64.c: ^
KeccakF-1600-opt64.c: KeccakF-1600-xop.macros:40:11: note: in expansion of macro 'XOR128'
KeccakF-1600-opt64.c: Dei = XOR128(Cae, ROL6464same(Cio, 1)); \
KeccakF-1600-opt64.c: ^
KeccakF-1600-opt64.c: KeccakF-1600-xop.macros:53:5: note: in expansion of macro 'computeD'
KeccakF-1600-opt64.c: computeD \
KeccakF-1600-opt64.c: ^
KeccakF-1600-opt64.c: KeccakF-1600-unrolling.macros:17:5: note: in expansion of macro 'thetaRhoPiChiIotaPrepareTheta'
KeccakF-1600-opt64.c: thetaRhoPiChiIotaPrepareTheta( 0, A, E) \
KeccakF-1600-opt64.c: ^
KeccakF-1600-opt64.c: KeccakF-1600-opt64.c:185:5: note: in expansion of macro 'rounds'
KeccakF-1600-opt64.c: rounds
KeccakF-1600-opt64.c: ^
KeccakF-1600-opt64.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/4.9/include/x86intrin.h:52:0,
KeccakF-1600-opt64.c: from KeccakF-1600-opt64.c:74:
KeccakF-1600-opt64.c: /usr/lib/gcc/x86_64-linux-gnu/4.9/include/xopintrin.h:266:1: error: inlining failed in call to always_inline '_mm_roti_epi64': target specific option mismatch
KeccakF-1600-opt64.c: _mm_roti_epi64(__m128i __A, const int __B)
KeccakF-1600-opt64.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler | Implementations
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | xopu24
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | xopu24
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | xopu24
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | xopu24