Implementation notes: amd64, h4atom, crypto_hash/keccakc448

Computer: h4atom
Architecture: amd64
CPU ID: GenuineIntel-000106ca-bfe9fbff
SUPERCOP version: 20160806
Operation: crypto_hash
Primitive: keccakc448
Time | Implementation | Compiler | Benchmark date | SUPERCOP version
32216 | opt64lcu24 | clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20160812 | 20160806
32584 | opt64lcu6 | clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20160812 | 20160806
34768 | opt64lcu24 | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160812 | 20160806
36032 | opt64lcu6 | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160812 | 20160806
36168 | opt64u6 | clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20160812 | 20160806
37720 | opt64lcu24 | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
37784 | opt64lcu24 | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
37792 | opt64lcu6 | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
38280 | opt64u6 | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160812 | 20160806
38592 | opt64lcu6 | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
38744 | simple | clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20160812 | 20160806
39000 | inplace | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160812 | 20160806
39688 | simple | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160812 | 20160806
40432 | simple | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
41200 | inplace | clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20160812 | 20160806
41216 | opt64u6 | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
41440 | x86_64_asm | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160812 | 20160806
41464 | x86_64_asm | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
41520 | x86_64_asm | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
41848 | x86_64_asm | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160812 | 20160806
42032 | inplace | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
42760 | opt64u6 | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
43192 | simple | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
43656 | inplace | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
48240 | opt64lcu24 | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160812 | 20160806
49704 | opt64lcu6 | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160812 | 20160806
52056 | sseu2 | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160812 | 20160806
55360 | sseu2 | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
55416 | sseu2 | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
56544 | sseu2 | clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20160812 | 20160806
58264 | simple | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160812 | 20160806
58776 | opt64u6 | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160812 | 20160806
59968 | inplace | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160812 | 20160806
67984 | sseu2 | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160812 | 20160806
73352 | mmxu1 | clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20160812 | 20160806
74144 | mmxu1 | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
74240 | mmxu1 | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160812 | 20160806
74920 | mmxu1 | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
81696 | opt32bi-s2lcu4 | clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20160812 | 20160806
83976 | mmxu1 | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160812 | 20160806
87840 | opt32bi-rvku2 | clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20160812 | 20160806
91272 | opt32bi-s2lcu4 | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
91312 | opt32bi-s2lcu4 | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160812 | 20160806
92928 | opt32bi-rvku2 | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160812 | 20160806
97320 | opt32biT-s2lcu4 | clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20160812 | 20160806
98448 | opt32bi-s2lcu4 | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
100784 | opt32bi-rvku2 | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
101576 | opt32biT-s2lcu4 | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160812 | 20160806
104792 | compact | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
106512 | opt32bi-rvku2 | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
108544 | opt32biT-s2lcu4 | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
108776 | opt32biT-s2lcu4 | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
112024 | opt64lcu24shld | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160812 | 20160806
114480 | opt64lcu24shld | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
114624 | opt64lcu24shld | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
116040 | opt64lcu24shld | clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20160812 | 20160806
118064 | x86_64_shld | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
118096 | x86_64_shld | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160812 | 20160806
118104 | x86_64_shld | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
118472 | x86_64_shld | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160812 | 20160806
121912 | opt64lcu24shld | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160812 | 20160806
124200 | opt32bi-s2lcu4 | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160812 | 20160806
135224 | opt32biT-s2lcu4 | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160812 | 20160806
145184 | compact | clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20160812 | 20160806
146136 | opt32bi-rvku2 | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160812 | 20160806
199872 | compact | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
221456 | compact | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160812 | 20160806
249200 | compact | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160812 | 20160806
341304 | compact8 | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
395456 | compact8 | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20160812 | 20160806
417536 | compact8 | clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20160812 | 20160806
468064 | compact8 | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20160812 | 20160806
519824 | compact8 | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20160812 | 20160806

Compiler output

Implementation: crypto_hash/keccakc448/inplace32bi
Compiler: clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments
Keccak-inplace32BI.c: Keccak-inplace32BI.c:73:6: error: "The output size must be a multiple of the lane size in this simple implementation."
Keccak-inplace32BI.c: #error "The output size must be a multiple of the lane size in this simple implementation."
Keccak-inplace32BI.c: ^
Keccak-inplace32BI.c: 1 error generated.

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler | Implementations
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | inplace32bi

Compiler output

Implementation: crypto_hash/keccakc448/simple32bi
Compiler: clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments
Keccak-simple32BI.c: Keccak-simple32BI.c:73:6: error: "The output size must be a multiple of the lane size in this simple implementation."
Keccak-simple32BI.c: #error "The output size must be a multiple of the lane size in this simple implementation."
Keccak-simple32BI.c: ^
Keccak-simple32BI.c: 1 error generated.

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler | Implementations
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | simple32bi

Compiler output

Implementation: crypto_hash/keccakc448/xopu24
Compiler: clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments
KeccakF-1600-opt64.c: KeccakF-1600-opt64.c:185:5: error: '__builtin_ia32_vprotqi' needs target feature xop
KeccakF-1600-opt64.c: rounds
KeccakF-1600-opt64.c: ^
KeccakF-1600-opt64.c: ./KeccakF-1600-unrolling.macros:17:5: note: expanded from macro 'rounds'
KeccakF-1600-opt64.c: thetaRhoPiChiIotaPrepareTheta( 0, A, E) \
KeccakF-1600-opt64.c: ^
KeccakF-1600-opt64.c: ./KeccakF-1600-xop.macros:53:5: note: expanded from macro 'thetaRhoPiChiIotaPrepareTheta'
KeccakF-1600-opt64.c: computeD \
KeccakF-1600-opt64.c: ^
KeccakF-1600-opt64.c: ./KeccakF-1600-xop.macros:40:23: note: expanded from macro 'computeD'
KeccakF-1600-opt64.c: Dei = XOR128(Cae, ROL6464same(Cio, 1)); \
KeccakF-1600-opt64.c: ^
KeccakF-1600-opt64.c: KeccakF-1600-opt64.c:102:33: note: expanded from macro 'ROL6464same'
KeccakF-1600-opt64.c: #define ROL6464same(a, o) _mm_roti_epi64(a, o)
KeccakF-1600-opt64.c: ^
KeccakF-1600-opt64.c: /usr/lib/llvm-3.8/bin/../lib/clang/3.8.0/include/xopintrin.h:250:12: note: expanded from macro '_mm_roti_epi64'
KeccakF-1600-opt64.c: (__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N)); })
KeccakF-1600-opt64.c: ^
KeccakF-1600-opt64.c: KeccakF-1600-opt64.c:185:5: error: '__builtin_ia32_vprotqi' needs target feature xop
KeccakF-1600-opt64.c: ./KeccakF-1600-unrolling.macros:17:5: note: expanded from macro 'rounds'
KeccakF-1600-opt64.c: thetaRhoPiChiIotaPrepareTheta( 0, A, E) \
KeccakF-1600-opt64.c: ^
KeccakF-1600-opt64.c: ./KeccakF-1600-xop.macros:53:5: note: expanded from macro 'thetaRhoPiChiIotaPrepareTheta'
KeccakF-1600-opt64.c: computeD \
KeccakF-1600-opt64.c: ^
KeccakF-1600-opt64.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler | Implementations
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | xopu24

Compiler output

Implementation: crypto_hash/keccakc448/x86_64_asm
Compiler: clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments
KeccakF-1600-x86-64-gas.s: KeccakF-1600-x86-64-gas.s:50:16: error: unknown token in expression
KeccakF-1600-x86-64-gas.s: .equ apState, %rdi
KeccakF-1600-x86-64-gas.s: ^
KeccakF-1600-x86-64-gas.s: KeccakF-1600-x86-64-gas.s:50:16: error: missing expression
KeccakF-1600-x86-64-gas.s: .equ apState, %rdi
KeccakF-1600-x86-64-gas.s: ^
KeccakF-1600-x86-64-gas.s: KeccakF-1600-x86-64-gas.s:52:18: error: unknown token in expression
KeccakF-1600-x86-64-gas.s: .equ aNbrWords, %rdx
KeccakF-1600-x86-64-gas.s: ^
KeccakF-1600-x86-64-gas.s: KeccakF-1600-x86-64-gas.s:52:18: error: missing expression
KeccakF-1600-x86-64-gas.s: .equ aNbrWords, %rdx
KeccakF-1600-x86-64-gas.s: ^
KeccakF-1600-x86-64-gas.s: KeccakF-1600-x86-64-gas.s:55:16: error: unknown token in expression
KeccakF-1600-x86-64-gas.s: .equ xpState, %r9
KeccakF-1600-x86-64-gas.s: ^
KeccakF-1600-x86-64-gas.s: KeccakF-1600-x86-64-gas.s:55:16: error: missing expression
KeccakF-1600-x86-64-gas.s: .equ xpState, %r9
KeccakF-1600-x86-64-gas.s: ^
KeccakF-1600-x86-64-gas.s: KeccakF-1600-x86-64-gas.s:58:12: error: unknown token in expression
KeccakF-1600-x86-64-gas.s: .equ rT1, %rax
KeccakF-1600-x86-64-gas.s: ^
KeccakF-1600-x86-64-gas.s: KeccakF-1600-x86-64-gas.s:58:12: error: missing expression
KeccakF-1600-x86-64-gas.s: .equ rT1, %rax
KeccakF-1600-x86-64-gas.s: ^
KeccakF-1600-x86-64-gas.s: KeccakF-1600-x86-64-gas.s:60:16: error: unknown token in expression
KeccakF-1600-x86-64-gas.s: ...

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler | Implementations
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | x86_64_asm

Compiler output

Implementation: crypto_hash/keccakc448/x86_64_shld
Compiler: clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments
KeccakF-1600-x86-64-shld-gas.s: KeccakF-1600-x86-64-shld-gas.s:50:16: error: unknown token in expression
KeccakF-1600-x86-64-shld-gas.s: .equ apState, %rdi
KeccakF-1600-x86-64-shld-gas.s: ^
KeccakF-1600-x86-64-shld-gas.s: KeccakF-1600-x86-64-shld-gas.s:50:16: error: missing expression
KeccakF-1600-x86-64-shld-gas.s: .equ apState, %rdi
KeccakF-1600-x86-64-shld-gas.s: ^
KeccakF-1600-x86-64-shld-gas.s: KeccakF-1600-x86-64-shld-gas.s:52:18: error: unknown token in expression
KeccakF-1600-x86-64-shld-gas.s: .equ aNbrWords, %rdx
KeccakF-1600-x86-64-shld-gas.s: ^
KeccakF-1600-x86-64-shld-gas.s: KeccakF-1600-x86-64-shld-gas.s:52:18: error: missing expression
KeccakF-1600-x86-64-shld-gas.s: .equ aNbrWords, %rdx
KeccakF-1600-x86-64-shld-gas.s: ^
KeccakF-1600-x86-64-shld-gas.s: KeccakF-1600-x86-64-shld-gas.s:55:16: error: unknown token in expression
KeccakF-1600-x86-64-shld-gas.s: .equ xpState, %r9
KeccakF-1600-x86-64-shld-gas.s: ^
KeccakF-1600-x86-64-shld-gas.s: KeccakF-1600-x86-64-shld-gas.s:55:16: error: missing expression
KeccakF-1600-x86-64-shld-gas.s: .equ xpState, %r9
KeccakF-1600-x86-64-shld-gas.s: ^
KeccakF-1600-x86-64-shld-gas.s: KeccakF-1600-x86-64-shld-gas.s:58:12: error: unknown token in expression
KeccakF-1600-x86-64-shld-gas.s: .equ rT1, %rax
KeccakF-1600-x86-64-shld-gas.s: ^
KeccakF-1600-x86-64-shld-gas.s: KeccakF-1600-x86-64-shld-gas.s:58:12: error: missing expression
KeccakF-1600-x86-64-shld-gas.s: .equ rT1, %rax
KeccakF-1600-x86-64-shld-gas.s: ^
KeccakF-1600-x86-64-shld-gas.s: KeccakF-1600-x86-64-shld-gas.s:60:16: error: unknown token in expression
KeccakF-1600-x86-64-shld-gas.s: ...

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler | Implementations
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | x86_64_shld

Compiler output

Implementation: crypto_hash/keccakc448/inplace32bi
Compiler: gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv
Keccak-inplace32BI.c: Keccak-inplace32BI.c: In function 'crypto_hash_keccakc448_inplace32bi':
Keccak-inplace32BI.c: Keccak-inplace32BI.c:73:6: error: #error "The output size must be a multiple of the lane size in this simple implementation."
Keccak-inplace32BI.c: #error "The output size must be a multiple of the lane size in this simple implementation."
Keccak-inplace32BI.c: ^

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler | Implementations
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | inplace32bi
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | inplace32bi
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | inplace32bi
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | inplace32bi

Compiler output

Implementation: crypto_hash/keccakc448/simple32bi
Compiler: gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv
Keccak-simple32BI.c: Keccak-simple32BI.c: In function 'crypto_hash_keccakc448_simple32bi':
Keccak-simple32BI.c: Keccak-simple32BI.c:73:6: error: #error "The output size must be a multiple of the lane size in this simple implementation."
Keccak-simple32BI.c: #error "The output size must be a multiple of the lane size in this simple implementation."
Keccak-simple32BI.c: ^

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler | Implementations
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | simple32bi
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | simple32bi
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | simple32bi
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | simple32bi

Compiler output

Implementation: crypto_hash/keccakc448/xopu24
Compiler: gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv
KeccakF-1600-opt64.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/x86intrin.h:41:0,
KeccakF-1600-opt64.c: from KeccakF-1600-opt64.c:74:
KeccakF-1600-opt64.c: KeccakF-1600-opt64.c: In function 'KeccakPermutationOnWords':
KeccakF-1600-opt64.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/smmintrin.h:216:1: error: inlining failed in call to always_inline '_mm_blend_pd': target specific option mismatch
KeccakF-1600-opt64.c: _mm_blend_pd (__m128d __X, __m128d __Y, const int __M)
KeccakF-1600-opt64.c: ^
KeccakF-1600-opt64.c: KeccakF-1600-opt64.c:97:43: error: called from here
KeccakF-1600-opt64.c: #define GET64LOHI(a, b) ((__m128i)_mm_blend_pd((__m128d)a, (__m128d)b, 2))
KeccakF-1600-opt64.c: ^
KeccakF-1600-opt64.c: KeccakF-1600-opt64.c:94:51: note: in definition of macro 'SWAP64'
KeccakF-1600-opt64.c: #define SWAP64(a) _mm_shuffle_epi32(a, 0x4E)
KeccakF-1600-opt64.c: ^
KeccakF-1600-opt64.c: KeccakF-1600-opt64.c:98:40: note: in expansion of macro 'GET64LOHI'
KeccakF-1600-opt64.c: #define GET64HILO(a, b) SWAP64(GET64LOHI(b, a))
KeccakF-1600-opt64.c: ^
KeccakF-1600-opt64.c: KeccakF-1600-xop.macros:106:13: note: in expansion of macro 'GET64HILO'
KeccakF-1600-opt64.c: Bsosu = GET64HILO(Bsiso, Bsusa); \
KeccakF-1600-opt64.c: ^
KeccakF-1600-opt64.c: KeccakF-1600-xop.macros:123:36: note: in expansion of macro 'thetaRhoPiChiIotaPrepareTheta'
KeccakF-1600-opt64.c: #define thetaRhoPiChiIota(i, A, E) thetaRhoPiChiIotaPrepareTheta(i, A, E)
KeccakF-1600-opt64.c: ^
KeccakF-1600-opt64.c: KeccakF-1600-unrolling.macros:40:5: note: in expansion of macro 'thetaRhoPiChiIota'
KeccakF-1600-opt64.c: thetaRhoPiChiIota(23, E, A) \
KeccakF-1600-opt64.c: ^
KeccakF-1600-opt64.c: KeccakF-1600-opt64.c:185:5: note: in expansion of macro 'rounds'
KeccakF-1600-opt64.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler | Implementations
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | xopu24
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | xopu24
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | xopu24
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | xopu24