Implementation notes: amd64, sandy, crypto_hash/sarmal512

Computer: sandy
Architecture: amd64
CPU ID: GenuineIntel-000206a7-bfebfbff
SUPERCOP version: 20120908
Operation: crypto_hash
Primitive: sarmal512
Time | Implementation | Compiler | Benchmark date | SUPERCOP version
15796opt64gcc -funroll-loops -m64 -O -fomit-frame-pointer2012081620120812
15812opt64gcc -funroll-loops -fno-schedule-insns -O -fomit-frame-pointer2012081620120812
15896opt64clang -O3 -mavx2012071220120709
15900opt64clang -O32012071220120709
15920opt64gcc -funroll-loops -O -fomit-frame-pointer2012081620120812
15948opt64gcc -funroll-loops -m64 -march=nocona -O -fomit-frame-pointer2012081620120812
15972asm64gcc -funroll-loops -m64 -march=nocona -O2 -fomit-frame-pointer2012081620120812
15988asm64gcc -funroll-loops -march=nocona -O -fomit-frame-pointer2012081620120812
16012asm64gcc -funroll-loops -march=nocona -O3 -fomit-frame-pointer2012081620120812
16032asm64gcc -march=nocona -O3 -fomit-frame-pointer2012081620120812
16036asm64gcc -m64 -march=nocona -O3 -fomit-frame-pointer2012081620120812
16068asm64gcc -funroll-loops -march=nocona -O2 -fomit-frame-pointer2012081620120812
16116asm64gcc -m64 -march=nocona -O -fomit-frame-pointer2012081620120812
16156opt64gcc -m64 -march=core-avx-i -O3 -fomit-frame-pointer2012081620120812
16180asm64gcc -march=nocona -O -fomit-frame-pointer2012081620120812
16208asm64gcc -funroll-loops -m64 -march=nocona -O -fomit-frame-pointer2012081620120812
16244opt64gcc -funroll-loops -fno-schedule-insns -O3 -fomit-frame-pointer2012081620120812
16256opt64gcc -funroll-loops -O3 -fomit-frame-pointer2012081620120812
16260opt64gcc -funroll-loops -m64 -O3 -fomit-frame-pointer2012081620120812
16260asm64gcc -march=nocona -O2 -fomit-frame-pointer2012081620120812
16264opt64gcc -funroll-loops -march=nocona -O -fomit-frame-pointer2012081620120812
16276opt64gcc -m64 -march=native -mtune=native -O3 -fomit-frame-pointer2012081620120812
16280opt64gcc -m64 -march=core2 -msse4 -O -fomit-frame-pointer2012081620120812
16284asm64gcc -march=barcelona -O3 -fomit-frame-pointer2012081620120812
16288opt64gcc -march=nocona -O3 -fomit-frame-pointer2012081620120812
16292opt64gcc -m64 -march=nocona -O -fomit-frame-pointer2012081620120812
16296opt64gcc -fno-schedule-insns -O -fomit-frame-pointer2012081620120812
16296opt64gcc -funroll-loops -fno-schedule-insns -O2 -fomit-frame-pointer2012081620120812
16300opt64gcc -funroll-loops -march=nocona -O3 -fomit-frame-pointer2012081620120812
16300opt64gcc -m64 -march=corei7-avx -O -fomit-frame-pointer2012081620120812
16304opt64gcc -O3 -fomit-frame-pointer2012081620120812
16308asm64gcc -m64 -march=barcelona -O3 -fomit-frame-pointer2012081620120812
16316opt64gcc -m64 -march=corei7 -O -fomit-frame-pointer2012081620120812
16320opt64gcc -O -fomit-frame-pointer2012081620120812
16320opt64gcc -m64 -O3 -fomit-frame-pointer2012081620120812
16320opt64gcc -m64 -march=core2 -O -fomit-frame-pointer2012081620120812
16324opt64gcc -m64 -march=corei7-avx -O3 -fomit-frame-pointer2012081620120812
16324opt64gcc -m64 -march=nocona -O3 -fomit-frame-pointer2012081620120812
16328opt64gcc -funroll-loops -m64 -O2 -fomit-frame-pointer2012081620120812
16328asm64gcc -funroll-loops -m64 -march=nocona -O3 -fomit-frame-pointer2012081620120812
16332opt64gcc -m64 -march=native -mtune=native -O -fomit-frame-pointer2012081620120812
16344asm64gcc -funroll-loops -m64 -march=barcelona -O2 -fomit-frame-pointer2012081620120812
16344opt64gcc -m64 -O -fomit-frame-pointer2012081620120812
16368asm64gcc -m64 -march=nocona -O2 -fomit-frame-pointer2012081620120812
16372asm64gcc -funroll-loops -march=barcelona -O2 -fomit-frame-pointer2012081620120812
16400asm64gcc -funroll-loops -m64 -march=k8 -O -fomit-frame-pointer2012081620120812
16412opt64gcc -funroll-loops -O2 -fomit-frame-pointer2012081620120812
16412asm64gcc -funroll-loops -march=barcelona -O -fomit-frame-pointer2012081620120812
16412opt64gcc -m64 -march=core2 -O3 -fomit-frame-pointer2012081620120812
16420asm64gcc -march=barcelona -O -fomit-frame-pointer2012081620120812
16432asm64gcc -funroll-loops -march=barcelona -O3 -fomit-frame-pointer2012081620120812
16464asm64gcc -m64 -march=barcelona -O -fomit-frame-pointer2012081620120812
16488asm64gcc -funroll-loops -march=k8 -O -fomit-frame-pointer2012081620120812
16504opt64gcc -fno-schedule-insns -O3 -fomit-frame-pointer2012081620120812
16504opt64gcc -m64 -march=core2 -msse4.1 -O3 -fomit-frame-pointer2012081620120812
16516opt64gcc -funroll-loops -m64 -march=nocona -O3 -fomit-frame-pointer2012081620120812
16528opt64gcc -funroll-loops -m64 -march=nocona -O2 -fomit-frame-pointer2012081620120812
16568opt64gcc -m64 -march=core2 -msse4.1 -O -fomit-frame-pointer2012081620120812
16576opt64gcc -m64 -march=core2 -msse4 -O3 -fomit-frame-pointer2012081620120812
16580asm64gcc -m64 -march=barcelona -O2 -fomit-frame-pointer2012081620120812
16584opt64gcc -funroll-loops -march=nocona -O2 -fomit-frame-pointer2012081620120812
16588opt64gcc -funroll-loops -m64 -march=barcelona -O -fomit-frame-pointer2012081620120812
16588opt64gcc -m64 -march=corei7 -O3 -fomit-frame-pointer2012081620120812
16632opt64gcc -funroll-loops -march=barcelona -O -fomit-frame-pointer2012081620120812
16644asm64gcc -funroll-loops -m64 -march=barcelona -O -fomit-frame-pointer2012081620120812
16656asm64gcc -m64 -Os -fomit-frame-pointer2012081620120812
16668asm64gcc -Os -fomit-frame-pointer2012081620120812
16672asm64gcc -m64 -march=k8 -Os -fomit-frame-pointer2012081620120812
16680asm64gcc -march=nocona -Os -fomit-frame-pointer2012081620120812
16692asm64gcc -funroll-loops -m64 -march=barcelona -Os -fomit-frame-pointer2012081620120812
16696asm64gcc -funroll-loops -m64 -march=barcelona -O3 -fomit-frame-pointer2012081620120812
16712asm64gcc -m64 -march=nocona -Os -fomit-frame-pointer2012081620120812
16720opt64gcc -march=nocona -O -fomit-frame-pointer2012081620120812
16724asm64gcc -m64 -march=native -mtune=native -Os -fomit-frame-pointer2012081620120812
16740asm64gcc -funroll-loops -m64 -march=k8 -Os -fomit-frame-pointer2012081620120812
16752asm64gcc -funroll-loops -m64 -march=nocona -Os -fomit-frame-pointer2012081620120812
16784asm64gcc -march=barcelona -O2 -fomit-frame-pointer2012081620120812
16800asm64gcc -march=barcelona -Os -fomit-frame-pointer2012081620120812
16804opt64gcc -funroll-loops -march=k8 -O -fomit-frame-pointer2012081620120812
16812asm64gcc -m64 -march=core-avx-i -Os -fomit-frame-pointer2012081620120812
16816asm64gcc -funroll-loops -march=k8 -Os -fomit-frame-pointer2012081620120812
16824asm64gcc -funroll-loops -Os -fomit-frame-pointer2012081620120812
16832asm64gcc -m64 -march=corei7-avx -Os -fomit-frame-pointer2012081620120812
16836asm64gcc -fno-schedule-insns -Os -fomit-frame-pointer2012081620120812
16844asm64gcc -m64 -march=core2 -Os -fomit-frame-pointer2012081620120812
16864asm64gcc -funroll-loops -fno-schedule-insns -Os -fomit-frame-pointer2012081620120812
16868opt64gcc -m64 -march=core-avx-i -O -fomit-frame-pointer2012081620120812
16872asm64gcc -march=k8 -Os -fomit-frame-pointer2012081620120812
16876opt64gcc -funroll-loops -m64 -march=k8 -O -fomit-frame-pointer2012081620120812
16876asm64gcc -m64 -march=barcelona -Os -fomit-frame-pointer2012081620120812
16876opt64gcc -m64 -march=k8 -O -fomit-frame-pointer2012081620120812
16880opt64gcc -march=barcelona -O -fomit-frame-pointer2012081620120812
16884asm64gcc -m64 -march=corei7 -Os -fomit-frame-pointer2012081620120812
16900asm64gcc -funroll-loops -m64 -Os -fomit-frame-pointer2012081620120812
16900opt64gcc -m64 -march=core-avx-i -O2 -fomit-frame-pointer2012081620120812
16900asm64gcc -m64 -march=core2 -msse4.1 -Os -fomit-frame-pointer2012081620120812
16904asm64gcc -funroll-loops -march=barcelona -Os -fomit-frame-pointer2012081620120812
16928opt64gcc -m64 -march=barcelona -O -fomit-frame-pointer2012081620120812
16996opt64gcc -march=k8 -O -fomit-frame-pointer2012081620120812
17024opt64gcc -m64 -march=native -mtune=native -O2 -fomit-frame-pointer2012081620120812
17064asm64cc2012081620120812
17072opt64gcc -march=nocona -O2 -fomit-frame-pointer2012081620120812
17128asm64gcc -m64 -march=core2 -msse4 -Os -fomit-frame-pointer2012081620120812
17152opt64gcc -O2 -fomit-frame-pointer2012081620120812
17168opt64gcc -m64 -march=core2 -O2 -fomit-frame-pointer2012081620120812
17188opt64gcc -m64 -march=core2 -msse4.1 -O2 -fomit-frame-pointer2012081620120812
17196opt64gcc -m64 -march=corei7-avx -O2 -fomit-frame-pointer2012081620120812
17212opt64gcc -m64 -O2 -fomit-frame-pointer2012081620120812
17220opt64gcc -fno-schedule-insns -O2 -fomit-frame-pointer2012081620120812
17220opt64gcc -m64 -march=core2 -msse4 -O2 -fomit-frame-pointer2012081620120812
17228asm64gcc2012081620120812
17252asm64gcc -funroll-loops2012081620120812
17256opt64gcc -m64 -march=corei7 -O2 -fomit-frame-pointer2012081620120812
17352opt64gcc -march=barcelona -O2 -fomit-frame-pointer2012081620120812
17360opt64gcc -m64 -march=barcelona -O2 -fomit-frame-pointer2012081620120812
17452asm64gcc -funroll-loops -march=nocona -Os -fomit-frame-pointer2012081620120812
17540opt64gcc -funroll-loops -m64 -march=k8 -Os -fomit-frame-pointer2012081620120812
17576opt64gcc -funroll-loops -fno-schedule-insns -Os -fomit-frame-pointer2012081620120812
17620opt64gcc -march=k8 -O2 -fomit-frame-pointer2012081620120812
17628opt64gcc -funroll-loops -Os -fomit-frame-pointer2012081620120812
17652opt64gcc -m64 -march=nocona -O2 -fomit-frame-pointer2012081620120812
17660opt64gcc -funroll-loops -m64 -march=nocona -Os -fomit-frame-pointer2012081620120812
17664opt64gcc -m64 -march=k8 -O2 -fomit-frame-pointer2012081620120812
17664opt64gcc -m64 -march=nocona -Os -fomit-frame-pointer2012081620120812
17680opt64gcc -m64 -march=k8 -Os -fomit-frame-pointer2012081620120812
17684opt64gcc -m64 -march=corei7 -Os -fomit-frame-pointer2012081620120812
17688opt64gcc -march=barcelona -Os -fomit-frame-pointer2012081620120812
17696opt64gcc -m64 -Os -fomit-frame-pointer2012081620120812
17708opt64gcc -Os -fomit-frame-pointer2012081620120812
17712opt64gcc -march=k8 -Os -fomit-frame-pointer2012081620120812
17716opt64gcc -m64 -march=core-avx-i -Os -fomit-frame-pointer2012081620120812
17732opt64gcc -funroll-loops -m64 -march=barcelona -Os -fomit-frame-pointer2012081620120812
17732opt64gcc -m64 -march=barcelona -Os -fomit-frame-pointer2012081620120812
17736opt64gcc -funroll-loops -m64 -Os -fomit-frame-pointer2012081620120812
17736opt64gcc -funroll-loops -march=barcelona -Os -fomit-frame-pointer2012081620120812
17736opt64gcc -m64 -march=core2 -msse4.1 -Os -fomit-frame-pointer2012081620120812
17740opt64gcc -funroll-loops -march=nocona -Os -fomit-frame-pointer2012081620120812
17748opt64gcc -funroll-loops -march=k8 -Os -fomit-frame-pointer2012081620120812
17748opt64gcc -m64 -march=native -mtune=native -Os -fomit-frame-pointer2012081620120812
17752opt64gcc -fno-schedule-insns -Os -fomit-frame-pointer2012081620120812
17756opt64gcc -march=nocona -Os -fomit-frame-pointer2012081620120812
17820opt64gcc -m64 -march=core2 -Os -fomit-frame-pointer2012081620120812
17984opt64gcc -m64 -march=core2 -msse4 -Os -fomit-frame-pointer2012081620120812
17996opt64gcc -m64 -march=corei7-avx -Os -fomit-frame-pointer2012081620120812
21176opt64gcc -march=k8 -O3 -fomit-frame-pointer2012081620120812
21288opt64gcc -funroll-loops -march=k8 -O2 -fomit-frame-pointer2012081620120812
21304opt64gcc -m64 -march=k8 -O3 -fomit-frame-pointer2012081620120812
21336opt64gcc -funroll-loops -m64 -march=k8 -O3 -fomit-frame-pointer2012081620120812
21348opt64gcc -m64 -march=barcelona -O3 -fomit-frame-pointer2012081620120812
21392opt64gcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer2012081620120812
21436opt64gcc -funroll-loops -march=k8 -O3 -fomit-frame-pointer2012081620120812
21512opt64gcc -funroll-loops -march=barcelona -O3 -fomit-frame-pointer2012081620120812
21568opt64gcc -funroll-loops -march=barcelona -O2 -fomit-frame-pointer2012081620120812
21588opt64gcc -funroll-loops -m64 -march=barcelona -O3 -fomit-frame-pointer2012081620120812
21640opt64gcc -funroll-loops -m64 -march=barcelona -O2 -fomit-frame-pointer2012081620120812
22344opt64gcc -march=barcelona -O3 -fomit-frame-pointer2012081620120812
35272opt64gcc2012081620120812
35292opt64gcc -funroll-loops2012081620120812
35596opt64cc2012081620120812

Checksum failure

Implementation: crypto_hash/sarmal512/asm64
Compiler: gcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer
8c5da0ec5496afb38282403c0342de2ea3bbdd552735074c542e099ad54af78178ff83043169f039a42f171a3fb6b64717c5bce7dd9bde60a643519d4f82e7a2
Number of similar (compiler,implementation) pairs: 2, namely:
Compiler | Implementations
gcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer asm64
gcc -funroll-loops -march=k8 -O2 -fomit-frame-pointer asm64

Checksum failure

Implementation: crypto_hash/sarmal512/asm64
Compiler: gcc -m64 -march=k8 -O2 -fomit-frame-pointer
bb17c5923d4961cfed9f5ddc3b3313cfbc6c4157967e72ddb615028f2d11053d49a7a23bf864e55fc7e11943c57c07d58ae56469422da8f34e30c3444764fd59
Number of similar (compiler,implementation) pairs: 4, namely:
Compiler | Implementations
gcc -m64 -march=k8 -O2 -fomit-frame-pointer asm64
gcc -m64 -march=k8 -O3 -fomit-frame-pointer asm64
gcc -march=k8 -O2 -fomit-frame-pointer asm64
gcc -march=k8 -O3 -fomit-frame-pointer asm64

Test failure

Implementation: crypto_hash/sarmal512/asm64
Compiler: gcc -O2 -fomit-frame-pointer
error 111
crypto_hash writes to input

Number of similar (compiler,implementation) pairs: 12, namely:
Compiler | Implementations
gcc -O2 -fomit-frame-pointer asm64
gcc -fno-schedule-insns -O2 -fomit-frame-pointer asm64
gcc -funroll-loops -m64 -march=k8 -O3 -fomit-frame-pointer asm64
gcc -funroll-loops -march=k8 -O3 -fomit-frame-pointer asm64
gcc -m64 -O2 -fomit-frame-pointer asm64
gcc -m64 -march=core-avx-i -O2 -fomit-frame-pointer asm64
gcc -m64 -march=core2 -O2 -fomit-frame-pointer asm64
gcc -m64 -march=core2 -msse4.1 -O2 -fomit-frame-pointer asm64
gcc -m64 -march=core2 -msse4 -O2 -fomit-frame-pointer asm64
gcc -m64 -march=corei7-avx -O2 -fomit-frame-pointer asm64
gcc -m64 -march=corei7 -O2 -fomit-frame-pointer asm64
gcc -m64 -march=native -mtune=native -O2 -fomit-frame-pointer asm64

Test failure

Implementation: crypto_hash/sarmal512/asm64
Compiler: gcc -O3 -fomit-frame-pointer
error 111
crypto_hash does not handle overlap

Number of similar (compiler,implementation) pairs: 13, namely:
Compiler | Implementations
gcc -O3 -fomit-frame-pointer asm64
gcc -fno-schedule-insns -O3 -fomit-frame-pointer asm64
gcc -funroll-loops -O -fomit-frame-pointer asm64
gcc -funroll-loops -fno-schedule-insns -O -fomit-frame-pointer asm64
gcc -funroll-loops -m64 -O -fomit-frame-pointer asm64
gcc -m64 -O3 -fomit-frame-pointer asm64
gcc -m64 -march=core-avx-i -O3 -fomit-frame-pointer asm64
gcc -m64 -march=core2 -O3 -fomit-frame-pointer asm64
gcc -m64 -march=core2 -msse4.1 -O3 -fomit-frame-pointer asm64
gcc -m64 -march=core2 -msse4 -O3 -fomit-frame-pointer asm64
gcc -m64 -march=corei7-avx -O3 -fomit-frame-pointer asm64
gcc -m64 -march=corei7 -O3 -fomit-frame-pointer asm64
gcc -m64 -march=native -mtune=native -O3 -fomit-frame-pointer asm64

Test failure

Implementation: crypto_hash/sarmal512/asm64
Compiler: gcc -O -fomit-frame-pointer
error 111

Number of similar (compiler,implementation) pairs: 18, namely:
Compiler | Implementations
gcc -O -fomit-frame-pointer asm64
gcc -fno-schedule-insns -O -fomit-frame-pointer asm64
gcc -funroll-loops -O2 -fomit-frame-pointer asm64
gcc -funroll-loops -O3 -fomit-frame-pointer asm64
gcc -funroll-loops -fno-schedule-insns -O2 -fomit-frame-pointer asm64
gcc -funroll-loops -fno-schedule-insns -O3 -fomit-frame-pointer asm64
gcc -funroll-loops -m64 -O2 -fomit-frame-pointer asm64
gcc -funroll-loops -m64 -O3 -fomit-frame-pointer asm64
gcc -m64 -O -fomit-frame-pointer asm64
gcc -m64 -march=core-avx-i -O -fomit-frame-pointer asm64
gcc -m64 -march=core2 -O -fomit-frame-pointer asm64
gcc -m64 -march=core2 -msse4.1 -O -fomit-frame-pointer asm64
gcc -m64 -march=core2 -msse4 -O -fomit-frame-pointer asm64
gcc -m64 -march=corei7-avx -O -fomit-frame-pointer asm64
gcc -m64 -march=corei7 -O -fomit-frame-pointer asm64
gcc -m64 -march=k8 -O -fomit-frame-pointer asm64
gcc -m64 -march=native -mtune=native -O -fomit-frame-pointer asm64
gcc -march=k8 -O -fomit-frame-pointer asm64

Compiler output

Implementation: crypto_hash/sarmal512/asm64
Compiler: clang -O3
sarmal.c: sarmal.c:895:3: error: Expected register
sarmal.c: ASG(8,9,10,11,12,13,14,15,0,1,2,3)
sarmal.c: ^
sarmal.c: sarmal.c:55:2: note: expanded from macro 'ASG'
sarmal.c: AS2(movzx, rcx, bl) \
sarmal.c: ^
sarmal.c: sarmal.c:49:52: note: expanded from macro 'AS2'
sarmal.c: #define AS2(ins,dst,src) #ins " " #dst "," #src "\n\t"
sarmal.c: ^
sarmal.c: <inline asm>:66:17: note: instantiated into assembly here
sarmal.c: mov rdx,[rsi+7*2048+rcx*8]
sarmal.c: ^
sarmal.c: sarmal.c:895:3: error: Expected register
sarmal.c: ASG(8,9,10,11,12,13,14,15,0,1,2,3)
sarmal.c: ^
sarmal.c: sarmal.c:58:2: note: expanded from macro 'ASG'
sarmal.c: AS2(movzx, rcx, bl) \
sarmal.c: ^
sarmal.c: sarmal.c:49:52: note: expanded from macro 'AS2'
sarmal.c: #define AS2(ins,dst,src) #ins " " #dst "," #src "\n\t"
sarmal.c: ^
sarmal.c: <inline asm>:69:17: note: instantiated into assembly here
sarmal.c: xor rdx,[rsi+6*2048+rcx*8]
sarmal.c: ^
sarmal.c: sarmal.c:895:3: error: Expected register
sarmal.c: ...

Number of similar (compiler,implementation) pairs: 2, namely:
Compiler | Implementations
clang -O3 asm64
clang -O3 -mavx asm64