Implementation notes: amd64, haswell, crypto_hash/sarmal512

Computer: haswell
Architecture: amd64
CPU ID: GenuineIntel-000306c3-bfebfbff
SUPERCOP version: 20130419
Operation: crypto_hash
Primitive: sarmal512
Time | Implementation | Compiler | Benchmark date | SUPERCOP version
11316asm64gcc -funroll-loops -m64 -O3 -fomit-frame-pointer2013060820130419
11324asm64gcc -m64 -march=corei7-avx -O3 -fomit-frame-pointer2013060820130419
11328asm64gcc -O3 -fomit-frame-pointer2013060820130419
11332asm64gcc -m64 -march=core-avx-i -O3 -fomit-frame-pointer2013060820130419
11332asm64gcc -m64 -march=core2 -msse4 -O3 -fomit-frame-pointer2013060820130419
11332asm64gcc -m64 -march=nocona -O3 -fomit-frame-pointer2013060820130419
11340asm64gcc -fno-schedule-insns -O3 -fomit-frame-pointer2013060820130419
11340asm64gcc -m64 -march=core2 -msse4.1 -O3 -fomit-frame-pointer2013060820130419
11340asm64gcc -m64 -march=native -mtune=native -O3 -fomit-frame-pointer2013060820130419
11344asm64gcc -m64 -O3 -fomit-frame-pointer2013060820130419
11344asm64gcc -m64 -march=core2 -O3 -fomit-frame-pointer2013060820130419
11348asm64gcc -funroll-loops -O3 -fomit-frame-pointer2013060820130419
11348asm64gcc -m64 -march=corei7 -O3 -fomit-frame-pointer2013060820130419
11352asm64gcc -funroll-loops -m64 -march=nocona -O3 -fomit-frame-pointer2013060820130419
11352asm64gcc -funroll-loops -march=nocona -O3 -fomit-frame-pointer2013060820130419
11364asm64gcc -funroll-loops -fno-schedule-insns -O3 -fomit-frame-pointer2013060820130419
11372asm64gcc -march=nocona -O3 -fomit-frame-pointer2013060820130419
11424asm64gcc -funroll-loops -march=nocona -O2 -fomit-frame-pointer2013060820130419
11428asm64gcc -funroll-loops -m64 -march=nocona -O2 -fomit-frame-pointer2013060820130419
11440asm64gcc -funroll-loops -O2 -fomit-frame-pointer2013060820130419
11444asm64gcc -funroll-loops -m64 -O2 -fomit-frame-pointer2013060820130419
11464asm64gcc -funroll-loops -O -fomit-frame-pointer2013060820130419
11468asm64gcc -funroll-loops -m64 -O -fomit-frame-pointer2013060820130419
11476asm64gcc -funroll-loops -fno-schedule-insns -O2 -fomit-frame-pointer2013060820130419
11484asm64gcc -funroll-loops -m64 -march=nocona -O -fomit-frame-pointer2013060820130419
11484asm64gcc -m64 -march=nocona -O2 -fomit-frame-pointer2013060820130419
11488asm64gcc -funroll-loops -march=nocona -O -fomit-frame-pointer2013060820130419
11492asm64gcc -m64 -O -fomit-frame-pointer2013060820130419
11496asm64gcc -m64 -march=core2 -O -fomit-frame-pointer2013060820130419
11500asm64gcc -O -fomit-frame-pointer2013060820130419
11500asm64gcc -m64 -march=corei7-avx -O -fomit-frame-pointer2013060820130419
11500asm64gcc -m64 -march=corei7 -O -fomit-frame-pointer2013060820130419
11500asm64gcc -m64 -march=native -mtune=native -O -fomit-frame-pointer2013060820130419
11504asm64gcc -march=nocona -O2 -fomit-frame-pointer2013060820130419
11504asm64gcc -march=nocona -O -fomit-frame-pointer2013060820130419
11508asm64gcc -m64 -march=core2 -msse4.1 -O -fomit-frame-pointer2013060820130419
11508asm64gcc -m64 -march=core2 -msse4 -O -fomit-frame-pointer2013060820130419
11508asm64gcc -m64 -march=nocona -O -fomit-frame-pointer2013060820130419
11512asm64gcc -m64 -march=core-avx-i -O -fomit-frame-pointer2013060820130419
11524asm64gcc -m64 -march=core-avx-i -O2 -fomit-frame-pointer2013060820130419
11528asm64gcc -fno-schedule-insns -O -fomit-frame-pointer2013060820130419
11532asm64gcc -march=barcelona -O3 -fomit-frame-pointer2013060820130419
11536asm64gcc -m64 -march=native -mtune=native -O2 -fomit-frame-pointer2013060820130419
11540asm64gcc -O2 -fomit-frame-pointer2013060820130419
11540asm64gcc -fno-schedule-insns -O2 -fomit-frame-pointer2013060820130419
11540asm64gcc -m64 -O2 -fomit-frame-pointer2013060820130419
11544asm64gcc -m64 -march=barcelona -O3 -fomit-frame-pointer2013060820130419
11548asm64gcc -m64 -march=core2 -msse4.1 -O2 -fomit-frame-pointer2013060820130419
11548asm64gcc -m64 -march=core2 -msse4 -O2 -fomit-frame-pointer2013060820130419
11556asm64gcc -funroll-loops -march=barcelona -O3 -fomit-frame-pointer2013060820130419
11556asm64gcc -m64 -march=corei7-avx -O2 -fomit-frame-pointer2013060820130419
11560asm64gcc -funroll-loops -m64 -march=barcelona -O3 -fomit-frame-pointer2013060820130419
11560asm64gcc -m64 -march=corei7 -O2 -fomit-frame-pointer2013060820130419
11576asm64gcc -m64 -march=core2 -O2 -fomit-frame-pointer2013060820130419
11688asm64gcc -funroll-loops -m64 -march=barcelona -O2 -fomit-frame-pointer2013060820130419
11692asm64gcc -funroll-loops -march=barcelona -O2 -fomit-frame-pointer2013060820130419
11732asm64gcc -funroll-loops -march=barcelona -O -fomit-frame-pointer2013060820130419
11744asm64gcc -march=barcelona -O -fomit-frame-pointer2013060820130419
11748asm64gcc -funroll-loops -m64 -march=barcelona -O -fomit-frame-pointer2013060820130419
11752asm64gcc -m64 -march=barcelona -O -fomit-frame-pointer2013060820130419
11780asm64gcc -funroll-loops -m64 -march=k8 -O -fomit-frame-pointer2013060820130419
11780asm64gcc -march=barcelona -O2 -fomit-frame-pointer2013060820130419
11784asm64gcc -funroll-loops -march=k8 -O -fomit-frame-pointer2013060820130419
11796asm64gcc -m64 -march=barcelona -O2 -fomit-frame-pointer2013060820130419
12048asm64gcc -m64 -march=core-avx-i -Os -fomit-frame-pointer2013060820130419
12048asm64gcc -m64 -march=core2 -Os -fomit-frame-pointer2013060820130419
12048asm64gcc -m64 -march=corei7 -Os -fomit-frame-pointer2013060820130419
12056asm64gcc -Os -fomit-frame-pointer2013060820130419
12056asm64gcc -m64 -march=core2 -msse4.1 -Os -fomit-frame-pointer2013060820130419
12060asm64gcc -m64 -march=core2 -msse4 -Os -fomit-frame-pointer2013060820130419
12068asm64gcc -m64 -Os -fomit-frame-pointer2013060820130419
12072asm64gcc -fno-schedule-insns -Os -fomit-frame-pointer2013060820130419
12076asm64gcc -m64 -march=barcelona -Os -fomit-frame-pointer2013060820130419
12076asm64gcc -march=k8 -Os -fomit-frame-pointer2013060820130419
12080asm64gcc -m64 -march=corei7-avx -Os -fomit-frame-pointer2013060820130419
12084asm64gcc -march=barcelona -Os -fomit-frame-pointer2013060820130419
12088asm64gcc -m64 -march=native -mtune=native -Os -fomit-frame-pointer2013060820130419
12088asm64gcc -m64 -march=nocona -Os -fomit-frame-pointer2013060820130419
12100asm64gcc -funroll-loops -m64 -Os -fomit-frame-pointer2013060820130419
12104asm64gcc -funroll-loops -march=k8 -Os -fomit-frame-pointer2013060820130419
12108asm64gcc -m64 -march=k8 -Os -fomit-frame-pointer2013060820130419
12112asm64gcc -funroll-loops -fno-schedule-insns -Os -fomit-frame-pointer2013060820130419
12112asm64gcc -funroll-loops -march=barcelona -Os -fomit-frame-pointer2013060820130419
12116asm64gcc -funroll-loops -m64 -march=k8 -Os -fomit-frame-pointer2013060820130419
12128asm64gcc -march=nocona -Os -fomit-frame-pointer2013060820130419
12136asm64gcc -funroll-loops -Os -fomit-frame-pointer2013060820130419
12148asm64gcc -funroll-loops -m64 -march=nocona -Os -fomit-frame-pointer2013060820130419
12152asm64gcc -funroll-loops -march=nocona -Os -fomit-frame-pointer2013060820130419
12160asm64gcc -funroll-loops -m64 -march=barcelona -Os -fomit-frame-pointer2013060820130419
12308asm64gcc -funroll-loops -fno-schedule-insns -O -fomit-frame-pointer2013060820130419
13004asm64gcc2013060820130419
13068opt64gcc -funroll-loops -O -fomit-frame-pointer2013060820130419
13124asm64gcc -funroll-loops2013060820130419
13124opt64gcc -funroll-loops -fno-schedule-insns -O -fomit-frame-pointer2013060820130419
13141opt64gcc -m64 -march=native -mtune=native -O -fomit-frame-pointer2013060820130419
13145opt64gcc -m64 -march=core2 -O -fomit-frame-pointer2013060820130419
13448opt64gcc -m64 -march=corei7-avx -O3 -fomit-frame-pointer2013060820130419
13472asm64cc2013060820130419
13506opt64gcc -funroll-loops -fno-schedule-insns -O3 -fomit-frame-pointer2013060820130419
13532opt64gcc -m64 -O2 -fomit-frame-pointer2013060820130419
13548opt64gcc -m64 -march=corei7-avx -O2 -fomit-frame-pointer2013060820130419
13564opt64gcc -m64 -march=core2 -msse4.1 -O2 -fomit-frame-pointer2013060820130419
13572opt64gcc -funroll-loops -march=k8 -O -fomit-frame-pointer2013060820130419
13576opt64gcc -m64 -march=core2 -msse4 -O -fomit-frame-pointer2013060820130419
13580opt64gcc -m64 -march=barcelona -O2 -fomit-frame-pointer2013060820130419
13586opt64gcc -funroll-loops -m64 -O -fomit-frame-pointer2013060820130419
13588opt64gcc -funroll-loops -m64 -march=nocona -O -fomit-frame-pointer2013060820130419
13593opt64gcc -funroll-loops -march=k8 -Os -fomit-frame-pointer2013060820130419
13596opt64gcc -m64 -O -fomit-frame-pointer2013060820130419
13596opt64gcc -m64 -march=corei7 -O -fomit-frame-pointer2013060820130419
13604opt64gcc -fno-schedule-insns -O -fomit-frame-pointer2013060820130419
13616opt64gcc -O -fomit-frame-pointer2013060820130419
13624opt64gcc -march=nocona -O -fomit-frame-pointer2013060820130419
13856opt64gcc -funroll-loops -m64 -Os -fomit-frame-pointer2013060820130419
13872opt64gcc -funroll-loops -m64 -march=barcelona -Os -fomit-frame-pointer2013060820130419
13961opt64gcc -m64 -O3 -fomit-frame-pointer2013060820130419
13964opt64gcc -m64 -march=nocona -O2 -fomit-frame-pointer2013060820130419
14064opt64clang -O32013060820130419
14081opt64gcc -m64 -march=core-avx-i -O2 -fomit-frame-pointer2013060820130419
14088opt64gcc -march=barcelona -O2 -fomit-frame-pointer2013060820130419
14126opt64gcc -m64 -march=core2 -msse4.1 -O -fomit-frame-pointer2013060820130419
14128opt64gcc -m64 -march=k8 -O2 -fomit-frame-pointer2013060820130419
14168opt64gcc -funroll-loops -march=nocona -O -fomit-frame-pointer2013060820130419
14193opt64gcc -march=k8 -O -fomit-frame-pointer2013060820130419
14243opt64gcc -funroll-loops -m64 -march=nocona -O3 -fomit-frame-pointer2013060820130419
14243opt64gcc -m64 -march=core2 -Os -fomit-frame-pointer2013060820130419
14247opt64gcc -m64 -march=core2 -msse4 -Os -fomit-frame-pointer2013060820130419
14334opt64gcc -funroll-loops -m64 -march=nocona -O2 -fomit-frame-pointer2013060820130419
14351opt64gcc -funroll-loops -march=nocona -O2 -fomit-frame-pointer2013060820130419
14364opt64gcc -funroll-loops -m64 -march=k8 -Os -fomit-frame-pointer2013060820130419
14384opt64gcc -m64 -march=nocona -O3 -fomit-frame-pointer2013060820130419
14420opt64gcc -funroll-loops -march=barcelona -Os -fomit-frame-pointer2013060820130419
14496opt64gcc -march=barcelona -O -fomit-frame-pointer2013060820130419
14504opt64gcc -m64 -march=k8 -O -fomit-frame-pointer2013060820130419
14537opt64gcc -march=nocona -Os -fomit-frame-pointer2013060820130419
14578opt64clang -O3 -mavx2013060820130419
14588opt64gcc -funroll-loops -m64 -march=barcelona -O -fomit-frame-pointer2013060820130419
14628opt64gcc -march=barcelona -Os -fomit-frame-pointer2013060820130419
14640opt64gcc -m64 -Os -fomit-frame-pointer2013060820130419
14646opt64gcc -march=k8 -O2 -fomit-frame-pointer2013060820130419
14660opt64gcc -m64 -march=k8 -Os -fomit-frame-pointer2013060820130419
14676opt64gcc -m64 -march=corei7-avx -O -fomit-frame-pointer2013060820130419
14692opt64gcc -funroll-loops -O2 -fomit-frame-pointer2013060820130419
14696opt64gcc -m64 -march=core-avx-i -O -fomit-frame-pointer2013060820130419
14708opt64gcc -fno-schedule-insns -Os -fomit-frame-pointer2013060820130419
14720opt64gcc -m64 -march=nocona -O -fomit-frame-pointer2013060820130419
14752opt64gcc -m64 -march=core2 -msse4.1 -Os -fomit-frame-pointer2013060820130419
14780opt64gcc -m64 -march=corei7-avx -Os -fomit-frame-pointer2013060820130419
14948opt64gcc -march=nocona -O3 -fomit-frame-pointer2013060820130419
14972opt64gcc -funroll-loops -Os -fomit-frame-pointer2013060820130419
14972opt64gcc -funroll-loops -fno-schedule-insns -Os -fomit-frame-pointer2013060820130419
15008opt64gcc -march=nocona -O2 -fomit-frame-pointer2013060820130419
15068opt64gcc -m64 -march=barcelona -O -fomit-frame-pointer2013060820130419
15080opt64gcc -O2 -fomit-frame-pointer2013060820130419
15096opt64gcc -fno-schedule-insns -O2 -fomit-frame-pointer2013060820130419
15118opt64gcc -m64 -march=corei7 -O2 -fomit-frame-pointer2013060820130419
15122opt64gcc -m64 -march=native -mtune=native -O2 -fomit-frame-pointer2013060820130419
15142opt64gcc -m64 -march=core2 -O2 -fomit-frame-pointer2013060820130419
15148opt64gcc -funroll-loops -O3 -fomit-frame-pointer2013060820130419
15176opt64gcc -funroll-loops -m64 -march=k8 -O -fomit-frame-pointer2013060820130419
15180opt64gcc -m64 -march=barcelona -Os -fomit-frame-pointer2013060820130419
15220opt64gcc -march=k8 -Os -fomit-frame-pointer2013060820130419
15226opt64gcc -funroll-loops -m64 -march=nocona -Os -fomit-frame-pointer2013060820130419
15236opt64gcc -Os -fomit-frame-pointer2013060820130419
15260opt64gcc -funroll-loops -m64 -O2 -fomit-frame-pointer2013060820130419
15312opt64gcc -funroll-loops -march=nocona -Os -fomit-frame-pointer2013060820130419
15496opt64gcc -funroll-loops -fno-schedule-insns -O2 -fomit-frame-pointer2013060820130419
15548opt64gcc -m64 -march=corei7 -O3 -fomit-frame-pointer2013060820130419
15564opt64gcc -m64 -march=core2 -msse4 -O3 -fomit-frame-pointer2013060820130419
15578opt64gcc -m64 -march=nocona -Os -fomit-frame-pointer2013060820130419
15604opt64gcc -fno-schedule-insns -O3 -fomit-frame-pointer2013060820130419
15685opt64gcc -m64 -march=core-avx-i -O3 -fomit-frame-pointer2013060820130419
15724opt64gcc -m64 -march=core2 -msse4 -O2 -fomit-frame-pointer2013060820130419
15780opt64gcc -funroll-loops -m64 -O3 -fomit-frame-pointer2013060820130419
15816opt64gcc -m64 -march=native -mtune=native -Os -fomit-frame-pointer2013060820130419
15843opt64gcc -funroll-loops -march=barcelona -O -fomit-frame-pointer2013060820130419
15876opt64gcc -funroll-loops -march=nocona -O3 -fomit-frame-pointer2013060820130419
15948opt64gcc -m64 -march=corei7 -Os -fomit-frame-pointer2013060820130419
16224opt64gcc -O3 -fomit-frame-pointer2013060820130419
16227opt64gcc -m64 -march=core2 -O3 -fomit-frame-pointer2013060820130419
16228opt64gcc -m64 -march=core2 -msse4.1 -O3 -fomit-frame-pointer2013060820130419
16364opt64gcc -m64 -march=native -mtune=native -O3 -fomit-frame-pointer2013060820130419
16613opt64gcc -m64 -march=core-avx-i -Os -fomit-frame-pointer2013060820130419
17578opt64gcc -funroll-loops -m64 -march=k8 -O3 -fomit-frame-pointer2013060820130419
17649opt64gcc -funroll-loops -m64 -march=barcelona -O2 -fomit-frame-pointer2013060820130419
18270opt64gcc -m64 -march=barcelona -O3 -fomit-frame-pointer2013060820130419
18388opt64gcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer2013060820130419
18876opt64gcc -m64 -march=k8 -O3 -fomit-frame-pointer2013060820130419
18880opt64gcc -funroll-loops -march=barcelona -O3 -fomit-frame-pointer2013060820130419
18926opt64gcc -funroll-loops -march=k8 -O3 -fomit-frame-pointer2013060820130419
19020opt64gcc -funroll-loops -march=barcelona -O2 -fomit-frame-pointer2013060820130419
19568opt64gcc -march=barcelona -O3 -fomit-frame-pointer2013060820130419
19580opt64gcc -march=k8 -O3 -fomit-frame-pointer2013060820130419
19648opt64gcc -funroll-loops -m64 -march=barcelona -O3 -fomit-frame-pointer2013060820130419
19896opt64gcc -funroll-loops -march=k8 -O2 -fomit-frame-pointer2013060820130419
32000opt64cc2013060820130419
34412opt64gcc2013060820130419
35852opt64gcc -funroll-loops2013060820130419

Checksum failure

Implementation: crypto_hash/sarmal512/asm64
Compiler: gcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer
8c5da0ec5496afb38282403c0342de2ea3bbdd552735074c542e099ad54af78178ff83043169f039a42f171a3fb6b64717c5bce7dd9bde60a643519d4f82e7a2
Number of similar (compiler,implementation) pairs: 2, namely:
Compiler | Implementations
gcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer asm64
gcc -funroll-loops -march=k8 -O2 -fomit-frame-pointer asm64

Checksum failure

Implementation: crypto_hash/sarmal512/asm64
Compiler: gcc -m64 -march=k8 -O2 -fomit-frame-pointer
bb17c5923d4961cfed9f5ddc3b3313cfbc6c4157967e72ddb615028f2d11053d49a7a23bf864e55fc7e11943c57c07d58ae56469422da8f34e30c3444764fd59
Number of similar (compiler,implementation) pairs: 2, namely:
Compiler | Implementations
gcc -m64 -march=k8 -O2 -fomit-frame-pointer asm64
gcc -march=k8 -O2 -fomit-frame-pointer asm64

Test failure

Implementation: crypto_hash/sarmal512/asm64
Compiler: gcc -funroll-loops -m64 -march=k8 -O3 -fomit-frame-pointer
error 142
sh: line 1: 25236 Alarm clock killafter 3600 ./try

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler | Implementations
gcc -funroll-loops -m64 -march=k8 -O3 -fomit-frame-pointer asm64

Test failure

Implementation: crypto_hash/sarmal512/asm64
Compiler: gcc -funroll-loops -march=k8 -O3 -fomit-frame-pointer
error 142
sh: line 1: 32431 Alarm clock killafter 3600 ./try

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler | Implementations
gcc -funroll-loops -march=k8 -O3 -fomit-frame-pointer asm64

Test failure

Implementation: crypto_hash/sarmal512/asm64
Compiler: gcc -m64 -march=k8 -O3 -fomit-frame-pointer
error 111

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler | Implementations
gcc -m64 -march=k8 -O3 -fomit-frame-pointer asm64
gcc -m64 -march=k8 -O -fomit-frame-pointer asm64
gcc -march=k8 -O3 -fomit-frame-pointer asm64
gcc -march=k8 -O -fomit-frame-pointer asm64

Compiler output

Implementation: crypto_hash/sarmal512/asm64
Compiler: clang -O3
sarmal.c: sarmal.c:895:3: error: Expected register
sarmal.c: ASG(8,9,10,11,12,13,14,15,0,1,2,3)
sarmal.c: ^
sarmal.c: sarmal.c:55:2: note: expanded from macro 'ASG'
sarmal.c: AS2(movzx, rcx, bl) \
sarmal.c: ^
sarmal.c: sarmal.c:49:52: note: expanded from macro 'AS2'
sarmal.c: #define AS2(ins,dst,src) #ins " " #dst "," #src "\n\t"
sarmal.c: ^
sarmal.c: <inline asm>:66:17: note: instantiated into assembly here
sarmal.c: mov rdx,[rsi+7*2048+rcx*8]
sarmal.c: ^
sarmal.c: sarmal.c:895:3: error: Expected register
sarmal.c: ASG(8,9,10,11,12,13,14,15,0,1,2,3)
sarmal.c: ^
sarmal.c: sarmal.c:58:2: note: expanded from macro 'ASG'
sarmal.c: AS2(movzx, rcx, bl) \
sarmal.c: ^
sarmal.c: sarmal.c:49:52: note: expanded from macro 'AS2'
sarmal.c: #define AS2(ins,dst,src) #ins " " #dst "," #src "\n\t"
sarmal.c: ^
sarmal.c: <inline asm>:69:17: note: instantiated into assembly here
sarmal.c: xor rdx,[rsi+6*2048+rcx*8]
sarmal.c: ^
sarmal.c: sarmal.c:895:3: error: Expected register
sarmal.c: ...

Number of similar (compiler,implementation) pairs: 2, namely:
Compiler | Implementations
clang -O3 asm64
clang -O3 -mavx asm64