Implementation notes: amd64, haswell, crypto_hash/sarmal256

Computer: haswell
Architecture: amd64
CPU ID: GenuineIntel-000306c3-bfebfbff
SUPERCOP version: 20130419
Operation: crypto_hash
Primitive: sarmal256
Time | Implementation | Compiler | Benchmark date | SUPERCOP version
9380asm64gcc -m64 -march=native -mtune=native -O -fomit-frame-pointer2013060820130419
9420asm64gcc -march=nocona -O2 -fomit-frame-pointer2013060820130419
9537asm64gcc -O3 -fomit-frame-pointer2013060820130419
9587asm64gcc -m64 -march=native -mtune=native -O3 -fomit-frame-pointer2013060820130419
9686asm64gcc -funroll-loops -m64 -march=nocona -O2 -fomit-frame-pointer2013060820130419
9702asm64gcc -m64 -march=core2 -msse4 -O -fomit-frame-pointer2013060820130419
9735asm64gcc -m64 -march=nocona -O -fomit-frame-pointer2013060820130419
9740asm64gcc -m64 -march=corei7 -O3 -fomit-frame-pointer2013060820130419
9900asm64gcc -m64 -march=core-avx-i -O3 -fomit-frame-pointer2013060820130419
9900asm64gcc -m64 -march=core2 -O3 -fomit-frame-pointer2013060820130419
9904asm64gcc -funroll-loops -march=nocona -O3 -fomit-frame-pointer2013060820130419
9930asm64gcc -funroll-loops -march=barcelona -O2 -fomit-frame-pointer2013060820130419
9984asm64gcc -march=k8 -Os -fomit-frame-pointer2013060820130419
9996asm64gcc -m64 -march=barcelona -Os -fomit-frame-pointer2013060820130419
10044asm64gcc -funroll-loops -m64 -O -fomit-frame-pointer2013060820130419
10052asm64gcc -funroll-loops -fno-schedule-insns -O -fomit-frame-pointer2013060820130419
10063asm64gcc -funroll-loops -m64 -O2 -fomit-frame-pointer2013060820130419
10076asm64gcc -march=nocona -O -fomit-frame-pointer2013060820130419
10136asm64gcc -m64 -march=barcelona -O3 -fomit-frame-pointer2013060820130419
10148asm64gcc -m64 -O2 -fomit-frame-pointer2013060820130419
10164asm64gcc -m64 -march=core2 -msse4.1 -Os -fomit-frame-pointer2013060820130419
10278asm64gcc -m64 -O3 -fomit-frame-pointer2013060820130419
10278asm64gcc -m64 -march=nocona -O3 -fomit-frame-pointer2013060820130419
10288asm64gcc -funroll-loops -march=barcelona -O3 -fomit-frame-pointer2013060820130419
10308asm64gcc -m64 -march=core2 -msse4 -O3 -fomit-frame-pointer2013060820130419
10344asm64gcc -march=nocona -O3 -fomit-frame-pointer2013060820130419
10352asm64gcc -funroll-loops -m64 -march=k8 -O -fomit-frame-pointer2013060820130419
10374asm64gcc -funroll-loops -Os -fomit-frame-pointer2013060820130419
10424asm64gcc -march=barcelona -O2 -fomit-frame-pointer2013060820130419
10432asm64gcc -funroll-loops -O2 -fomit-frame-pointer2013060820130419
10436opt64gcc -funroll-loops -O2 -fomit-frame-pointer2013060820130419
10452asm64gcc -funroll-loops -m64 -march=nocona -O -fomit-frame-pointer2013060820130419
10518asm64gcc -fno-schedule-insns -O2 -fomit-frame-pointer2013060820130419
10536asm64gcc -m64 -march=barcelona -O -fomit-frame-pointer2013060820130419
10544asm64gcc -m64 -march=corei7 -Os -fomit-frame-pointer2013060820130419
10664asm64gcc -m64 -march=corei7-avx -O3 -fomit-frame-pointer2013060820130419
10680asm64gcc -fno-schedule-insns -O3 -fomit-frame-pointer2013060820130419
10692asm64gcc -m64 -march=core2 -msse4.1 -O3 -fomit-frame-pointer2013060820130419
10700asm64gcc -march=barcelona -O -fomit-frame-pointer2013060820130419
10716asm64gcc -Os -fomit-frame-pointer2013060820130419
10724asm64gcc -funroll-loops -fno-schedule-insns -Os -fomit-frame-pointer2013060820130419
10728asm64gcc -funroll-loops -m64 -Os -fomit-frame-pointer2013060820130419
10730asm64gcc -funroll-loops -m64 -march=barcelona -O -fomit-frame-pointer2013060820130419
10732opt64gcc -funroll-loops -fno-schedule-insns -O -fomit-frame-pointer2013060820130419
10744asm64gcc -m64 -march=nocona -Os -fomit-frame-pointer2013060820130419
10752asm64gcc -m64 -march=native -mtune=native -Os -fomit-frame-pointer2013060820130419
10796opt64gcc -funroll-loops -m64 -march=k8 -O -fomit-frame-pointer2013060820130419
10812asm64gcc -funroll-loops -march=nocona -O2 -fomit-frame-pointer2013060820130419
10828asm64gcc2013060820130419
10852asm64gcc -fno-schedule-insns -O -fomit-frame-pointer2013060820130419
10864asm64gcc -m64 -O -fomit-frame-pointer2013060820130419
10871opt64gcc -m64 -march=core-avx-i -O -fomit-frame-pointer2013060820130419
10964asm64gcc -m64 -march=core-avx-i -O2 -fomit-frame-pointer2013060820130419
10976asm64gcc -m64 -march=corei7 -O2 -fomit-frame-pointer2013060820130419
11000asm64gcc -m64 -march=corei7-avx -O -fomit-frame-pointer2013060820130419
11032asm64gcc -m64 -march=core2 -O -fomit-frame-pointer2013060820130419
11066opt64gcc -march=barcelona -O2 -fomit-frame-pointer2013060820130419
11074opt64gcc -m64 -march=core-avx-i -O3 -fomit-frame-pointer2013060820130419
11080asm64gcc -m64 -march=nocona -O2 -fomit-frame-pointer2013060820130419
11094asm64gcc -m64 -march=core2 -Os -fomit-frame-pointer2013060820130419
11110asm64gcc -m64 -march=core2 -msse4 -Os -fomit-frame-pointer2013060820130419
11116asm64gcc -O2 -fomit-frame-pointer2013060820130419
11116asm64gcc -m64 -march=core2 -O2 -fomit-frame-pointer2013060820130419
11120asm64gcc -funroll-loops -m64 -march=barcelona -O2 -fomit-frame-pointer2013060820130419
11144opt64gcc -funroll-loops -m64 -O -fomit-frame-pointer2013060820130419
11148asm64gcc -funroll-loops -fno-schedule-insns -O3 -fomit-frame-pointer2013060820130419
11149asm64gcc -funroll-loops -O3 -fomit-frame-pointer2013060820130419
11152opt64gcc -funroll-loops -fno-schedule-insns -O3 -fomit-frame-pointer2013060820130419
11157asm64gcc -funroll-loops -m64 -march=nocona -O3 -fomit-frame-pointer2013060820130419
11168opt64gcc -m64 -O3 -fomit-frame-pointer2013060820130419
11248asm64gcc -funroll-loops -fno-schedule-insns -O2 -fomit-frame-pointer2013060820130419
11292opt64gcc -m64 -march=core2 -msse4.1 -O -fomit-frame-pointer2013060820130419
11296opt64clang -O32013060820130419
11304opt64gcc -m64 -march=core2 -O -fomit-frame-pointer2013060820130419
11308opt64gcc -m64 -march=native -mtune=native -O -fomit-frame-pointer2013060820130419
11314asm64gcc -funroll-loops -march=nocona -O -fomit-frame-pointer2013060820130419
11322asm64gcc -O -fomit-frame-pointer2013060820130419
11406asm64gcc -m64 -march=corei7-avx -O2 -fomit-frame-pointer2013060820130419
11410asm64gcc -funroll-loops -m64 -march=barcelona -O3 -fomit-frame-pointer2013060820130419
11418asm64gcc -funroll-loops -m64 -O3 -fomit-frame-pointer2013060820130419
11432asm64gcc -m64 -march=barcelona -O2 -fomit-frame-pointer2013060820130419
11451asm64gcc -m64 -march=core2 -msse4 -O2 -fomit-frame-pointer2013060820130419
11451asm64gcc -m64 -march=native -mtune=native -O2 -fomit-frame-pointer2013060820130419
11459asm64gcc -march=barcelona -O3 -fomit-frame-pointer2013060820130419
11500opt64gcc -m64 -march=barcelona -O2 -fomit-frame-pointer2013060820130419
11516opt64gcc -funroll-loops -m64 -march=nocona -O2 -fomit-frame-pointer2013060820130419
11556asm64gcc -fno-schedule-insns -Os -fomit-frame-pointer2013060820130419
11568asm64gcc -funroll-loops2013060820130419
11572asm64gcc -m64 -Os -fomit-frame-pointer2013060820130419
11576opt64gcc -funroll-loops -O3 -fomit-frame-pointer2013060820130419
11576asm64gcc -march=nocona -Os -fomit-frame-pointer2013060820130419
11604asm64gcc -funroll-loops -m64 -march=barcelona -Os -fomit-frame-pointer2013060820130419
11608asm64gcc -funroll-loops -m64 -march=nocona -Os -fomit-frame-pointer2013060820130419
11608opt64gcc -m64 -march=core2 -msse4 -O3 -fomit-frame-pointer2013060820130419
11612opt64gcc -funroll-loops -m64 -O3 -fomit-frame-pointer2013060820130419
11641asm64gcc -funroll-loops -march=barcelona -O -fomit-frame-pointer2013060820130419
11648opt64gcc -m64 -O2 -fomit-frame-pointer2013060820130419
11664opt64gcc -m64 -march=core2 -msse4.1 -O2 -fomit-frame-pointer2013060820130419
11706opt64gcc -m64 -march=corei7-avx -O -fomit-frame-pointer2013060820130419
11724opt64gcc -m64 -march=core2 -msse4 -O -fomit-frame-pointer2013060820130419
11728opt64gcc -O -fomit-frame-pointer2013060820130419
11736opt64gcc -m64 -march=corei7 -O2 -fomit-frame-pointer2013060820130419
11745opt64gcc -m64 -march=corei7-avx -Os -fomit-frame-pointer2013060820130419
11797asm64gcc -funroll-loops -O -fomit-frame-pointer2013060820130419
11847asm64gcc -m64 -march=core2 -msse4.1 -O -fomit-frame-pointer2013060820130419
11847asm64gcc -m64 -march=corei7 -O -fomit-frame-pointer2013060820130419
11863asm64gcc -m64 -march=core-avx-i -O -fomit-frame-pointer2013060820130419
11864opt64gcc -march=k8 -O -fomit-frame-pointer2013060820130419
11912asm64cc2013060820130419
11944opt64gcc -m64 -march=k8 -O2 -fomit-frame-pointer2013060820130419
11960opt64gcc -m64 -march=native -mtune=native -O3 -fomit-frame-pointer2013060820130419
11984opt64gcc -funroll-loops -march=nocona -O2 -fomit-frame-pointer2013060820130419
12006opt64gcc -march=nocona -O3 -fomit-frame-pointer2013060820130419
12026asm64gcc -m64 -march=core-avx-i -Os -fomit-frame-pointer2013060820130419
12028opt64gcc -fno-schedule-insns -O3 -fomit-frame-pointer2013060820130419
12036opt64gcc -O3 -fomit-frame-pointer2013060820130419
12039asm64gcc -m64 -march=corei7-avx -Os -fomit-frame-pointer2013060820130419
12043asm64gcc -funroll-loops -march=k8 -Os -fomit-frame-pointer2013060820130419
12048opt64gcc -m64 -march=core2 -msse4.1 -O3 -fomit-frame-pointer2013060820130419
12056asm64gcc -funroll-loops -march=barcelona -Os -fomit-frame-pointer2013060820130419
12056opt64gcc -m64 -march=core2 -O3 -fomit-frame-pointer2013060820130419
12064opt64gcc -funroll-loops -O -fomit-frame-pointer2013060820130419
12068opt64gcc -funroll-loops -m64 -march=nocona -O -fomit-frame-pointer2013060820130419
12072opt64gcc -m64 -march=nocona -O2 -fomit-frame-pointer2013060820130419
12073asm64gcc -funroll-loops -m64 -march=k8 -Os -fomit-frame-pointer2013060820130419
12076opt64gcc -funroll-loops -m64 -march=barcelona -Os -fomit-frame-pointer2013060820130419
12076asm64gcc -m64 -march=k8 -Os -fomit-frame-pointer2013060820130419
12090opt64gcc -funroll-loops -m64 -Os -fomit-frame-pointer2013060820130419
12093opt64gcc -funroll-loops -march=nocona -O -fomit-frame-pointer2013060820130419
12096opt64gcc -m64 -march=corei7-avx -O2 -fomit-frame-pointer2013060820130419
12110asm64gcc -funroll-loops -march=nocona -Os -fomit-frame-pointer2013060820130419
12110asm64gcc -m64 -march=core2 -msse4.1 -O2 -fomit-frame-pointer2013060820130419
12172opt64gcc -m64 -march=core2 -Os -fomit-frame-pointer2013060820130419
12201opt64gcc -funroll-loops -m64 -O2 -fomit-frame-pointer2013060820130419
12204opt64gcc -m64 -march=corei7 -O -fomit-frame-pointer2013060820130419
12204opt64gcc -m64 -march=nocona -O -fomit-frame-pointer2013060820130419
12208opt64gcc -march=nocona -O -fomit-frame-pointer2013060820130419
12224opt64gcc -fno-schedule-insns -O -fomit-frame-pointer2013060820130419
12234opt64gcc -funroll-loops -fno-schedule-insns -O2 -fomit-frame-pointer2013060820130419
12312opt64gcc -funroll-loops -march=nocona -O3 -fomit-frame-pointer2013060820130419
12312opt64gcc -funroll-loops -march=nocona -Os -fomit-frame-pointer2013060820130419
12324opt64gcc -funroll-loops -m64 -march=nocona -O3 -fomit-frame-pointer2013060820130419
12333asm64gcc -march=barcelona -Os -fomit-frame-pointer2013060820130419
12404opt64gcc -march=k8 -O2 -fomit-frame-pointer2013060820130419
12448opt64gcc -march=nocona -Os -fomit-frame-pointer2013060820130419
12490opt64gcc -march=k8 -Os -fomit-frame-pointer2013060820130419
12492opt64gcc -Os -fomit-frame-pointer2013060820130419
12528opt64gcc -funroll-loops -march=k8 -O -fomit-frame-pointer2013060820130419
12556opt64gcc -funroll-loops -march=barcelona -O -fomit-frame-pointer2013060820130419
12557opt64gcc -m64 -march=corei7 -O3 -fomit-frame-pointer2013060820130419
12572opt64gcc -m64 -march=core2 -O2 -fomit-frame-pointer2013060820130419
12592opt64gcc -funroll-loops -march=k8 -Os -fomit-frame-pointer2013060820130419
12604opt64gcc -m64 -march=core2 -msse4 -O2 -fomit-frame-pointer2013060820130419
12612opt64gcc -fno-schedule-insns -O2 -fomit-frame-pointer2013060820130419
12624opt64gcc -m64 -march=core-avx-i -O2 -fomit-frame-pointer2013060820130419
12628opt64gcc -O2 -fomit-frame-pointer2013060820130419
12678opt64gcc -m64 -march=core2 -msse4 -Os -fomit-frame-pointer2013060820130419
12692opt64gcc -m64 -march=k8 -O -fomit-frame-pointer2013060820130419
12697asm64gcc -funroll-loops -march=k8 -O -fomit-frame-pointer2013060820130419
12702opt64clang -O3 -mavx2013060820130419
12706opt64gcc -m64 -O -fomit-frame-pointer2013060820130419
12880opt64gcc -m64 -march=corei7-avx -O3 -fomit-frame-pointer2013060820130419
12928opt64gcc -march=barcelona -O -fomit-frame-pointer2013060820130419
12940opt64gcc -m64 -Os -fomit-frame-pointer2013060820130419
12972opt64gcc -m64 -march=native -mtune=native -Os -fomit-frame-pointer2013060820130419
12980opt64gcc -march=barcelona -Os -fomit-frame-pointer2013060820130419
12988opt64gcc -m64 -march=barcelona -Os -fomit-frame-pointer2013060820130419
12992opt64gcc -m64 -march=barcelona -O -fomit-frame-pointer2013060820130419
13025opt64gcc -m64 -march=nocona -O3 -fomit-frame-pointer2013060820130419
13062opt64gcc -funroll-loops -march=barcelona -Os -fomit-frame-pointer2013060820130419
13158opt64gcc -m64 -march=native -mtune=native -O2 -fomit-frame-pointer2013060820130419
13164opt64gcc -m64 -march=core-avx-i -Os -fomit-frame-pointer2013060820130419
13172opt64gcc -m64 -march=k8 -Os -fomit-frame-pointer2013060820130419
13196opt64gcc -m64 -march=corei7 -Os -fomit-frame-pointer2013060820130419
13530opt64gcc -fno-schedule-insns -Os -fomit-frame-pointer2013060820130419
13688opt64gcc -funroll-loops -Os -fomit-frame-pointer2013060820130419
13703opt64gcc -funroll-loops -fno-schedule-insns -Os -fomit-frame-pointer2013060820130419
13708opt64gcc -funroll-loops -m64 -march=k8 -Os -fomit-frame-pointer2013060820130419
13725opt64gcc -m64 -march=core2 -msse4.1 -Os -fomit-frame-pointer2013060820130419
13794opt64gcc -funroll-loops -m64 -march=barcelona -O -fomit-frame-pointer2013060820130419
14192opt64gcc -march=nocona -O2 -fomit-frame-pointer2013060820130419
14206opt64gcc -m64 -march=nocona -Os -fomit-frame-pointer2013060820130419
14470opt64gcc -funroll-loops -m64 -march=nocona -Os -fomit-frame-pointer2013060820130419
15532opt64gcc -funroll-loops -march=k8 -O2 -fomit-frame-pointer2013060820130419
15688opt64gcc -march=k8 -O3 -fomit-frame-pointer2013060820130419
15892opt64gcc -funroll-loops -m64 -march=k8 -O3 -fomit-frame-pointer2013060820130419
16028opt64gcc -funroll-loops -m64 -march=barcelona -O2 -fomit-frame-pointer2013060820130419
16098opt64gcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer2013060820130419
16312opt64gcc -m64 -march=k8 -O3 -fomit-frame-pointer2013060820130419
16468opt64gcc -funroll-loops -march=k8 -O3 -fomit-frame-pointer2013060820130419
16520opt64gcc -funroll-loops -march=barcelona -O3 -fomit-frame-pointer2013060820130419
16668opt64gcc -funroll-loops -march=barcelona -O2 -fomit-frame-pointer2013060820130419
16948opt64gcc -march=barcelona -O3 -fomit-frame-pointer2013060820130419
17230opt64gcc -funroll-loops -m64 -march=barcelona -O3 -fomit-frame-pointer2013060820130419
18394opt64gcc -m64 -march=barcelona -O3 -fomit-frame-pointer2013060820130419
26336opt64cc2013060820130419
28328opt64gcc2013060820130419
28616opt64gcc -funroll-loops2013060820130419

Checksum failure

Implementation: crypto_hash/sarmal256/asm64
Compiler: gcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer
388cb6271b4a2fe361dc417fa6ef2ed8b8068013427cc1f2913da13418f4373e
Number of similar (compiler,implementation) pairs: 2, namely:
Compiler | Implementations
gcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer asm64
gcc -funroll-loops -march=k8 -O2 -fomit-frame-pointer asm64

Checksum failure

Implementation: crypto_hash/sarmal256/asm64
Compiler: gcc -m64 -march=k8 -O2 -fomit-frame-pointer
fe874d2e0aa8e955afcd252edd925467151509c6f98b2c69d569696d4c10e9f6
Number of similar (compiler,implementation) pairs: 2, namely:
Compiler | Implementations
gcc -m64 -march=k8 -O2 -fomit-frame-pointer asm64
gcc -march=k8 -O2 -fomit-frame-pointer asm64

Test failure

Implementation: crypto_hash/sarmal256/asm64
Compiler: gcc -funroll-loops -m64 -march=k8 -O3 -fomit-frame-pointer
error 142
sh: line 1: 24173 Alarm clock killafter 3600 ./try

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler | Implementations
gcc -funroll-loops -m64 -march=k8 -O3 -fomit-frame-pointer asm64

Test failure

Implementation: crypto_hash/sarmal256/asm64
Compiler: gcc -funroll-loops -march=k8 -O3 -fomit-frame-pointer
error 142
sh: line 1: 26757 Alarm clock killafter 3600 ./try

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler | Implementations
gcc -funroll-loops -march=k8 -O3 -fomit-frame-pointer asm64

Test failure

Implementation: crypto_hash/sarmal256/asm64
Compiler: gcc -m64 -march=k8 -O3 -fomit-frame-pointer
error 111

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler | Implementations
gcc -m64 -march=k8 -O3 -fomit-frame-pointer asm64
gcc -m64 -march=k8 -O -fomit-frame-pointer asm64
gcc -march=k8 -O3 -fomit-frame-pointer asm64
gcc -march=k8 -O -fomit-frame-pointer asm64

Compiler output

Implementation: crypto_hash/sarmal256/asm64
Compiler: clang -O3
sarmal.c: sarmal.c:895:3: error: Expected register
sarmal.c: ASG(8,9,10,11,12,13,14,15,0,1,2,3)
sarmal.c: ^
sarmal.c: sarmal.c:55:2: note: expanded from macro 'ASG'
sarmal.c: AS2(movzx, rcx, bl) \
sarmal.c: ^
sarmal.c: sarmal.c:49:52: note: expanded from macro 'AS2'
sarmal.c: #define AS2(ins,dst,src) #ins " " #dst "," #src "\n\t"
sarmal.c: ^
sarmal.c: <inline asm>:66:17: note: instantiated into assembly here
sarmal.c: mov rdx,[rsi+7*2048+rcx*8]
sarmal.c: ^
sarmal.c: sarmal.c:895:3: error: Expected register
sarmal.c: ASG(8,9,10,11,12,13,14,15,0,1,2,3)
sarmal.c: ^
sarmal.c: sarmal.c:58:2: note: expanded from macro 'ASG'
sarmal.c: AS2(movzx, rcx, bl) \
sarmal.c: ^
sarmal.c: sarmal.c:49:52: note: expanded from macro 'AS2'
sarmal.c: #define AS2(ins,dst,src) #ins " " #dst "," #src "\n\t"
sarmal.c: ^
sarmal.c: <inline asm>:69:17: note: instantiated into assembly here
sarmal.c: xor rdx,[rsi+6*2048+rcx*8]
sarmal.c: ^
sarmal.c: sarmal.c:895:3: error: Expected register
sarmal.c: ...

Number of similar (compiler,implementation) pairs: 2, namely:
Compiler | Implementations
clang -O3 asm64
clang -O3 -mavx asm64