Implementation notes: amd64, sandy, crypto_hash/sarmal256

Computer: sandy
Architecture: amd64
CPU ID: GenuineIntel-000206a7-bfebfbff
SUPERCOP version: 20120908
Operation: crypto_hash
Primitive: sarmal256
Time | Implementation | Compiler | Benchmark date | SUPERCOP version
12788opt64gcc -funroll-loops -m64 -march=nocona -O -fomit-frame-pointer2012081620120812
12796opt64gcc -funroll-loops -m64 -O -fomit-frame-pointer2012081620120812
12812opt64gcc -funroll-loops -O -fomit-frame-pointer2012081620120812
12840opt64gcc -funroll-loops -march=nocona -O -fomit-frame-pointer2012081620120812
12848opt64gcc -funroll-loops -fno-schedule-insns -O -fomit-frame-pointer2012081620120812
12864opt64clang -O3 -mavx2012071220120709
12896asm64gcc -m64 -march=nocona -O3 -fomit-frame-pointer2012081620120812
12904asm64gcc -funroll-loops -m64 -march=nocona -O3 -fomit-frame-pointer2012081620120812
12928asm64gcc -march=nocona -O3 -fomit-frame-pointer2012081620120812
12960asm64gcc -funroll-loops -march=nocona -O3 -fomit-frame-pointer2012081620120812
12988opt64clang -O32012071220120709
12996asm64gcc -funroll-loops -m64 -march=nocona -O2 -fomit-frame-pointer2012081620120812
13004opt64gcc -m64 -march=nocona -O3 -fomit-frame-pointer2012081620120812
13076opt64gcc -fno-schedule-insns -O3 -fomit-frame-pointer2012081620120812
13088asm64gcc -funroll-loops -m64 -march=nocona -O -fomit-frame-pointer2012081620120812
13096asm64gcc -march=nocona -O2 -fomit-frame-pointer2012081620120812
13104opt64gcc -funroll-loops -O3 -fomit-frame-pointer2012081620120812
13104asm64gcc -m64 -march=nocona -O2 -fomit-frame-pointer2012081620120812
13120opt64gcc -O3 -fomit-frame-pointer2012081620120812
13132opt64gcc -m64 -march=core2 -O3 -fomit-frame-pointer2012081620120812
13148asm64gcc -funroll-loops -march=nocona -O2 -fomit-frame-pointer2012081620120812
13152opt64gcc -m64 -march=core2 -msse4.1 -O3 -fomit-frame-pointer2012081620120812
13156opt64gcc -march=nocona -O3 -fomit-frame-pointer2012081620120812
13164opt64gcc -m64 -O3 -fomit-frame-pointer2012081620120812
13180opt64gcc -m64 -march=corei7 -O3 -fomit-frame-pointer2012081620120812
13188opt64gcc -funroll-loops -m64 -O3 -fomit-frame-pointer2012081620120812
13196opt64gcc -funroll-loops -O2 -fomit-frame-pointer2012081620120812
13200opt64gcc -funroll-loops -march=nocona -O3 -fomit-frame-pointer2012081620120812
13204opt64gcc -funroll-loops -m64 -O2 -fomit-frame-pointer2012081620120812
13224asm64gcc -funroll-loops -march=nocona -O -fomit-frame-pointer2012081620120812
13228opt64gcc -funroll-loops -fno-schedule-insns -O2 -fomit-frame-pointer2012081620120812
13236asm64gcc -m64 -march=core-avx-i -O -fomit-frame-pointer2012081620120812
13248opt64gcc -funroll-loops -march=nocona -O2 -fomit-frame-pointer2012081620120812
13256opt64gcc -funroll-loops -fno-schedule-insns -O3 -fomit-frame-pointer2012081620120812
13256asm64gcc -m64 -march=core2 -O -fomit-frame-pointer2012081620120812
13276opt64gcc -funroll-loops -m64 -march=nocona -O3 -fomit-frame-pointer2012081620120812
13276asm64gcc -m64 -O -fomit-frame-pointer2012081620120812
13276asm64gcc -m64 -march=nocona -O -fomit-frame-pointer2012081620120812
13280asm64gcc -O -fomit-frame-pointer2012081620120812
13284asm64gcc -fno-schedule-insns -O -fomit-frame-pointer2012081620120812
13292asm64gcc -m64 -march=corei7-avx -O -fomit-frame-pointer2012081620120812
13296asm64gcc -m64 -march=core2 -msse4 -O -fomit-frame-pointer2012081620120812
13300opt64gcc -m64 -march=native -mtune=native -O3 -fomit-frame-pointer2012081620120812
13304asm64gcc -m64 -march=native -mtune=native -O -fomit-frame-pointer2012081620120812
13308opt64gcc -m64 -march=core2 -msse4 -O3 -fomit-frame-pointer2012081620120812
13308asm64gcc -march=barcelona -O3 -fomit-frame-pointer2012081620120812
13312asm64gcc -m64 -march=barcelona -O3 -fomit-frame-pointer2012081620120812
13332asm64gcc -march=nocona -O -fomit-frame-pointer2012081620120812
13384opt64gcc -m64 -march=core-avx-i -O3 -fomit-frame-pointer2012081620120812
13392asm64gcc -funroll-loops -march=barcelona -O -fomit-frame-pointer2012081620120812
13400asm64gcc -funroll-loops -m64 -march=barcelona -O -fomit-frame-pointer2012081620120812
13404opt64gcc -funroll-loops -m64 -march=nocona -O2 -fomit-frame-pointer2012081620120812
13416asm64gcc -funroll-loops -m64 -march=barcelona -O2 -fomit-frame-pointer2012081620120812
13428asm64gcc -funroll-loops -m64 -march=barcelona -O3 -fomit-frame-pointer2012081620120812
13448asm64gcc -funroll-loops -march=barcelona -O3 -fomit-frame-pointer2012081620120812
13488opt64gcc -m64 -march=core-avx-i -O -fomit-frame-pointer2012081620120812
13488opt64gcc -m64 -march=core2 -msse4 -O -fomit-frame-pointer2012081620120812
13492opt64gcc -O -fomit-frame-pointer2012081620120812
13492opt64gcc -m64 -march=corei7-avx -O -fomit-frame-pointer2012081620120812
13500opt64gcc -m64 -march=core2 -O -fomit-frame-pointer2012081620120812
13508opt64gcc -fno-schedule-insns -O -fomit-frame-pointer2012081620120812
13508opt64gcc -m64 -march=corei7 -O -fomit-frame-pointer2012081620120812
13512asm64gcc -m64 -march=barcelona -O2 -fomit-frame-pointer2012081620120812
13512opt64gcc -m64 -march=native -mtune=native -O -fomit-frame-pointer2012081620120812
13516asm64gcc -m64 -march=core2 -msse4.1 -O -fomit-frame-pointer2012081620120812
13520opt64gcc -m64 -march=core2 -msse4.1 -O -fomit-frame-pointer2012081620120812
13524asm64gcc -march=barcelona -O2 -fomit-frame-pointer2012081620120812
13548asm64gcc -march=barcelona -O -fomit-frame-pointer2012081620120812
13552asm64gcc -funroll-loops -march=k8 -O -fomit-frame-pointer2012081620120812
13552opt64gcc -m64 -march=corei7-avx -O3 -fomit-frame-pointer2012081620120812
13572asm64gcc -funroll-loops -m64 -march=k8 -O -fomit-frame-pointer2012081620120812
13588opt64gcc -m64 -march=nocona -O -fomit-frame-pointer2012081620120812
13600opt64gcc -march=nocona -O -fomit-frame-pointer2012081620120812
13608asm64gcc -m64 -march=corei7 -O -fomit-frame-pointer2012081620120812
13620opt64gcc -m64 -O -fomit-frame-pointer2012081620120812
13668asm64gcc -fno-schedule-insns -Os -fomit-frame-pointer2012081620120812
13672asm64gcc -funroll-loops -march=barcelona -O2 -fomit-frame-pointer2012081620120812
13676asm64gcc -march=barcelona -Os -fomit-frame-pointer2012081620120812
13684asm64gcc -m64 -march=barcelona -Os -fomit-frame-pointer2012081620120812
13700asm64gcc -m64 -Os -fomit-frame-pointer2012081620120812
13712asm64gcc -Os -fomit-frame-pointer2012081620120812
13740asm64gcc -m64 -march=barcelona -O -fomit-frame-pointer2012081620120812
13744asm64gcc -march=nocona -Os -fomit-frame-pointer2012081620120812
13752asm64gcc -m64 -march=corei7-avx -Os -fomit-frame-pointer2012081620120812
13764asm64gcc -m64 -march=native -mtune=native -Os -fomit-frame-pointer2012081620120812
13804asm64gcc -m64 -march=core2 -msse4 -Os -fomit-frame-pointer2012081620120812
13808asm64gcc -funroll-loops -m64 -march=nocona -Os -fomit-frame-pointer2012081620120812
13808asm64gcc -m64 -march=core2 -Os -fomit-frame-pointer2012081620120812
13808opt64gcc -march=nocona -O2 -fomit-frame-pointer2012081620120812
13824asm64gcc -funroll-loops -march=nocona -Os -fomit-frame-pointer2012081620120812
13832asm64gcc -funroll-loops -march=barcelona -Os -fomit-frame-pointer2012081620120812
13836asm64gcc -funroll-loops -fno-schedule-insns -Os -fomit-frame-pointer2012081620120812
13836asm64gcc -funroll-loops -m64 -Os -fomit-frame-pointer2012081620120812
13836asm64gcc -funroll-loops -m64 -march=barcelona -Os -fomit-frame-pointer2012081620120812
13836asm64gcc -funroll-loops -m64 -march=k8 -Os -fomit-frame-pointer2012081620120812
13844asm64gcc -funroll-loops -march=k8 -Os -fomit-frame-pointer2012081620120812
13844asm64gcc -march=k8 -Os -fomit-frame-pointer2012081620120812
13856asm64gcc -m64 -march=nocona -Os -fomit-frame-pointer2012081620120812
13872asm64gcc -m64 -march=corei7 -Os -fomit-frame-pointer2012081620120812
13892opt64gcc -m64 -march=corei7-avx -O2 -fomit-frame-pointer2012081620120812
13896opt64gcc -m64 -march=native -mtune=native -O2 -fomit-frame-pointer2012081620120812
13908opt64gcc -m64 -march=core-avx-i -O2 -fomit-frame-pointer2012081620120812
13916asm64gcc -m64 -march=k8 -Os -fomit-frame-pointer2012081620120812
13948opt64gcc -m64 -march=nocona -O2 -fomit-frame-pointer2012081620120812
13964opt64gcc -funroll-loops -m64 -march=k8 -O -fomit-frame-pointer2012081620120812
13992opt64gcc -m64 -O2 -fomit-frame-pointer2012081620120812
14024opt64gcc -O2 -fomit-frame-pointer2012081620120812
14028opt64gcc -fno-schedule-insns -O2 -fomit-frame-pointer2012081620120812
14036asm64gcc -funroll-loops -Os -fomit-frame-pointer2012081620120812
14084opt64gcc -m64 -march=core2 -O2 -fomit-frame-pointer2012081620120812
14116asm64gcc -funroll-loops2012081620120812
14120asm64gcc2012081620120812
14136asm64gcc -m64 -march=core2 -msse4.1 -Os -fomit-frame-pointer2012081620120812
14148opt64gcc -m64 -march=corei7 -O2 -fomit-frame-pointer2012081620120812
14156opt64gcc -m64 -march=core2 -msse4 -O2 -fomit-frame-pointer2012081620120812
14160opt64gcc -m64 -march=core2 -msse4.1 -O2 -fomit-frame-pointer2012081620120812
14176asm64gcc -m64 -march=core-avx-i -Os -fomit-frame-pointer2012081620120812
14188asm64cc2012081620120812
14188opt64gcc -m64 -march=k8 -O -fomit-frame-pointer2012081620120812
14200opt64gcc -funroll-loops -march=barcelona -O -fomit-frame-pointer2012081620120812
14224opt64gcc -march=barcelona -O -fomit-frame-pointer2012081620120812
14252opt64gcc -march=k8 -O -fomit-frame-pointer2012081620120812
14284opt64gcc -m64 -march=k8 -O2 -fomit-frame-pointer2012081620120812
14300opt64gcc -funroll-loops -m64 -march=barcelona -O -fomit-frame-pointer2012081620120812
14324opt64gcc -m64 -march=barcelona -O2 -fomit-frame-pointer2012081620120812
14344opt64gcc -funroll-loops -march=k8 -O -fomit-frame-pointer2012081620120812
14380opt64gcc -m64 -march=barcelona -O -fomit-frame-pointer2012081620120812
14420opt64gcc -march=barcelona -O2 -fomit-frame-pointer2012081620120812
14484opt64gcc -funroll-loops -Os -fomit-frame-pointer2012081620120812
14496opt64gcc -funroll-loops -fno-schedule-insns -Os -fomit-frame-pointer2012081620120812
14508opt64gcc -funroll-loops -m64 -march=k8 -Os -fomit-frame-pointer2012081620120812
14520opt64gcc -funroll-loops -march=barcelona -Os -fomit-frame-pointer2012081620120812
14520opt64gcc -march=k8 -O2 -fomit-frame-pointer2012081620120812
14544opt64gcc -funroll-loops -m64 -Os -fomit-frame-pointer2012081620120812
14560opt64gcc -march=nocona -Os -fomit-frame-pointer2012081620120812
14568opt64gcc -m64 -march=nocona -Os -fomit-frame-pointer2012081620120812
14580opt64gcc -funroll-loops -m64 -march=barcelona -Os -fomit-frame-pointer2012081620120812
14596opt64gcc -funroll-loops -m64 -march=nocona -Os -fomit-frame-pointer2012081620120812
14656opt64gcc -Os -fomit-frame-pointer2012081620120812
14672opt64gcc -funroll-loops -march=k8 -Os -fomit-frame-pointer2012081620120812
14684opt64gcc -m64 -Os -fomit-frame-pointer2012081620120812
14684opt64gcc -march=barcelona -Os -fomit-frame-pointer2012081620120812
14696opt64gcc -m64 -march=core-avx-i -Os -fomit-frame-pointer2012081620120812
14704opt64gcc -march=k8 -Os -fomit-frame-pointer2012081620120812
14708opt64gcc -m64 -march=native -mtune=native -Os -fomit-frame-pointer2012081620120812
14724opt64gcc -m64 -march=barcelona -Os -fomit-frame-pointer2012081620120812
14728opt64gcc -m64 -march=core2 -msse4 -Os -fomit-frame-pointer2012081620120812
14728opt64gcc -m64 -march=corei7-avx -Os -fomit-frame-pointer2012081620120812
14740opt64gcc -funroll-loops -march=nocona -Os -fomit-frame-pointer2012081620120812
14740opt64gcc -m64 -march=k8 -Os -fomit-frame-pointer2012081620120812
14752opt64gcc -m64 -march=core2 -msse4.1 -Os -fomit-frame-pointer2012081620120812
14780opt64gcc -m64 -march=corei7 -Os -fomit-frame-pointer2012081620120812
14848opt64gcc -m64 -march=core2 -Os -fomit-frame-pointer2012081620120812
14916opt64gcc -fno-schedule-insns -Os -fomit-frame-pointer2012081620120812
17852opt64gcc -funroll-loops -march=barcelona -O2 -fomit-frame-pointer2012081620120812
17852opt64gcc -funroll-loops -march=barcelona -O3 -fomit-frame-pointer2012081620120812
17856opt64gcc -funroll-loops -m64 -march=k8 -O3 -fomit-frame-pointer2012081620120812
17856opt64gcc -march=barcelona -O3 -fomit-frame-pointer2012081620120812
17888opt64gcc -funroll-loops -march=k8 -O3 -fomit-frame-pointer2012081620120812
17904opt64gcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer2012081620120812
17984opt64gcc -m64 -march=barcelona -O3 -fomit-frame-pointer2012081620120812
18172opt64gcc -funroll-loops -m64 -march=barcelona -O3 -fomit-frame-pointer2012081620120812
18204opt64gcc -funroll-loops -m64 -march=barcelona -O2 -fomit-frame-pointer2012081620120812
18212opt64gcc -march=k8 -O3 -fomit-frame-pointer2012081620120812
18612opt64gcc -funroll-loops -march=k8 -O2 -fomit-frame-pointer2012081620120812
18672opt64gcc -m64 -march=k8 -O3 -fomit-frame-pointer2012081620120812
29016opt64gcc2012081620120812
29072opt64cc2012081620120812
29416opt64gcc -funroll-loops2012081620120812

Checksum failure

Implementation: crypto_hash/sarmal256/asm64
Compiler: gcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer
388cb6271b4a2fe361dc417fa6ef2ed8b8068013427cc1f2913da13418f4373e
Number of similar (compiler,implementation) pairs: 2, namely:
Compiler | Implementations
gcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer asm64
gcc -funroll-loops -march=k8 -O2 -fomit-frame-pointer asm64

Checksum failure

Implementation: crypto_hash/sarmal256/asm64
Compiler: gcc -m64 -march=k8 -O2 -fomit-frame-pointer
fe874d2e0aa8e955afcd252edd925467151509c6f98b2c69d569696d4c10e9f6
Number of similar (compiler,implementation) pairs: 4, namely:
Compiler | Implementations
gcc -m64 -march=k8 -O2 -fomit-frame-pointer asm64
gcc -m64 -march=k8 -O3 -fomit-frame-pointer asm64
gcc -march=k8 -O2 -fomit-frame-pointer asm64
gcc -march=k8 -O3 -fomit-frame-pointer asm64

Test failure

Implementation: crypto_hash/sarmal256/asm64
Compiler: gcc -O2 -fomit-frame-pointer
error 111
crypto_hash writes to input

Number of similar (compiler,implementation) pairs: 12, namely:
Compiler | Implementations
gcc -O2 -fomit-frame-pointer asm64
gcc -fno-schedule-insns -O2 -fomit-frame-pointer asm64
gcc -funroll-loops -m64 -march=k8 -O3 -fomit-frame-pointer asm64
gcc -funroll-loops -march=k8 -O3 -fomit-frame-pointer asm64
gcc -m64 -O2 -fomit-frame-pointer asm64
gcc -m64 -march=core-avx-i -O2 -fomit-frame-pointer asm64
gcc -m64 -march=core2 -O2 -fomit-frame-pointer asm64
gcc -m64 -march=core2 -msse4.1 -O2 -fomit-frame-pointer asm64
gcc -m64 -march=core2 -msse4 -O2 -fomit-frame-pointer asm64
gcc -m64 -march=corei7-avx -O2 -fomit-frame-pointer asm64
gcc -m64 -march=corei7 -O2 -fomit-frame-pointer asm64
gcc -m64 -march=native -mtune=native -O2 -fomit-frame-pointer asm64

Test failure

Implementation: crypto_hash/sarmal256/asm64
Compiler: gcc -O3 -fomit-frame-pointer
error 111
crypto_hash does not handle overlap

Number of similar (compiler,implementation) pairs: 13, namely:
Compiler | Implementations
gcc -O3 -fomit-frame-pointer asm64
gcc -fno-schedule-insns -O3 -fomit-frame-pointer asm64
gcc -funroll-loops -O -fomit-frame-pointer asm64
gcc -funroll-loops -fno-schedule-insns -O -fomit-frame-pointer asm64
gcc -funroll-loops -m64 -O -fomit-frame-pointer asm64
gcc -m64 -O3 -fomit-frame-pointer asm64
gcc -m64 -march=core-avx-i -O3 -fomit-frame-pointer asm64
gcc -m64 -march=core2 -O3 -fomit-frame-pointer asm64
gcc -m64 -march=core2 -msse4.1 -O3 -fomit-frame-pointer asm64
gcc -m64 -march=core2 -msse4 -O3 -fomit-frame-pointer asm64
gcc -m64 -march=corei7-avx -O3 -fomit-frame-pointer asm64
gcc -m64 -march=corei7 -O3 -fomit-frame-pointer asm64
gcc -m64 -march=native -mtune=native -O3 -fomit-frame-pointer asm64

Test failure

Implementation: crypto_hash/sarmal256/asm64
Compiler: gcc -funroll-loops -O2 -fomit-frame-pointer
error 111

Number of similar (compiler,implementation) pairs: 8, namely:
Compiler | Implementations
gcc -funroll-loops -O2 -fomit-frame-pointer asm64
gcc -funroll-loops -O3 -fomit-frame-pointer asm64
gcc -funroll-loops -fno-schedule-insns -O2 -fomit-frame-pointer asm64
gcc -funroll-loops -fno-schedule-insns -O3 -fomit-frame-pointer asm64
gcc -funroll-loops -m64 -O2 -fomit-frame-pointer asm64
gcc -funroll-loops -m64 -O3 -fomit-frame-pointer asm64
gcc -m64 -march=k8 -O -fomit-frame-pointer asm64
gcc -march=k8 -O -fomit-frame-pointer asm64

Compiler output

Implementation: crypto_hash/sarmal256/asm64
Compiler: clang -O3
sarmal.c: sarmal.c:895:3: error: Expected register
sarmal.c: ASG(8,9,10,11,12,13,14,15,0,1,2,3)
sarmal.c: ^
sarmal.c: sarmal.c:55:2: note: expanded from macro 'ASG'
sarmal.c: AS2(movzx, rcx, bl) \
sarmal.c: ^
sarmal.c: sarmal.c:49:52: note: expanded from macro 'AS2'
sarmal.c: #define AS2(ins,dst,src) #ins " " #dst "," #src "\n\t"
sarmal.c: ^
sarmal.c: <inline asm>:66:17: note: instantiated into assembly here
sarmal.c: mov rdx,[rsi+7*2048+rcx*8]
sarmal.c: ^
sarmal.c: sarmal.c:895:3: error: Expected register
sarmal.c: ASG(8,9,10,11,12,13,14,15,0,1,2,3)
sarmal.c: ^
sarmal.c: sarmal.c:58:2: note: expanded from macro 'ASG'
sarmal.c: AS2(movzx, rcx, bl) \
sarmal.c: ^
sarmal.c: sarmal.c:49:52: note: expanded from macro 'AS2'
sarmal.c: #define AS2(ins,dst,src) #ins " " #dst "," #src "\n\t"
sarmal.c: ^
sarmal.c: <inline asm>:69:17: note: instantiated into assembly here
sarmal.c: xor rdx,[rsi+6*2048+rcx*8]
sarmal.c: ^
sarmal.c: sarmal.c:895:3: error: Expected register
sarmal.c: ...

Number of similar (compiler,implementation) pairs: 2, namely:
Compiler | Implementations
clang -O3 asm64
clang -O3 -mavx asm64