Implementation notes: amd64, latour, crypto_stream/chacha20

Computer: latour
Architecture: amd64
CPU ID: GenuineIntel-000006fb-bfebfbff
SUPERCOP version: 20201130
Operation: crypto_stream
Primitive: chacha20

Time	Object size	Test size	Implementation	Compiler	Benchmark date	SUPERCOP version
5292	3947 0 0	17920 792 800	`moon/sse2/64`	`clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
5292	3936 0 0	15838 784 800	`moon/sse2/64`	`clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
5292	3962 0 0	16539 728 856	`moon/sse2/64`	`gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
5301	3947 0 0	17016 792 800	`moon/sse2/64`	`clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
5301	3947 0 0	17016 792 800	`moon/sse2/64`	`clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
5301	3947 0 0	17792 792 800	`moon/sse2/64`	`clang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
5301	3958 0 0	19188 736 856	`moon/sse2/64`	`gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
5301	3941 0 0	15519 712 824	`moon/sse2/64`	`gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
5310	3958 0 0	16931 728 856	`moon/sse2/64`	`gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
6228	4690 0 1	17736 792 800	`e/amd64-xmm6`	`clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
6228	4761 0 4	17395 728 864	`e/amd64-xmm6`	`gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
6237	4690 0 1	18672 792 808	`e/amd64-xmm6`	`clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
6237	4690 0 1	17736 792 800	`e/amd64-xmm6`	`clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
6246	4837 0 4	17851 728 864	`e/amd64-xmm6`	`gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
6255	4690 0 1	18544 792 808	`e/amd64-xmm6`	`clang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
6255	4606 0 4	16223 712 832	`e/amd64-xmm6`	`gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
6273	4837 0 4	20076 736 864	`e/amd64-xmm6`	`gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
6282	4639 0 1	16558 784 800	`e/amd64-xmm6`	`clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
6381	4645 0 4	17723 728 864	`amd64-ssse3`	`gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
6390	3737 0 0	15630 784 800	`moon/ssse3/64`	`clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
6390	3759 0 0	18980 736 856	`moon/ssse3/64`	`gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
6399	3748 0 0	16808 792 800	`moon/ssse3/64`	`clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
6399	3748 0 0	17712 792 800	`moon/ssse3/64`	`clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
6399	3748 0 0	17584 792 800	`moon/ssse3/64`	`clang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
6399	3759 0 0	16723 728 856	`moon/ssse3/64`	`gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
6399	3763 0 0	16331 728 856	`moon/ssse3/64`	`gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
6399	3742 0 0	15311 712 824	`moon/ssse3/64`	`gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
6426	4498 0 1	18512 792 808	`amd64-ssse3`	`clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
6435	3748 0 0	16808 792 800	`moon/ssse3/64`	`clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
6453	4414 0 4	16063 712 832	`amd64-ssse3`	`gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
6462	4498 0 1	17576 792 800	`amd64-ssse3`	`clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
6462	4447 0 1	16430 784 800	`amd64-ssse3`	`clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
6471	4645 0 4	19916 736 864	`amd64-ssse3`	`gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
6471	4569 0 4	17267 728 864	`amd64-ssse3`	`gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
6480	4498 0 1	17576 792 800	`amd64-ssse3`	`clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
6480	4498 0 1	18384 792 808	`amd64-ssse3`	`clang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
6597	4792 0 1	16742 784 800	`dolbeau/amd64-avx2`	`clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
6624	5353 0 1	18448 792 800	`dolbeau/amd64-avx2`	`clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
6642	5353 0 1	19384 792 808	`dolbeau/amd64-avx2`	`clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
6642	5353 0 1	18448 792 800	`dolbeau/amd64-avx2`	`clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
7353	4675 0 4	16343 712 832	`dolbeau/amd64-avx2`	`gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
7380	6256 0 4	21564 736 864	`dolbeau/amd64-avx2`	`gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
7434	5481 0 4	18563 728 864	`dolbeau/amd64-avx2`	`gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
7812	5298 0 4	17979 728 864	`dolbeau/amd64-avx2`	`gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
11646	2754 0 1	15800 792 800	`e/amd64-3`	`clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
11682	2754 0 1	16736 792 808	`e/amd64-3`	`clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
11700	2902 0 4	18140 736 864	`e/amd64-3`	`gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
11709	2673 0 4	14255 712 832	`e/amd64-3`	`gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
11718	2754 0 1	15800 792 800	`e/amd64-3`	`clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
11718	2700 0 1	14606 784 800	`e/amd64-3`	`clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
11736	2826 0 4	15459 728 864	`e/amd64-3`	`gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
11754	2754 0 1	16608 792 808	`e/amd64-3`	`clang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
11763	2902 0 4	15915 728 864	`e/amd64-3`	`gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
12546	3220 0 4	18508 736 864	`e/ref`	`gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
12564	3236 0 4	18524 736 864	`e/regs`	`gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
12726	4080 0 4	19356 736 864	`e/merged`	`gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
13104	2601 0 4	15651 728 864	`e/merged`	`gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
13149	2311 0 1	14246 784 800	`e/merged`	`clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
13338	2484 0 4	15147 728 864	`e/merged`	`gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
13374	2339 0 4	13975 712 832	`e/merged`	`gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
13482	2665 0 1	16680 792 808	`e/merged`	`clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
13482	2665 0 1	16536 792 808	`e/merged`	`clang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
13500	2665 0 1	15744 792 800	`e/merged`	`clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
13500	2665 0 1	15744 792 800	`e/merged`	`clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
14247	2601 0 1	15680 792 800	`e/ref`	`clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
14409	2601 0 1	16472 792 808	`e/ref`	`clang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
14427	2601 0 1	16616 792 808	`e/ref`	`clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
14454	2601 0 1	15680 792 800	`e/ref`	`clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
14571	2537 0 1	15616 792 800	`e/regs`	`clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
15345	2537 0 1	16552 792 808	`e/regs`	`clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
15642	2537 0 1	16408 792 808	`e/regs`	`clang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
15705	2537 0 1	15616 792 800	`e/regs`	`clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
16452	2442 0 4	15507 728 864	`e/regs`	`gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
16848	2362 0 4	15019 728 864	`e/regs`	`gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
17802	2243 0 4	14891 728 864	`e/ref`	`gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
17802	2028 0 1	13966 784 800	`e/regs`	`clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
17937	2061 0 1	13990 784 800	`e/ref`	`clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE`	20200830	20200826
19008	2378 0 4	15443 728 864	`e/ref`	`gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
19197	2190 0 4	13815 712 832	`e/regs`	`gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826
22122	2079 0 4	13703 712 832	`e/ref`	`gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE`	20200830	20200826

Test failure

Implementation: dolbeau/generic-gccsimd128
Security model: constbranchindex
Compiler: gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE

error 111

Number of similar (compiler,implementation) pairs: 44, namely:

Compiler	Implementations
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE	dolbeau/generic-gccsimd128
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv -fPIC -fPIE	dolbeau/generic-gccsimd128
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv -fPIC -fPIE	dolbeau/generic-gccsimd128
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv -fPIC -fPIE	dolbeau/generic-gccsimd128
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE	dolbeau/generic-gccsimd256
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv -fPIC -fPIE	dolbeau/generic-gccsimd256
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv -fPIC -fPIE	dolbeau/generic-gccsimd256
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv -fPIC -fPIE	dolbeau/generic-gccsimd256
clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	krovetz/vec128
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	krovetz/vec128
clang -march=native -O -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	krovetz/vec128
clang -march=native -Os -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	krovetz/vec128
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	krovetz/vec128
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE	krovetz/vec128
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv -fPIC -fPIE	krovetz/vec128
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv -fPIC -fPIE	krovetz/vec128
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv -fPIC -fPIE	krovetz/vec128
clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	moon/avx/64
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	moon/avx/64
clang -march=native -O -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	moon/avx/64
clang -march=native -Os -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	moon/avx/64
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	moon/avx/64
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE	moon/avx/64
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv -fPIC -fPIE	moon/avx/64
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv -fPIC -fPIE	moon/avx/64
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv -fPIC -fPIE	moon/avx/64
clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	moon/avx2/64
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	moon/avx2/64
clang -march=native -O -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	moon/avx2/64
clang -march=native -Os -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	moon/avx2/64
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	moon/avx2/64
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE	moon/avx2/64
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv -fPIC -fPIE	moon/avx2/64
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv -fPIC -fPIE	moon/avx2/64
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv -fPIC -fPIE	moon/avx2/64
clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	moon/xop/64
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	moon/xop/64
clang -march=native -O -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	moon/xop/64
clang -march=native -Os -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	moon/xop/64
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	moon/xop/64
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE	moon/xop/64
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv -fPIC -fPIE	moon/xop/64
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv -fPIC -fPIE	moon/xop/64
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv -fPIC -fPIE	moon/xop/64

Compiler output

Implementation: dolbeau/amd64-avx2
Security model: constbranchindex
Compiler: clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE

chacha.c: In file included from chacha.c:103:
chacha.c: ./u4.h:122:7: error: always_inline function '_mm_shuffle_epi8' requires target feature 'ssse3', but would be inlined into function 'crypto_stream_chacha20_dolbeau_amd64_avx2_constbranchindex_ECRYPT_encrypt_bytes' that is compiled without support for 'ssse3'
chacha.c: VEC4_QUARTERROUND( 0, 4, 8,12);
chacha.c: ^
chacha.c: ./u4.h:17:36: note: expanded from macro 'VEC4_QUARTERROUND'
chacha.c: #define VEC4_QUARTERROUND(a,b,c,d) VEC4_QUARTERROUND_SHUFFLE(a,b,c,d)
chacha.c: ^
chacha.c: ./u4.h:12:86: note: expanded from macro 'VEC4_QUARTERROUND_SHUFFLE'
chacha.c: x_##a = _mm_add_epi32(x_##a, x_##b); t_##a = _mm_xor_si128(x_##d, x_##a); x_##d = _mm_shuffle_epi8(t_##a, rot16); \
chacha.c: ^
chacha.c: ./u4.h:122:7: error: always_inline function '_mm_shuffle_epi8' requires target feature 'ssse3', but would be inlined into function 'crypto_stream_chacha20_dolbeau_amd64_avx2_constbranchindex_ECRYPT_encrypt_bytes' that is compiled without support for 'ssse3'
chacha.c: ./u4.h:17:36: note: expanded from macro 'VEC4_QUARTERROUND'
chacha.c: #define VEC4_QUARTERROUND(a,b,c,d) VEC4_QUARTERROUND_SHUFFLE(a,b,c,d)
chacha.c: ^
chacha.c: ./u4.h:14:86: note: expanded from macro 'VEC4_QUARTERROUND_SHUFFLE'
chacha.c: x_##a = _mm_add_epi32(x_##a, x_##b); t_##a = _mm_xor_si128(x_##d, x_##a); x_##d = _mm_shuffle_epi8(t_##a, rot8); \
chacha.c: ^
chacha.c: ./u4.h:123:7: error: always_inline function '_mm_shuffle_epi8' requires target feature 'ssse3', but would be inlined into function 'crypto_stream_chacha20_dolbeau_amd64_avx2_constbranchindex_ECRYPT_encrypt_bytes' that is compiled without support for 'ssse3'
chacha.c: VEC4_QUARTERROUND( 1, 5, 9,13);
chacha.c: ^
chacha.c: ./u4.h:17:36: note: expanded from macro 'VEC4_QUARTERROUND'
chacha.c: #define VEC4_QUARTERROUND(a,b,c,d) VEC4_QUARTERROUND_SHUFFLE(a,b,c,d)
chacha.c: ^
chacha.c: ./u4.h:12:86: note: expanded from macro 'VEC4_QUARTERROUND_SHUFFLE'
chacha.c: x_##a = _mm_add_epi32(x_##a, x_##b); t_##a = _mm_xor_si128(x_##d, x_##a); x_##d = _mm_shuffle_epi8(t_##a, rot16); \
chacha.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:

Compiler	Implementations
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	dolbeau/amd64-avx2

Compiler output

Implementation: dolbeau/generic-gccsimd128
Security model: constbranchindex
Compiler: clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE

chacha.c: In file included from chacha.c:93:
chacha.c: ./u4.h:127:7: error: cannot convert between vector values of different size ('V' (vector of 4 'unsigned int' values) and 'int')
chacha.c: VEC4_QUARTERROUND( 0, 4, 8,12);
chacha.c: ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
chacha.c: ./u4.h:29:56: note: expanded from macro 'VEC4_QUARTERROUND'
chacha.c: x_##a = x_##a + x_##b; t_##a = x_##d ^ x_##a; x_##d = VEC4_ROT(t_##a, 16); \
chacha.c: ^ ~~~~~ ~~
chacha.c: ./u4.h:23:29: note: expanded from macro 'VEC4_ROT'
chacha.c: #define VEC4_ROT(a,imm) ((a << imm) | (a >> (32-imm)))
chacha.c: ~ ^ ~~~
chacha.c: ./u4.h:127:7: error: cannot convert between vector values of different size ('V' (vector of 4 'unsigned int' values) and 'int')
chacha.c: VEC4_QUARTERROUND( 0, 4, 8,12);
chacha.c: ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
chacha.c: ./u4.h:29:56: note: expanded from macro 'VEC4_QUARTERROUND'
chacha.c: x_##a = x_##a + x_##b; t_##a = x_##d ^ x_##a; x_##d = VEC4_ROT(t_##a, 16); \
chacha.c: ^~~~~~~~~~~~~~~~~~~
chacha.c: ./u4.h:23:42: note: expanded from macro 'VEC4_ROT'
chacha.c: #define VEC4_ROT(a,imm) ((a << imm) | (a >> (32-imm)))
chacha.c: ~ ^ ~~~~~~~~
chacha.c: ./u4.h:127:7: error: cannot convert between vector values of different size ('V' (vector of 4 'unsigned int' values) and 'int')
chacha.c: VEC4_QUARTERROUND( 0, 4, 8,12);
chacha.c: ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
chacha.c: ./u4.h:30:56: note: expanded from macro 'VEC4_QUARTERROUND'
chacha.c: x_##c = x_##c + x_##d; t_##c = x_##b ^ x_##c; x_##b = VEC4_ROT(t_##c, 12); \
chacha.c: ^ ~~~~~ ~~
chacha.c: ...

Number of similar (compiler,implementation) pairs: 5, namely:

Compiler	Implementations
clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	dolbeau/generic-gccsimd128
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	dolbeau/generic-gccsimd128
clang -march=native -O -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	dolbeau/generic-gccsimd128
clang -march=native -Os -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	dolbeau/generic-gccsimd128
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	dolbeau/generic-gccsimd128

Compiler output

Implementation: dolbeau/generic-gccsimd256
Security model: constbranchindex
Compiler: clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE

chacha.c: In file included from chacha.c:93:
chacha.c: ./u8.h:138:7: error: cannot convert between vector values of different size ('V' (vector of 8 'unsigned int' values) and 'int')
chacha.c: VEC8_QUARTERROUND( 0, 4, 8,12);
chacha.c: ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
chacha.c: ./u8.h:32:56: note: expanded from macro 'VEC8_QUARTERROUND'
chacha.c: x_##a = x_##a + x_##b; t_##a = x_##d ^ x_##a; x_##d = VEC8_ROT(t_##a, 16); \
chacha.c: ^ ~~~~~ ~~
chacha.c: ./u8.h:26:29: note: expanded from macro 'VEC8_ROT'
chacha.c: #define VEC8_ROT(a,imm) ((a << imm) | (a >> (32-imm)))
chacha.c: ~ ^ ~~~
chacha.c: ./u8.h:138:7: error: cannot convert between vector values of different size ('V' (vector of 8 'unsigned int' values) and 'int')
chacha.c: VEC8_QUARTERROUND( 0, 4, 8,12);
chacha.c: ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
chacha.c: ./u8.h:32:56: note: expanded from macro 'VEC8_QUARTERROUND'
chacha.c: x_##a = x_##a + x_##b; t_##a = x_##d ^ x_##a; x_##d = VEC8_ROT(t_##a, 16); \
chacha.c: ^~~~~~~~~~~~~~~~~~~
chacha.c: ./u8.h:26:42: note: expanded from macro 'VEC8_ROT'
chacha.c: #define VEC8_ROT(a,imm) ((a << imm) | (a >> (32-imm)))
chacha.c: ~ ^ ~~~~~~~~
chacha.c: ./u8.h:138:7: error: cannot convert between vector values of different size ('V' (vector of 8 'unsigned int' values) and 'int')
chacha.c: VEC8_QUARTERROUND( 0, 4, 8,12);
chacha.c: ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
chacha.c: ./u8.h:33:56: note: expanded from macro 'VEC8_QUARTERROUND'
chacha.c: x_##c = x_##c + x_##d; t_##c = x_##b ^ x_##c; x_##b = VEC8_ROT(t_##c, 12); \
chacha.c: ^ ~~~~~ ~~
chacha.c: ...

Number of similar (compiler,implementation) pairs: 5, namely:

Compiler	Implementations
clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	dolbeau/generic-gccsimd256
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	dolbeau/generic-gccsimd256
clang -march=native -O -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	dolbeau/generic-gccsimd256
clang -march=native -Os -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	dolbeau/generic-gccsimd256
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	dolbeau/generic-gccsimd256

Compiler output

Implementation: goll_gueron
Security model: constbranchindex
Compiler: clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE

stream.c: stream.c:126:2: error: -- Implementation supports only microarchitectures with support for Advanced Vector Extensions (AVX2 or AVX512).
stream.c: #error -- Implementation supports only microarchitectures with support for Advanced Vector Extensions (AVX2 or AVX512).
stream.c: ^
stream.c: 1 error generated.

Number of similar (compiler,implementation) pairs: 5, namely:

Compiler	Implementations
clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	goll_gueron
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	goll_gueron
clang -march=native -O -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	goll_gueron
clang -march=native -Os -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	goll_gueron
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	goll_gueron

Compiler output

Implementation: goll_gueron
Security model: constbranchindex
Compiler: gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE

stream.c: stream.c:126:2: error: #error -- Implementation supports only microarchitectures with support for Advanced Vector Extensions (AVX2 or AVX512).
stream.c: #error -- Implementation supports only microarchitectures with support for Advanced Vector Extensions (AVX2 or AVX512).
stream.c: ^

Number of similar (compiler,implementation) pairs: 4, namely:

Compiler	Implementations
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE	goll_gueron
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv -fPIC -fPIE	goll_gueron
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv -fPIC -fPIE	goll_gueron
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv -fPIC -fPIE	goll_gueron

Compiler output

Implementation: krovetz/avx2
Security model: constbranchindex
Compiler: clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE

stream.c: stream.c:56:18: warning: implicit declaration of function '_mm_broadcastsi128_si256' is invalid in C99 [-Wimplicit-function-declaration]
stream.c: __m256i s0 = _mm_broadcastsi128_si256((__m128i *)sigma);
stream.c: ^
stream.c: stream.c:56:13: error: initializing '__m256i' (vector of 4 'long long' values) with an expression of incompatible type 'int'
stream.c: __m256i s0 = _mm_broadcastsi128_si256((__m128i *)sigma);
stream.c: ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
stream.c: 1 warning and 1 error generated.

Number of similar (compiler,implementation) pairs: 5, namely:

Compiler	Implementations
clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	krovetz/avx2
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	krovetz/avx2
clang -march=native -O -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	krovetz/avx2
clang -march=native -Os -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	krovetz/avx2
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE	krovetz/avx2

Compiler output

Implementation: krovetz/avx2
Security model: constbranchindex
Compiler: gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE

stream.c: stream.c: In function 'crypto_stream_chacha20_krovetz_avx2_constbranchindex_xor':
stream.c: stream.c:58:13: warning: AVX vector return without AVX enabled changes the ABI [-Wpsabi]
stream.c: __m256i s0 = _mm256_broadcastsi128_si256(*(__m128i *)sigma);
stream.c: ^
stream.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
stream.c: from stream.c:8:
stream.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:574:1: error: inlining failed in call to always_inline '_mm256_or_si256': target specific option mismatch
stream.c: _mm256_or_si256 (__m256i __A, __m256i __B)
stream.c: ^
stream.c: stream.c:63:13: error: called from here
stream.c: __m256i s3 = _mm256_or_si256(
stream.c: ^
stream.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
stream.c: from stream.c:8:
stream.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:655:1: error: inlining failed in call to always_inline '_mm256_slli_si256': target specific option mismatch
stream.c: _mm256_slli_si256 (__m256i __A, const int __N)
stream.c: ^
stream.c: stream.c:63:18: error: called from here
stream.c: __m256i s3 = _mm256_or_si256(
stream.c: ^
stream.c: In file included from /usr/lib/gcc/x86_64-linux-gnu/5/include/immintrin.h:43:0,
stream.c: from stream.c:8:
stream.c: /usr/lib/gcc/x86_64-linux-gnu/5/include/avx2intrin.h:1006:1: error: inlining failed in call to always_inline '_mm256_broadcastq_epi64': target specific option mismatch
stream.c: _mm256_broadcastq_epi64 (__m128i __X)
stream.c: ^
stream.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:

Compiler	Implementations
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE	krovetz/avx2
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv -fPIC -fPIE	krovetz/avx2
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv -fPIC -fPIE	krovetz/avx2
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv -fPIC -fPIE	krovetz/avx2