Implementation notes: armeabi, novena, crypto_stream/chacha20

Computer: novena
Architecture: armeabi
CPU ID: unknown CPU ID
SUPERCOP version: 20220506
Operation: crypto_stream
Primitive: chacha20
TimeObject sizeTest sizeImplementationCompilerBenchmark dateSUPERCOP version
112012852 0 024997 424 760moon/neon/32clang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
112012796 0 012663 420 744moon/neon/32gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
112012796 0 014015 420 744moon/neon/32gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
112022788 0 011659 412 744moon/neon/32gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
116002852 0 016493 424 752moon/neon/32clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
116002852 0 015963 420 744moon/neon/32clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
116022852 0 017709 424 744moon/neon/32clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
116022852 0 016067 420 744moon/neon/32clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
124012792 0 012230 416 744moon/neon/32gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
176022016 0 015235 420 744moon/armv6/32clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
176022016 0 015131 420 744moon/armv6/32clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
176022016 0 024165 424 760moon/armv6/32clang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
180012016 0 016877 424 744moon/armv6/32clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
180011960 0 013179 420 744moon/armv6/32gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
180011956 0 011394 416 744moon/armv6/32gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
180011952 0 010819 412 744moon/armv6/32gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
180021960 0 011827 420 744moon/armv6/32gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
184022016 0 015653 424 752moon/armv6/32clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
224034460 0 126545 424 768dolbeau/arm-neonclang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
244025088 0 127185 424 768dolbeau/generic-gccsimd128clang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
2480314910 1120 01079029 73729 12008T:cryptoppg++_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
252028568 804 0963606 73613 12008T:cryptoppclang++_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
252029252 804 0963090 73613 12008T:cryptoppclang++_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
252034621 488 0958747 73293 12008T:cryptoppclang++_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
256029396 1120 01072237 73733 12008T:cryptoppg++_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
256028868 1120 01071443 73725 12008T:cryptoppg++_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
256024714 1440 01066609 74045 12008T:cryptoppg++_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
256021852 0 413067 420 752e/refgcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
2640210209 1836 0963963 74609 12008T:cryptoppclang++_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
264022224 0 413435 420 752e/mergedgcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
264031856 0 411715 420 752e/mergedgcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
276011660 0 410527 412 744e/mergedgcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
276022480 0 117273 424 752e/mergedclang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
276022480 0 115527 420 744e/mergedclang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
276032492 0 116065 424 760e/mergedclang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
276033420 0 414635 420 752e/regsgcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
288026060 0 120865 424 752dolbeau/generic-gccsimd128clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
288026212 0 119801 424 760dolbeau/generic-gccsimd128clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
288039788 0 122847 420 744dolbeau/generic-gccsimd256clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
292025784 0 118847 420 744dolbeau/generic-gccsimd128clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
2960210052 0 124857 424 752dolbeau/generic-gccsimd256clang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
296022740 0 124817 424 768e/mergedclang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
296035776 0 118935 420 744dolbeau/generic-gccsimd128clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
2960310204 0 123793 424 760dolbeau/generic-gccsimd256clang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
300027524 0 129617 424 768dolbeau/generic-gccsimd256clang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
312022480 0 115631 420 744e/mergedclang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
312022504 0 411938 416 752e/mergedgcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
312032596 0 124669 424 768e/refclang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
312032644 0 124717 424 768e/regsclang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
324021840 0 411699 420 752e/regsgcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
324021632 0 410491 412 744e/regsgcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
328031400 0 411259 420 752e/refgcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
328032012 0 411442 416 752e/regsgcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
344039824 0 122983 420 744dolbeau/generic-gccsimd256clang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
352032160 0 115725 424 760e/refclang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
360032188 0 115227 420 744e/regsclang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
364032188 0 115757 424 760e/regsclang_-march=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
368021176 0 410035 412 744e/refgcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
368041948 0 116733 424 752e/refclang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
376031712 0 114755 420 744e/refclang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
384031720 0 114859 420 744e/refclang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
384031604 0 411034 416 752e/refgcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
396042244 0 115379 420 744e/regsclang_-march=native_-O_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
400034728 0 413607 412 744dolbeau/generic-gccsimd128gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
428042188 0 116973 424 752e/regsclang_-march=native_-O2_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2022060520220506
440045628 0 415078 416 752dolbeau/generic-gccsimd128gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
5520512488 0 423719 420 752dolbeau/generic-gccsimd256gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
5640411988 0 421871 420 752dolbeau/generic-gccsimd256gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
568066148 0 416031 420 752dolbeau/generic-gccsimd128gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
572056732 0 417967 420 752dolbeau/generic-gccsimd128gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
7440612104 0 421558 416 752dolbeau/generic-gccsimd256gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506
8200711472 0 420351 412 744dolbeau/generic-gccsimd256gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2022060520220506

Test failure

Implementation: krovetz/vec128
Security model: constbranchindex
Compiler: clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
error 111

Number of similar (compiler,implementation) pairs: 1, namely:
CompilerImplementations
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE krovetz/vec128

Compiler output

Implementation: dolbeau/arm-neon
Security model: constbranchindex
Compiler: clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
chacha.c: In file included from chacha.c:11:
chacha.c: /usr/lib/llvm-11/lib/clang/11.0.1/include/arm_neon.h:32:2: error: "NEON support not enabled"
chacha.c: #error "NEON support not enabled"
chacha.c: ^
chacha.c: In file included from chacha.c:94:
chacha.c: ./u4.h:30:3: error: use of undeclared identifier 'uint32x4_t'
chacha.c: uint32x4_t x_0 = vdupq_n_u32(x[0]);
chacha.c: ^
chacha.c: ./u4.h:31:3: error: use of undeclared identifier 'uint32x4_t'
chacha.c: uint32x4_t x_1 = vdupq_n_u32(x[1]);
chacha.c: ^
chacha.c: ./u4.h:32:3: error: use of undeclared identifier 'uint32x4_t'
chacha.c: uint32x4_t x_2 = vdupq_n_u32(x[2]);
chacha.c: ^
chacha.c: ./u4.h:33:3: error: use of undeclared identifier 'uint32x4_t'
chacha.c: uint32x4_t x_3 = vdupq_n_u32(x[3]);
chacha.c: ^
chacha.c: ./u4.h:34:3: error: use of undeclared identifier 'uint32x4_t'
chacha.c: uint32x4_t x_4 = vdupq_n_u32(x[4]);
chacha.c: ^
chacha.c: ./u4.h:35:3: error: use of undeclared identifier 'uint32x4_t'
chacha.c: uint32x4_t x_5 = vdupq_n_u32(x[5]);
chacha.c: ^
chacha.c: ./u4.h:36:3: error: use of undeclared identifier 'uint32x4_t'
chacha.c: uint32x4_t x_6 = vdupq_n_u32(x[6]);
chacha.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
CompilerImplementations
clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE dolbeau/arm-neon
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE dolbeau/arm-neon
clang -march=native -O -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE dolbeau/arm-neon
clang -march=native -Os -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE dolbeau/arm-neon

Compiler output

Implementation: dolbeau/arm-neon
Security model: constbranchindex
Compiler: gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE
chacha.c: In file included from chacha.c:11:
chacha.c: u4.h: In function ‘crypto_stream_chacha20_dolbeau_arm_neon_constbranchindex_ECRYPT_encrypt_bytes’:
chacha.c: /usr/lib/gcc/arm-linux-gnueabihf/10/include/arm_neon.h:6765:1: error: inlining failed in call to ‘always_inline’ ‘vdupq_n_u32’: target specific option mismatch
chacha.c: 6765 | vdupq_n_u32 (uint32_t __a)
chacha.c: | ^~~~~~~~~~~
chacha.c: In file included from chacha.c:94:
chacha.c: u4.h:45:21: note: called from here
chacha.c: 45 | uint32x4_t x_15 = vdupq_n_u32(x[15]);
chacha.c: | ^~~~~~~~~~~~~~~~~~
chacha.c: In file included from chacha.c:11:
chacha.c: /usr/lib/gcc/arm-linux-gnueabihf/10/include/arm_neon.h:6765:1: error: inlining failed in call to ‘always_inline’ ‘vdupq_n_u32’: target specific option mismatch
chacha.c: 6765 | vdupq_n_u32 (uint32_t __a)
chacha.c: | ^~~~~~~~~~~
chacha.c: In file included from chacha.c:94:
chacha.c: u4.h:44:21: note: called from here
chacha.c: 44 | uint32x4_t x_14 = vdupq_n_u32(x[14]);
chacha.c: | ^~~~~~~~~~~~~~~~~~
chacha.c: In file included from chacha.c:11:
chacha.c: /usr/lib/gcc/arm-linux-gnueabihf/10/include/arm_neon.h:6765:1: error: inlining failed in call to ‘always_inline’ ‘vdupq_n_u32’: target specific option mismatch
chacha.c: 6765 | vdupq_n_u32 (uint32_t __a)
chacha.c: | ^~~~~~~~~~~
chacha.c: In file included from chacha.c:94:
chacha.c: u4.h:41:21: note: called from here
chacha.c: 41 | uint32x4_t x_11 = vdupq_n_u32(x[11]);
chacha.c: | ^~~~~~~~~~~~~~~~~~
chacha.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
CompilerImplementations
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE dolbeau/arm-neon
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv -fPIC -fPIE dolbeau/arm-neon
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv -fPIC -fPIE dolbeau/arm-neon
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv -fPIC -fPIE dolbeau/arm-neon

Compiler output

Implementation: krovetz/vec128
Security model: constbranchindex
Compiler: clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
stream.c: stream.c:80:2: error: -- Implementation supports only machines with neon, altivec or SSE2
stream.c: #error -- Implementation supports only machines with neon, altivec or SSE2
stream.c: ^
stream.c: stream.c:151:14: warning: implicit declaration of function 'NONCE' is invalid in C99 [-Wimplicit-function-declaration]
stream.c: vec s3 = NONCE(np);
stream.c: ^
stream.c: stream.c:151:9: error: initializing 'vec' (vector of 4 'unsigned int' values) with an expression of incompatible type 'int'
stream.c: vec s3 = NONCE(np);
stream.c: ^ ~~~~~~~~~
stream.c: stream.c:152:36: error: use of undeclared identifier 'VBPI'
stream.c: for (iters = 0; iters < inlen/(BPI*64); iters++) {
stream.c: ^
stream.c: stream.c:91:19: note: expanded from macro 'BPI'
stream.c: #define BPI (VBPI + GPR_TOO) /* Blocks computed per loop iteration */
stream.c: ^
stream.c: stream.c:152:36: error: use of undeclared identifier 'GPR_TOO'
stream.c: stream.c:91:26: note: expanded from macro 'BPI'
stream.c: #define BPI (VBPI + GPR_TOO) /* Blocks computed per loop iteration */
stream.c: ^
stream.c: stream.c:155:19: error: use of undeclared identifier 'ONE'
stream.c: v7 = v3 + ONE;
stream.c: ^
stream.c: stream.c:176:13: warning: implicit declaration of function 'ROTW16' is invalid in C99 [-Wimplicit-function-declaration]
stream.c: DQROUND_VECTORS(v0,v1,v2,v3)
stream.c: ^
stream.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
CompilerImplementations
clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE krovetz/vec128
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE krovetz/vec128
clang -march=native -O -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE krovetz/vec128
clang -march=native -Os -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE krovetz/vec128

Compiler output

Implementation: krovetz/vec128
Security model: constbranchindex
Compiler: gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE
stream.c: stream.c:80:2: error: #error -- Implementation supports only machines with neon, altivec or SSE2
stream.c: 80 | #error -- Implementation supports only machines with neon, altivec or SSE2
stream.c: | ^~~~~
stream.c: stream.c: In function ‘crypto_stream_chacha20_krovetz_vec128_constbranchindex_xor’:
stream.c: stream.c:151:14: warning: implicit declaration of function ‘NONCE’ [-Wimplicit-function-declaration]
stream.c: 151 | vec s3 = NONCE(np);
stream.c: | ^~~~~
stream.c: stream.c:151:14: error: incompatible types when initializing type ‘vec’ {aka ‘__vector(4) unsigned int’} using type ‘int’
stream.c: stream.c:91:19: error: ‘VBPI’ undeclared (first use in this function); did you mean ‘BPI’?
stream.c: 91 | #define BPI (VBPI + GPR_TOO) /* Blocks computed per loop iteration */
stream.c: | ^~~~
stream.c: stream.c:152:36: note: in expansion of macro ‘BPI’
stream.c: 152 | for (iters = 0; iters < inlen/(BPI*64); iters++) {
stream.c: | ^~~
stream.c: stream.c:91:19: note: each undeclared identifier is reported only once for each function it appears in
stream.c: 91 | #define BPI (VBPI + GPR_TOO) /* Blocks computed per loop iteration */
stream.c: | ^~~~
stream.c: stream.c:152:36: note: in expansion of macro ‘BPI’
stream.c: 152 | for (iters = 0; iters < inlen/(BPI*64); iters++) {
stream.c: | ^~~
stream.c: stream.c:91:26: error: ‘GPR_TOO’ undeclared (first use in this function)
stream.c: 91 | #define BPI (VBPI + GPR_TOO) /* Blocks computed per loop iteration */
stream.c: | ^~~~~~~
stream.c: stream.c:152:36: note: in expansion of macro ‘BPI’
stream.c: 152 | for (iters = 0; iters < inlen/(BPI*64); iters++) {
stream.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
CompilerImplementations
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE krovetz/vec128
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv -fPIC -fPIE krovetz/vec128
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv -fPIC -fPIE krovetz/vec128
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv -fPIC -fPIE krovetz/vec128