Implementation notes: amd64, skylake, crypto_aead/scream12v1

Computer: skylake
Architecture: amd64
CPU ID: GenuineIntel-000506e3-bfebfbff
SUPERCOP version: 20161026
Operation: crypto_aead
Primitive: scream12v1
Time Implementation Compiler Benchmark date SUPERCOP version
68636ssegcc -m64 -march=corei7-avx -O3 -fomit-frame-pointer2016121620161026
69208ssegcc -m64 -march=core-avx-i -O3 -fomit-frame-pointer2016121620161026
69258ssegcc -m64 -march=native -mtune=native -O3 -fomit-frame-pointer2016121620161026
70746ssegcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv2016121620161026
71014ssegcc -m64 -march=core-avx2 -O3 -fomit-frame-pointer2016121620161026
75004ssegcc -m64 -march=core2 -msse4 -O3 -fomit-frame-pointer2016121620161026
75038ssegcc -m64 -march=corei7 -O3 -fomit-frame-pointer2016121620161026
75242ssegcc -m64 -march=core2 -msse4.1 -O3 -fomit-frame-pointer2016121620161026
75586ssegcc -m64 -march=native -mtune=native -O2 -fomit-frame-pointer2016121620161026
75706ssegcc -m64 -march=corei7-avx -O2 -fomit-frame-pointer2016121620161026
75872ssegcc -m64 -march=core-avx2 -O2 -fomit-frame-pointer2016121620161026
75920ssegcc -m64 -march=core2 -O3 -fomit-frame-pointer2016121620161026
76250sseclang -O3 -fwrapv -march=x86-64 -mcpu=core-avx2 -mavx2 -maes -mpclmul -fomit-frame-pointer -Qunused-arguments2016121620161026
76420sseclang -O3 -fwrapv -mavx2 -fomit-frame-pointer -Qunused-arguments2016121620161026
76532sseclang -O3 -fwrapv -mavx -fomit-frame-pointer -Qunused-arguments2016121620161026
76782ssegcc -m64 -march=core-avx-i -O2 -fomit-frame-pointer2016121620161026
76784ssegcc -m64 -march=core-avx2 -Os -fomit-frame-pointer2016121620161026
76834ssegcc -m64 -march=native -mtune=native -Os -fomit-frame-pointer2016121620161026
76998ssegcc -m64 -march=core-avx-i -Os -fomit-frame-pointer2016121620161026
77092sseclang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2016121620161026
77206sseclang -O3 -fwrapv -mavx -maes -mpclmul -fomit-frame-pointer -Qunused-arguments2016121620161026
77486ssegcc -m64 -march=corei7-avx -Os -fomit-frame-pointer2016121620161026
77942sseclang -O3 -fwrapv -march=native -fomit-frame-pointer -Qunused-arguments2016121620161026
79358ssegcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv2016121620161026
81152ssegcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv2016121620161026
81944ssegcc -m64 -march=corei7 -O2 -fomit-frame-pointer2016121620161026
81982ssegcc -m64 -march=core2 -O2 -fomit-frame-pointer2016121620161026
82238ssegcc -m64 -march=native -mtune=native -O -fomit-frame-pointer2016121620161026
82240ssegcc -m64 -march=core-avx2 -O -fomit-frame-pointer2016121620161026
82310ssegcc -m64 -march=core-avx-i -O -fomit-frame-pointer2016121620161026
82700ssegcc -m64 -march=core2 -msse4.1 -O2 -fomit-frame-pointer2016121620161026
82938ssegcc -m64 -march=corei7-avx -O -fomit-frame-pointer2016121620161026
83022ssegcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv2016121620161026
84776ssegcc -m64 -march=core2 -msse4 -O2 -fomit-frame-pointer2016121620161026
88186ssegcc -m64 -march=corei7 -O -fomit-frame-pointer2016121620161026
88344ssegcc -m64 -march=core2 -msse4.1 -O -fomit-frame-pointer2016121620161026
88678ssegcc -m64 -march=core2 -O -fomit-frame-pointer2016121620161026
88714ssegcc -m64 -march=core2 -msse4 -O -fomit-frame-pointer2016121620161026
120790ssegcc -m64 -march=corei7 -Os -fomit-frame-pointer2016121620161026
121366ssegcc -m64 -march=core2 -Os -fomit-frame-pointer2016121620161026
121434ssegcc -m64 -march=core2 -msse4 -Os -fomit-frame-pointer2016121620161026
121766ssegcc -m64 -march=core2 -msse4.1 -Os -fomit-frame-pointer2016121620161026
316182refgcc -funroll-loops -march=nocona -O3 -fomit-frame-pointer2016121620161026
316562refgcc -m64 -march=corei7 -O3 -fomit-frame-pointer2016121620161026
316618refgcc -m64 -march=core2 -O3 -fomit-frame-pointer2016121620161026
316668refgcc -m64 -march=barcelona -O3 -fomit-frame-pointer2016121620161026
316682refgcc -march=k8 -O3 -fomit-frame-pointer2016121620161026
316692refgcc -m64 -march=core2 -msse4 -O3 -fomit-frame-pointer2016121620161026
316738refgcc -m64 -march=core2 -msse4.1 -O3 -fomit-frame-pointer2016121620161026
316862refgcc -march=barcelona -O3 -fomit-frame-pointer2016121620161026
316900refgcc -m64 -march=k8 -O3 -fomit-frame-pointer2016121620161026
316914refgcc -march=nocona -O3 -fomit-frame-pointer2016121620161026
317138refgcc -m64 -march=nocona -O3 -fomit-frame-pointer2016121620161026
317644refgcc -m64 -march=core-avx2 -O3 -fomit-frame-pointer2016121620161026
317994refgcc -m64 -march=native -mtune=native -O3 -fomit-frame-pointer2016121620161026
318278refgcc -m64 -march=corei7-avx -O3 -fomit-frame-pointer2016121620161026
318700refgcc -m64 -march=core-avx-i -O3 -fomit-frame-pointer2016121620161026
319430refgcc -funroll-loops -m64 -march=nocona -O3 -fomit-frame-pointer2016121620161026
320232refgcc -funroll-loops -march=k8 -O3 -fomit-frame-pointer2016121620161026
322410refgcc -funroll-loops -m64 -march=barcelona -O2 -fomit-frame-pointer2016121620161026
322414refgcc -funroll-loops -fno-schedule-insns -O2 -fomit-frame-pointer2016121620161026
322426refgcc -funroll-loops -O2 -fomit-frame-pointer2016121620161026
322526refgcc -funroll-loops -march=k8 -O2 -fomit-frame-pointer2016121620161026
322616refgcc -funroll-loops -march=barcelona -O2 -fomit-frame-pointer2016121620161026
322770refgcc -funroll-loops -m64 -O2 -fomit-frame-pointer2016121620161026
322816refgcc -funroll-loops -march=barcelona -O3 -fomit-frame-pointer2016121620161026
322946refgcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer2016121620161026
323478refgcc -funroll-loops -m64 -march=barcelona -O3 -fomit-frame-pointer2016121620161026
323910refgcc -funroll-loops -m64 -march=k8 -O3 -fomit-frame-pointer2016121620161026
324940refgcc -funroll-loops -march=nocona -O2 -fomit-frame-pointer2016121620161026
326606refgcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv2016121620161026
327282refgcc -funroll-loops -m64 -march=nocona -O2 -fomit-frame-pointer2016121620161026
333882refgcc -funroll-loops -fno-schedule-insns -O3 -fomit-frame-pointer2016121620161026
334344refgcc -funroll-loops -m64 -O3 -fomit-frame-pointer2016121620161026
334674refgcc -funroll-loops -m64 -march=nocona -O -fomit-frame-pointer2016121620161026
334820refgcc -m64 -O3 -fomit-frame-pointer2016121620161026
335286refgcc -fno-schedule-insns -O3 -fomit-frame-pointer2016121620161026
335518refgcc -funroll-loops -O3 -fomit-frame-pointer2016121620161026
336450refgcc -funroll-loops -march=nocona -O -fomit-frame-pointer2016121620161026
336718refgcc -O3 -fomit-frame-pointer2016121620161026
338368refgcc -funroll-loops -m64 -O -fomit-frame-pointer2016121620161026
338508refgcc -funroll-loops -march=barcelona -O -fomit-frame-pointer2016121620161026
338918refgcc -funroll-loops -O -fomit-frame-pointer2016121620161026
339324refgcc -funroll-loops -fno-schedule-insns -O -fomit-frame-pointer2016121620161026
339758refgcc -funroll-loops -march=k8 -O -fomit-frame-pointer2016121620161026
340064refgcc -funroll-loops -m64 -march=k8 -O -fomit-frame-pointer2016121620161026
340614refgcc -funroll-loops -m64 -march=barcelona -O -fomit-frame-pointer2016121620161026
497044refclang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2016121620161026
497430refclang -O3 -fwrapv -march=native -fomit-frame-pointer -Qunused-arguments2016121620161026
508958refgcc -m64 -march=barcelona -O2 -fomit-frame-pointer2016121620161026
509310refgcc -m64 -march=corei7 -O2 -fomit-frame-pointer2016121620161026
510346refgcc -m64 -march=core-avx-i -O2 -fomit-frame-pointer2016121620161026
510378refgcc -O2 -fomit-frame-pointer2016121620161026
510478refgcc -march=barcelona -O2 -fomit-frame-pointer2016121620161026
510632refgcc -m64 -O2 -fomit-frame-pointer2016121620161026
510918refgcc -m64 -march=core-avx2 -O2 -fomit-frame-pointer2016121620161026
511106refgcc -m64 -march=native -mtune=native -O2 -fomit-frame-pointer2016121620161026
511542refgcc -m64 -march=corei7-avx -O2 -fomit-frame-pointer2016121620161026
511984refgcc -march=nocona -O2 -fomit-frame-pointer2016121620161026
512016refgcc -m64 -march=nocona -O2 -fomit-frame-pointer2016121620161026
512020refgcc -march=k8 -O2 -fomit-frame-pointer2016121620161026
512312refgcc -m64 -march=core2 -msse4.1 -O2 -fomit-frame-pointer2016121620161026
513088refgcc -m64 -march=core2 -msse4 -O2 -fomit-frame-pointer2016121620161026
513286refgcc -fno-schedule-insns -O2 -fomit-frame-pointer2016121620161026
513550refclang -O3 -fwrapv -mavx -fomit-frame-pointer -Qunused-arguments2016121620161026
513708refclang -O3 -fwrapv -mavx2 -fomit-frame-pointer -Qunused-arguments2016121620161026
513862refclang -O3 -fwrapv -march=x86-64 -mcpu=core-avx2 -mavx2 -maes -mpclmul -fomit-frame-pointer -Qunused-arguments2016121620161026
514242refclang -O3 -fwrapv -mavx -maes -mpclmul -fomit-frame-pointer -Qunused-arguments2016121620161026
514718refgcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv2016121620161026
517198refgcc -m64 -march=core2 -O2 -fomit-frame-pointer2016121620161026
517956refgcc -m64 -march=k8 -O2 -fomit-frame-pointer2016121620161026
524634refgcc -m64 -march=nocona -O -fomit-frame-pointer2016121620161026
526954refgcc -march=nocona -O -fomit-frame-pointer2016121620161026
529812refgcc -march=barcelona -O -fomit-frame-pointer2016121620161026
531052refgcc -m64 -march=core-avx-i -O -fomit-frame-pointer2016121620161026
531310refgcc -m64 -march=core2 -msse4 -O -fomit-frame-pointer2016121620161026
531668refgcc -m64 -march=core2 -O -fomit-frame-pointer2016121620161026
532788refgcc -m64 -march=k8 -O -fomit-frame-pointer2016121620161026
533050refgcc -m64 -march=corei7 -O -fomit-frame-pointer2016121620161026
533084refgcc -march=k8 -O -fomit-frame-pointer2016121620161026
534306refgcc -O -fomit-frame-pointer2016121620161026
534712refgcc -m64 -O -fomit-frame-pointer2016121620161026
534760refgcc -m64 -march=barcelona -O -fomit-frame-pointer2016121620161026
535410refgcc -m64 -march=core2 -msse4.1 -O -fomit-frame-pointer2016121620161026
535694refgcc -m64 -march=corei7-avx -O -fomit-frame-pointer2016121620161026
536736refgcc -m64 -march=core-avx2 -O -fomit-frame-pointer2016121620161026
540844refgcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv2016121620161026
541922refgcc -m64 -march=native -mtune=native -O -fomit-frame-pointer2016121620161026
542912refgcc -fno-schedule-insns -O -fomit-frame-pointer2016121620161026
547634refclang -mcpu=cortex-a9 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2016121620161026
549062refclang -O3 -fomit-frame-pointer -Qunused-arguments2016121620161026
550718refclang -mcpu=cortex-a8 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2016121620161026
553162refclang -mcpu=native -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2016121620161026
768502refgcc -fno-schedule-insns -Os -fomit-frame-pointer2016121620161026
768846refgcc -m64 -march=native -mtune=native -Os -fomit-frame-pointer2016121620161026
769066refgcc -march=k8 -Os -fomit-frame-pointer2016121620161026
770082refgcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv2016121620161026
770148refgcc -m64 -Os -fomit-frame-pointer2016121620161026
770940refgcc -march=nocona -Os -fomit-frame-pointer2016121620161026
773672refgcc -Os -fomit-frame-pointer2016121620161026
774624refgcc -m64 -march=corei7 -Os -fomit-frame-pointer2016121620161026
775192refgcc -m64 -march=core-avx-i -Os -fomit-frame-pointer2016121620161026
775306refgcc -m64 -march=nocona -Os -fomit-frame-pointer2016121620161026
775854refgcc -m64 -march=k8 -Os -fomit-frame-pointer2016121620161026
776672refgcc -m64 -march=core2 -msse4.1 -Os -fomit-frame-pointer2016121620161026
777188refgcc -march=barcelona -Os -fomit-frame-pointer2016121620161026
778222refgcc -m64 -march=core2 -msse4 -Os -fomit-frame-pointer2016121620161026
778244refgcc -m64 -march=barcelona -Os -fomit-frame-pointer2016121620161026
778536refgcc -m64 -march=core2 -Os -fomit-frame-pointer2016121620161026
779012refgcc -m64 -march=corei7-avx -Os -fomit-frame-pointer2016121620161026
779178refgcc -m64 -march=core-avx2 -Os -fomit-frame-pointer2016121620161026
918242refgcc -funroll-loops -m64 -march=nocona -Os -fomit-frame-pointer2016121620161026
919802refgcc -funroll-loops -march=nocona -Os -fomit-frame-pointer2016121620161026
920386refgcc -funroll-loops -m64 -march=k8 -Os -fomit-frame-pointer2016121620161026
920414refgcc -funroll-loops -m64 -Os -fomit-frame-pointer2016121620161026
920776refgcc -funroll-loops -fno-schedule-insns -Os -fomit-frame-pointer2016121620161026
921326refgcc -funroll-loops -Os -fomit-frame-pointer2016121620161026
921818refgcc -funroll-loops -march=barcelona -Os -fomit-frame-pointer2016121620161026
923644refgcc -funroll-loops -march=k8 -Os -fomit-frame-pointer2016121620161026
925760refgcc -funroll-loops -m64 -march=barcelona -Os -fomit-frame-pointer2016121620161026
1792392refgcc -funroll-loops2016121620161026
1795194refgcc2016121620161026
1805870refcc2016121620161026

Compiler output

Implementation: crypto_aead/scream12v1/sse
Compiler: cc
scream.c: scream.c: In function 'LBox16P':
scream.c: scream.c:185:10: warning: implicit declaration of function '__builtin_ia32_pshufb128' [-Wimplicit-function-declaration]
scream.c: A = __builtin_ia32_pshufb128(table, t0);
scream.c: ^~~~~~~~~~~~~~~~~~~~~~~~
scream.c: scream.c:185:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: A = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:186:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: C = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:190:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: B = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:191:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: D = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:198:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: A ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^~
scream.c: scream.c:199:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: C ^= __builtin_ia32_pshufb128(table, in[2]);
scream.c: ^~
scream.c: scream.c:203:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: B ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^~
scream.c: ...

Number of similar (compiler,implementation) pairs: 71, namely:
Compiler Implementations
cc sse
gcc sse
gcc -O2 -fomit-frame-pointer sse
gcc -O3 -fomit-frame-pointer sse
gcc -O -fomit-frame-pointer sse
gcc -Os -fomit-frame-pointer sse
gcc -fno-schedule-insns -O2 -fomit-frame-pointer sse
gcc -fno-schedule-insns -O3 -fomit-frame-pointer sse
gcc -fno-schedule-insns -O -fomit-frame-pointer sse
gcc -fno-schedule-insns -Os -fomit-frame-pointer sse
gcc -funroll-loops sse
gcc -funroll-loops -O2 -fomit-frame-pointer sse
gcc -funroll-loops -O3 -fomit-frame-pointer sse
gcc -funroll-loops -O -fomit-frame-pointer sse
gcc -funroll-loops -Os -fomit-frame-pointer sse
gcc -funroll-loops -fno-schedule-insns -O2 -fomit-frame-pointer sse
gcc -funroll-loops -fno-schedule-insns -O3 -fomit-frame-pointer sse
gcc -funroll-loops -fno-schedule-insns -O -fomit-frame-pointer sse
gcc -funroll-loops -fno-schedule-insns -Os -fomit-frame-pointer sse
gcc -funroll-loops -m64 -O2 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -O3 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -O -fomit-frame-pointer sse
gcc -funroll-loops -m64 -Os -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=barcelona -O2 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=barcelona -O3 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=barcelona -O -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=barcelona -Os -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=k8 -O3 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=k8 -O -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=k8 -Os -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=nocona -O2 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=nocona -O3 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=nocona -O -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=nocona -Os -fomit-frame-pointer sse
gcc -funroll-loops -march=barcelona -O2 -fomit-frame-pointer sse
gcc -funroll-loops -march=barcelona -O3 -fomit-frame-pointer sse
gcc -funroll-loops -march=barcelona -O -fomit-frame-pointer sse
gcc -funroll-loops -march=barcelona -Os -fomit-frame-pointer sse
gcc -funroll-loops -march=k8 -O2 -fomit-frame-pointer sse
gcc -funroll-loops -march=k8 -O3 -fomit-frame-pointer sse
gcc -funroll-loops -march=k8 -O -fomit-frame-pointer sse
gcc -funroll-loops -march=k8 -Os -fomit-frame-pointer sse
gcc -funroll-loops -march=nocona -O2 -fomit-frame-pointer sse
gcc -funroll-loops -march=nocona -O3 -fomit-frame-pointer sse
gcc -funroll-loops -march=nocona -O -fomit-frame-pointer sse
gcc -funroll-loops -march=nocona -Os -fomit-frame-pointer sse
gcc -m64 -O2 -fomit-frame-pointer sse
gcc -m64 -O3 -fomit-frame-pointer sse
gcc -m64 -O -fomit-frame-pointer sse
gcc -m64 -Os -fomit-frame-pointer sse
gcc -m64 -march=k8 -O2 -fomit-frame-pointer sse
gcc -m64 -march=k8 -O3 -fomit-frame-pointer sse
gcc -m64 -march=k8 -O -fomit-frame-pointer sse
gcc -m64 -march=k8 -Os -fomit-frame-pointer sse
gcc -m64 -march=nocona -O2 -fomit-frame-pointer sse
gcc -m64 -march=nocona -O3 -fomit-frame-pointer sse
gcc -m64 -march=nocona -O -fomit-frame-pointer sse
gcc -m64 -march=nocona -Os -fomit-frame-pointer sse
gcc -march=barcelona -O2 -fomit-frame-pointer sse
gcc -march=barcelona -O3 -fomit-frame-pointer sse
gcc -march=barcelona -O -fomit-frame-pointer sse
gcc -march=barcelona -Os -fomit-frame-pointer sse
gcc -march=k8 -O2 -fomit-frame-pointer sse
gcc -march=k8 -O3 -fomit-frame-pointer sse
gcc -march=k8 -O -fomit-frame-pointer sse
gcc -march=k8 -Os -fomit-frame-pointer sse
gcc -march=nocona -O2 -fomit-frame-pointer sse
gcc -march=nocona -O3 -fomit-frame-pointer sse
gcc -march=nocona -O -fomit-frame-pointer sse
gcc -march=nocona -Os -fomit-frame-pointer sse

Compiler output

Implementation: crypto_aead/scream12v1/sse
Compiler: clang -O3 -fomit-frame-pointer -Qunused-arguments
scream.c: scream.c:185:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: A = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:186:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: C = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:190:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: B = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:191:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: D = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:198:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: A ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^
scream.c: scream.c:199:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: C ^= __builtin_ia32_pshufb128(table, in[2]);
scream.c: ^
scream.c: scream.c:203:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: B ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^
scream.c: scream.c:204:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: D ^= __builtin_ia32_pshufb128(table, in[2]);
scream.c: ^
scream.c: scream.c:211:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler Implementations
clang -O3 -fomit-frame-pointer -Qunused-arguments sse
clang -mcpu=cortex-a8 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments sse
clang -mcpu=cortex-a9 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments sse
clang -mcpu=native -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments sse

Compiler output

Implementation: crypto_aead/scream12v1/sse
Compiler: gcc -m64 -march=barcelona -O2 -fomit-frame-pointer
scream.c: scream.c: In function 'LBox16P':
scream.c: scream.c:185:10: warning: implicit declaration of function '__builtin_ia32_pshufb128' [-Wimplicit-function-declaration]
scream.c: A = __builtin_ia32_pshufb128(table, t0);
scream.c: ^~~~~~~~~~~~~~~~~~~~~~~~
scream.c: scream.c:185:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: A = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:186:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: C = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:190:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: B = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:191:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: D = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:198:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: A ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^~
scream.c: scream.c:199:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: C ^= __builtin_ia32_pshufb128(table, in[2]);
scream.c: ^~
scream.c: scream.c:203:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: B ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^~
scream.c: ...
scream.c: scream.c: In function 'LBox16P':
scream.c: scream.c:185:10: warning: implicit declaration of function '__builtin_ia32_pshufb128' [-Wimplicit-function-declaration]
scream.c: A = __builtin_ia32_pshufb128(table, t0);
scream.c: ^~~~~~~~~~~~~~~~~~~~~~~~
scream.c: scream.c:185:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: A = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:186:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: C = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:190:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: B = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:191:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: D = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:198:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: A ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^~
scream.c: scream.c:199:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: C ^= __builtin_ia32_pshufb128(table, in[2]);
scream.c: ^~
scream.c: scream.c:203:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: B ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^~
scream.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler Implementations
gcc -m64 -march=barcelona -O2 -fomit-frame-pointer sse
gcc -m64 -march=barcelona -O3 -fomit-frame-pointer sse
gcc -m64 -march=barcelona -O -fomit-frame-pointer sse
gcc -m64 -march=barcelona -Os -fomit-frame-pointer sse