Implementation notes: amd64, hydra1, crypto_aead/scream12v2

Computer: hydra1
Architecture: amd64
CPU ID: AuthenticAMD-00100fa0-178bfbff
SUPERCOP version: 20161220
Operation: crypto_aead
Primitive: scream12v2
Time | Implementation | Compiler | Benchmark date | SUPERCOP version
477554 | ref | gcc -funroll-loops -march=barcelona -O2 -fomit-frame-pointer | 20161023 | 20161009
477896 | ref | gcc -funroll-loops -m64 -march=barcelona -O2 -fomit-frame-pointer | 20161023 | 20161009
478230 | ref | gcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer | 20161023 | 20161009
478662 | ref | gcc -funroll-loops -march=k8 -O2 -fomit-frame-pointer | 20161023 | 20161009
503624 | ref | gcc -funroll-loops -fno-schedule-insns -O2 -fomit-frame-pointer | 20161023 | 20161009
503640 | ref | gcc -funroll-loops -m64 -O2 -fomit-frame-pointer | 20161023 | 20161009
504069 | ref | gcc -funroll-loops -O2 -fomit-frame-pointer | 20161023 | 20161009
506221 | ref | gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv | 20161023 | 20161009
506933 | ref | gcc -m64 -march=native -mtune=native -O3 -fomit-frame-pointer | 20161023 | 20161009
507051 | ref | gcc -march=k8 -O3 -fomit-frame-pointer | 20161023 | 20161009
507330 | ref | gcc -m64 -march=k8 -O3 -fomit-frame-pointer | 20161023 | 20161009
508050 | ref | gcc -m64 -march=barcelona -O3 -fomit-frame-pointer | 20161023 | 20161009
508326 | ref | gcc -funroll-loops -m64 -march=k8 -O3 -fomit-frame-pointer | 20161023 | 20161009
508344 | ref | gcc -funroll-loops -march=k8 -O3 -fomit-frame-pointer | 20161023 | 20161009
508748 | ref | gcc -march=barcelona -O3 -fomit-frame-pointer | 20161023 | 20161009
509382 | ref | gcc -funroll-loops -m64 -march=barcelona -O3 -fomit-frame-pointer | 20161023 | 20161009
509810 | ref | gcc -funroll-loops -march=barcelona -O3 -fomit-frame-pointer | 20161023 | 20161009
512203 | ref | gcc -O3 -fomit-frame-pointer | 20161023 | 20161009
513293 | ref | gcc -fno-schedule-insns -O3 -fomit-frame-pointer | 20161023 | 20161009
513901 | ref | gcc -m64 -O3 -fomit-frame-pointer | 20161023 | 20161009
518338 | ref | gcc -funroll-loops -m64 -O3 -fomit-frame-pointer | 20161023 | 20161009
518437 | ref | gcc -funroll-loops -O3 -fomit-frame-pointer | 20161023 | 20161009
518472 | ref | gcc -funroll-loops -fno-schedule-insns -O3 -fomit-frame-pointer | 20161023 | 20161009
524344 | ref | gcc -funroll-loops -m64 -march=nocona -O2 -fomit-frame-pointer | 20161023 | 20161009
524671 | ref | gcc -funroll-loops -march=nocona -O2 -fomit-frame-pointer | 20161023 | 20161009
531557 | ref | gcc -funroll-loops -m64 -march=nocona -O3 -fomit-frame-pointer | 20161023 | 20161009
531600 | ref | gcc -funroll-loops -march=nocona -O3 -fomit-frame-pointer | 20161023 | 20161009
535303 | ref | gcc -funroll-loops -march=nocona -O -fomit-frame-pointer | 20161023 | 20161009
535442 | ref | gcc -funroll-loops -m64 -march=nocona -O -fomit-frame-pointer | 20161023 | 20161009
538203 | ref | gcc -m64 -march=nocona -O3 -fomit-frame-pointer | 20161023 | 20161009
538632 | ref | gcc -march=nocona -O3 -fomit-frame-pointer | 20161023 | 20161009
539544 | ref | gcc -funroll-loops -fno-schedule-insns -O -fomit-frame-pointer | 20161023 | 20161009
539617 | ref | gcc -funroll-loops -m64 -O -fomit-frame-pointer | 20161023 | 20161009
539975 | ref | gcc -funroll-loops -m64 -march=barcelona -O -fomit-frame-pointer | 20161023 | 20161009
540082 | ref | gcc -funroll-loops -O -fomit-frame-pointer | 20161023 | 20161009
540107 | ref | gcc -funroll-loops -m64 -march=k8 -O -fomit-frame-pointer | 20161023 | 20161009
540210 | ref | gcc -funroll-loops -march=barcelona -O -fomit-frame-pointer | 20161023 | 20161009
540382 | ref | gcc -funroll-loops -march=k8 -O -fomit-frame-pointer | 20161023 | 20161009
849933 | ref | clang -mcpu=cortex-a9 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20161023 | 20161009
850267 | ref | clang -mcpu=cortex-a8 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20161023 | 20161009
852070 | ref | clang -mcpu=native -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20161023 | 20161009
852291 | ref | clang -O3 -fomit-frame-pointer -Qunused-arguments | 20161023 | 20161009
875802 | ref | clang -O3 -fwrapv -march=native -fomit-frame-pointer -Qunused-arguments | 20161023 | 20161009
879741 | ref | clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments | 20161023 | 20161009
1131459 | ref | gcc -march=nocona -O2 -fomit-frame-pointer | 20161023 | 20161009
1131544 | ref | gcc -m64 -march=nocona -O2 -fomit-frame-pointer | 20161023 | 20161009
1145403 | ref | gcc -m64 -march=core2 -O2 -fomit-frame-pointer | 20161023 | 20161009
1147179 | ref | gcc -funroll-loops -m64 -march=nocona -Os -fomit-frame-pointer | 20161023 | 20161009
1147320 | ref | gcc -funroll-loops -march=nocona -Os -fomit-frame-pointer | 20161023 | 20161009
1147391 | ref | gcc -m64 -march=core2 -msse4.1 -O2 -fomit-frame-pointer | 20161023 | 20161009
1152408 | ref | gcc -m64 -march=core2 -msse4 -O2 -fomit-frame-pointer | 20161023 | 20161009
1153467 | ref | gcc -m64 -march=corei7 -O2 -fomit-frame-pointer | 20161023 | 20161009
1155573 | ref | gcc -march=nocona -O -fomit-frame-pointer | 20161023 | 20161009
1156413 | ref | gcc -m64 -march=nocona -O -fomit-frame-pointer | 20161023 | 20161009
1158799 | ref | gcc -march=nocona -Os -fomit-frame-pointer | 20161023 | 20161009
1161250 | ref | gcc -march=k8 -O -fomit-frame-pointer | 20161023 | 20161009
1161259 | ref | gcc -m64 -march=k8 -O -fomit-frame-pointer | 20161023 | 20161009
1164968 | ref | gcc -march=k8 -O2 -fomit-frame-pointer | 20161023 | 20161009
1166261 | ref | gcc -m64 -O2 -fomit-frame-pointer | 20161023 | 20161009
1166408 | ref | gcc -O2 -fomit-frame-pointer | 20161023 | 20161009
1166927 | ref | gcc -fno-schedule-insns -O2 -fomit-frame-pointer | 20161023 | 20161009
1167024 | ref | gcc -m64 -march=nocona -Os -fomit-frame-pointer | 20161023 | 20161009
1169763 | ref | gcc -m64 -march=k8 -O2 -fomit-frame-pointer | 20161023 | 20161009
1172090 | ref | gcc -march=barcelona -O -fomit-frame-pointer | 20161023 | 20161009
1174034 | ref | gcc -m64 -march=corei7 -O -fomit-frame-pointer | 20161023 | 20161009
1175064 | ref | gcc -m64 -march=native -mtune=native -O -fomit-frame-pointer | 20161023 | 20161009
1176797 | ref | gcc -m64 -O -fomit-frame-pointer | 20161023 | 20161009
1176859 | ref | gcc -O -fomit-frame-pointer | 20161023 | 20161009
1176939 | ref | gcc -fno-schedule-insns -O -fomit-frame-pointer | 20161023 | 20161009
1178954 | ref | gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv | 20161023 | 20161009
1180000 | ref | gcc -m64 -march=core2 -msse4 -O -fomit-frame-pointer | 20161023 | 20161009
1180000 | ref | gcc -march=barcelona -O2 -fomit-frame-pointer | 20161023 | 20161009
1180099 | ref | gcc -m64 -march=core2 -O -fomit-frame-pointer | 20161023 | 20161009
1180250 | ref | gcc -m64 -march=core2 -msse4.1 -O -fomit-frame-pointer | 20161023 | 20161009
1181080 | ref | gcc -m64 -march=native -mtune=native -O2 -fomit-frame-pointer | 20161023 | 20161009
1181627 | ref | gcc -m64 -march=barcelona -O -fomit-frame-pointer | 20161023 | 20161009
1186850 | ref | gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv | 20161023 | 20161009
1187314 | ref | gcc -m64 -march=barcelona -O2 -fomit-frame-pointer | 20161023 | 20161009
1189032 | ref | gcc -funroll-loops -m64 -march=barcelona -Os -fomit-frame-pointer | 20161023 | 20161009
1189077 | ref | gcc -funroll-loops -march=barcelona -Os -fomit-frame-pointer | 20161023 | 20161009
1189122 | ref | gcc -funroll-loops -fno-schedule-insns -Os -fomit-frame-pointer | 20161023 | 20161009
1189221 | ref | gcc -funroll-loops -march=k8 -Os -fomit-frame-pointer | 20161023 | 20161009
1189280 | ref | gcc -funroll-loops -m64 -Os -fomit-frame-pointer | 20161023 | 20161009
1189302 | ref | gcc -funroll-loops -Os -fomit-frame-pointer | 20161023 | 20161009
1189365 | ref | gcc -funroll-loops -m64 -march=k8 -Os -fomit-frame-pointer | 20161023 | 20161009
1215685 | ref | gcc -m64 -march=corei7 -Os -fomit-frame-pointer | 20161023 | 20161009
1215786 | ref | gcc -m64 -march=core2 -msse4 -Os -fomit-frame-pointer | 20161023 | 20161009
1215896 | ref | gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv | 20161023 | 20161009
1216000 | ref | gcc -m64 -march=core2 -Os -fomit-frame-pointer | 20161023 | 20161009
1216453 | ref | gcc -m64 -march=core2 -msse4.1 -Os -fomit-frame-pointer | 20161023 | 20161009
1229309 | ref | gcc -march=k8 -Os -fomit-frame-pointer | 20161023 | 20161009
1231072 | ref | gcc -m64 -Os -fomit-frame-pointer | 20161023 | 20161009
1231429 | ref | gcc -m64 -march=native -mtune=native -Os -fomit-frame-pointer | 20161023 | 20161009
1233061 | ref | gcc -m64 -march=k8 -Os -fomit-frame-pointer | 20161023 | 20161009
1233248 | ref | gcc -march=barcelona -Os -fomit-frame-pointer | 20161023 | 20161009
1233286 | ref | gcc -Os -fomit-frame-pointer | 20161023 | 20161009
1233386 | ref | gcc -m64 -march=barcelona -Os -fomit-frame-pointer | 20161023 | 20161009
1238435 | ref | gcc -fno-schedule-insns -Os -fomit-frame-pointer | 20161023 | 20161009
5540013 | ref | cc | 20161023 | 20161009
5551725 | ref | gcc -funroll-loops | 20161023 | 20161009
5552026 | ref | gcc | 20161023 | 20161009

Test failure

Implementation: crypto_aead/scream12v2/sse
Compiler: gcc -m64 -march=core2 -O2 -fomit-frame-pointer
error 111

Number of similar (compiler,implementation) pairs: 14, namely:
Compiler Implementations
gcc -m64 -march=core2 -O2 -fomit-frame-pointer sse
gcc -m64 -march=core2 -O3 -fomit-frame-pointer ref sse
gcc -m64 -march=core2 -O -fomit-frame-pointer sse
gcc -m64 -march=core2 -Os -fomit-frame-pointer sse
gcc -m64 -march=core2 -msse4.1 -O2 -fomit-frame-pointer sse
gcc -m64 -march=core2 -msse4.1 -O -fomit-frame-pointer sse
gcc -m64 -march=core2 -msse4.1 -Os -fomit-frame-pointer sse
gcc -m64 -march=core2 -msse4 -O2 -fomit-frame-pointer sse
gcc -m64 -march=core2 -msse4 -O -fomit-frame-pointer sse
gcc -m64 -march=core2 -msse4 -Os -fomit-frame-pointer sse
gcc -m64 -march=corei7 -O2 -fomit-frame-pointer sse
gcc -m64 -march=corei7 -O -fomit-frame-pointer sse
gcc -m64 -march=corei7 -Os -fomit-frame-pointer sse

Compiler output

Implementation: crypto_aead/scream12v2/sse
Compiler: cc
scream.c: scream.c: In function 'LBox16P':
scream.c: scream.c:185:10: warning: implicit declaration of function '__builtin_ia32_pshufb128' [-Wimplicit-function-declaration]
scream.c: A = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:185:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: A = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:186:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: C = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:190:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: B = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:191:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: D = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:198:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: A ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^
scream.c: scream.c:199:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: C ^= __builtin_ia32_pshufb128(table, in[2]);
scream.c: ^
scream.c: scream.c:203:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: B ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^
scream.c: ...

Number of similar (compiler,implementation) pairs: 79, namely:
Compiler Implementations
cc sse
gcc sse
gcc -O2 -fomit-frame-pointer sse
gcc -O3 -fomit-frame-pointer sse
gcc -O -fomit-frame-pointer sse
gcc -Os -fomit-frame-pointer sse
gcc -fno-schedule-insns -O2 -fomit-frame-pointer sse
gcc -fno-schedule-insns -O3 -fomit-frame-pointer sse
gcc -fno-schedule-insns -O -fomit-frame-pointer sse
gcc -fno-schedule-insns -Os -fomit-frame-pointer sse
gcc -funroll-loops sse
gcc -funroll-loops -O2 -fomit-frame-pointer sse
gcc -funroll-loops -O3 -fomit-frame-pointer sse
gcc -funroll-loops -O -fomit-frame-pointer sse
gcc -funroll-loops -Os -fomit-frame-pointer sse
gcc -funroll-loops -fno-schedule-insns -O2 -fomit-frame-pointer sse
gcc -funroll-loops -fno-schedule-insns -O3 -fomit-frame-pointer sse
gcc -funroll-loops -fno-schedule-insns -O -fomit-frame-pointer sse
gcc -funroll-loops -fno-schedule-insns -Os -fomit-frame-pointer sse
gcc -funroll-loops -m64 -O2 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -O3 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -O -fomit-frame-pointer sse
gcc -funroll-loops -m64 -Os -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=barcelona -O2 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=barcelona -O3 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=barcelona -O -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=barcelona -Os -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=k8 -O3 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=k8 -O -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=k8 -Os -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=nocona -O2 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=nocona -O3 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=nocona -O -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=nocona -Os -fomit-frame-pointer sse
gcc -funroll-loops -march=barcelona -O2 -fomit-frame-pointer sse
gcc -funroll-loops -march=barcelona -O3 -fomit-frame-pointer sse
gcc -funroll-loops -march=barcelona -O -fomit-frame-pointer sse
gcc -funroll-loops -march=barcelona -Os -fomit-frame-pointer sse
gcc -funroll-loops -march=k8 -O2 -fomit-frame-pointer sse
gcc -funroll-loops -march=k8 -O3 -fomit-frame-pointer sse
gcc -funroll-loops -march=k8 -O -fomit-frame-pointer sse
gcc -funroll-loops -march=k8 -Os -fomit-frame-pointer sse
gcc -funroll-loops -march=nocona -O2 -fomit-frame-pointer sse
gcc -funroll-loops -march=nocona -O3 -fomit-frame-pointer sse
gcc -funroll-loops -march=nocona -O -fomit-frame-pointer sse
gcc -funroll-loops -march=nocona -Os -fomit-frame-pointer sse
gcc -m64 -O2 -fomit-frame-pointer sse
gcc -m64 -O3 -fomit-frame-pointer sse
gcc -m64 -O -fomit-frame-pointer sse
gcc -m64 -Os -fomit-frame-pointer sse
gcc -m64 -march=k8 -O2 -fomit-frame-pointer sse
gcc -m64 -march=k8 -O3 -fomit-frame-pointer sse
gcc -m64 -march=k8 -O -fomit-frame-pointer sse
gcc -m64 -march=k8 -Os -fomit-frame-pointer sse
gcc -m64 -march=native -mtune=native -O2 -fomit-frame-pointer sse
gcc -m64 -march=native -mtune=native -O3 -fomit-frame-pointer sse
gcc -m64 -march=native -mtune=native -O -fomit-frame-pointer sse
gcc -m64 -march=native -mtune=native -Os -fomit-frame-pointer sse
gcc -m64 -march=nocona -O2 -fomit-frame-pointer sse
gcc -m64 -march=nocona -O3 -fomit-frame-pointer sse
gcc -m64 -march=nocona -O -fomit-frame-pointer sse
gcc -m64 -march=nocona -Os -fomit-frame-pointer sse
gcc -march=barcelona -O2 -fomit-frame-pointer sse
gcc -march=barcelona -O3 -fomit-frame-pointer sse
gcc -march=barcelona -O -fomit-frame-pointer sse
gcc -march=barcelona -Os -fomit-frame-pointer sse
gcc -march=k8 -O2 -fomit-frame-pointer sse
gcc -march=k8 -O3 -fomit-frame-pointer sse
gcc -march=k8 -O -fomit-frame-pointer sse
gcc -march=k8 -Os -fomit-frame-pointer sse
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv sse
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv sse
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv sse
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv sse
gcc -march=nocona -O2 -fomit-frame-pointer sse
gcc -march=nocona -O3 -fomit-frame-pointer sse
gcc -march=nocona -O -fomit-frame-pointer sse
gcc -march=nocona -Os -fomit-frame-pointer sse

Compiler output

Implementation: crypto_aead/scream12v2/sse
Compiler: clang -O3 -fomit-frame-pointer -Qunused-arguments
scream.c: scream.c:185:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: A = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:186:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: C = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:190:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: B = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:191:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: D = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:198:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: A ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^
scream.c: scream.c:199:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: C ^= __builtin_ia32_pshufb128(table, in[2]);
scream.c: ^
scream.c: scream.c:203:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: B ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^
scream.c: scream.c:204:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: D ^= __builtin_ia32_pshufb128(table, in[2]);
scream.c: ^
scream.c: scream.c:211:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: ...

Number of similar (compiler,implementation) pairs: 6, namely:
Compiler Implementations
clang -O3 -fomit-frame-pointer -Qunused-arguments sse
clang -O3 -fwrapv -march=native -fomit-frame-pointer -Qunused-arguments sse
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments sse
clang -mcpu=cortex-a8 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments sse
clang -mcpu=cortex-a9 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments sse
clang -mcpu=native -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments sse

Compiler output

Implementation: crypto_aead/scream12v2/sse
Compiler: gcc -m64 -march=barcelona -O2 -fomit-frame-pointer
scream.c: scream.c: In function 'LBox16P':
scream.c: scream.c:185:10: warning: implicit declaration of function '__builtin_ia32_pshufb128' [-Wimplicit-function-declaration]
scream.c: A = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:185:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: A = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:186:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: C = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:190:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: B = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:191:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: D = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:198:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: A ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^
scream.c: scream.c:199:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: C ^= __builtin_ia32_pshufb128(table, in[2]);
scream.c: ^
scream.c: scream.c:203:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: B ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^
scream.c: ...
scream.c: scream.c: In function 'LBox16P':
scream.c: scream.c:185:10: warning: implicit declaration of function '__builtin_ia32_pshufb128' [-Wimplicit-function-declaration]
scream.c: A = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:185:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: A = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:186:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: C = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:190:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: B = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:191:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: D = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:198:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: A ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^
scream.c: scream.c:199:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: C ^= __builtin_ia32_pshufb128(table, in[2]);
scream.c: ^
scream.c: scream.c:203:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: B ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^
scream.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler Implementations
gcc -m64 -march=barcelona -O2 -fomit-frame-pointer sse
gcc -m64 -march=barcelona -O3 -fomit-frame-pointer sse
gcc -m64 -march=barcelona -O -fomit-frame-pointer sse
gcc -m64 -march=barcelona -Os -fomit-frame-pointer sse