Implementation notes: amd64, hydra1, crypto_aead/scream12v3

Computer: hydra1
Architecture: amd64
CPU ID: AuthenticAMD-00100fa0-178bfbff
SUPERCOP version: 20161220
Operation: crypto_aead
Primitive: scream12v3
Time | Implementation | Compiler | Benchmark date | SUPERCOP version
507941refgcc -funroll-loops -march=k8 -O3 -fomit-frame-pointer2016102320161009
508007refgcc -funroll-loops -m64 -march=k8 -O3 -fomit-frame-pointer2016102320161009
508548refgcc -funroll-loops -m64 -march=barcelona -O3 -fomit-frame-pointer2016102320161009
508742refgcc -funroll-loops -march=barcelona -O3 -fomit-frame-pointer2016102320161009
510213refgcc -funroll-loops -march=barcelona -O2 -fomit-frame-pointer2016102320161009
510219refgcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer2016102320161009
510326refgcc -funroll-loops -m64 -march=barcelona -O2 -fomit-frame-pointer2016102320161009
510447refgcc -funroll-loops -march=k8 -O2 -fomit-frame-pointer2016102320161009
512586refgcc -funroll-loops -m64 -O3 -fomit-frame-pointer2016102320161009
512949refgcc -funroll-loops -O3 -fomit-frame-pointer2016102320161009
514189refgcc -funroll-loops -fno-schedule-insns -O3 -fomit-frame-pointer2016102320161009
516192refgcc -funroll-loops -fno-schedule-insns -O2 -fomit-frame-pointer2016102320161009
516192refgcc -funroll-loops -m64 -O2 -fomit-frame-pointer2016102320161009
516384refgcc -funroll-loops -O2 -fomit-frame-pointer2016102320161009
526032refgcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv2016102320161009
543376refgcc -m64 -O3 -fomit-frame-pointer2016102320161009
543466refgcc -O3 -fomit-frame-pointer2016102320161009
544126refgcc -fno-schedule-insns -O3 -fomit-frame-pointer2016102320161009
546089refgcc -m64 -march=k8 -O3 -fomit-frame-pointer2016102320161009
546101refgcc -march=k8 -O3 -fomit-frame-pointer2016102320161009
553743refgcc -m64 -march=barcelona -O3 -fomit-frame-pointer2016102320161009
554287refgcc -march=barcelona -O3 -fomit-frame-pointer2016102320161009
554554refgcc -m64 -march=native -mtune=native -O3 -fomit-frame-pointer2016102320161009
561893refgcc -funroll-loops -m64 -march=nocona -O2 -fomit-frame-pointer2016102320161009
561999refgcc -funroll-loops -march=nocona -O3 -fomit-frame-pointer2016102320161009
562086refgcc -funroll-loops -m64 -march=nocona -O3 -fomit-frame-pointer2016102320161009
562333refgcc -funroll-loops -march=nocona -O2 -fomit-frame-pointer2016102320161009
564120refgcc -funroll-loops -m64 -march=nocona -O -fomit-frame-pointer2016102320161009
564285refgcc -funroll-loops -march=nocona -O -fomit-frame-pointer2016102320161009
578837refgcc -march=nocona -O3 -fomit-frame-pointer2016102320161009
580456refgcc -m64 -march=nocona -O3 -fomit-frame-pointer2016102320161009
608206refgcc -funroll-loops -march=barcelona -O -fomit-frame-pointer2016102320161009
608245refgcc -funroll-loops -fno-schedule-insns -O -fomit-frame-pointer2016102320161009
608540refgcc -funroll-loops -march=k8 -O -fomit-frame-pointer2016102320161009
608640refgcc -funroll-loops -m64 -march=barcelona -O -fomit-frame-pointer2016102320161009
608742refgcc -funroll-loops -m64 -O -fomit-frame-pointer2016102320161009
608787refgcc -funroll-loops -O -fomit-frame-pointer2016102320161009
609092refgcc -funroll-loops -m64 -march=k8 -O -fomit-frame-pointer2016102320161009
804528refclang -mcpu=native -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2016102320161009
804592refclang -mcpu=cortex-a9 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2016102320161009
804999refclang -O3 -fwrapv -march=native -fomit-frame-pointer -Qunused-arguments2016102320161009
805269refclang -O3 -fomit-frame-pointer -Qunused-arguments2016102320161009
813513refclang -mcpu=cortex-a8 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2016102320161009
815074refclang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2016102320161009
1058222refgcc -march=nocona -O2 -fomit-frame-pointer2016102320161009
1058971refgcc -m64 -march=nocona -O2 -fomit-frame-pointer2016102320161009
1059464refgcc -m64 -march=corei7 -O2 -fomit-frame-pointer2016102320161009
1064301refgcc -m64 -march=core2 -O2 -fomit-frame-pointer2016102320161009
1064586refgcc -m64 -march=core2 -msse4.1 -O2 -fomit-frame-pointer2016102320161009
1064669refgcc -m64 -march=core2 -msse4 -O2 -fomit-frame-pointer2016102320161009
1072286refgcc -m64 -march=core2 -O -fomit-frame-pointer2016102320161009
1072461refgcc -m64 -march=corei7 -O -fomit-frame-pointer2016102320161009
1072552refgcc -m64 -march=core2 -msse4 -O -fomit-frame-pointer2016102320161009
1073239refgcc -m64 -march=core2 -msse4.1 -O -fomit-frame-pointer2016102320161009
1073603refgcc -m64 -march=k8 -O2 -fomit-frame-pointer2016102320161009
1075197refgcc -m64 -O2 -fomit-frame-pointer2016102320161009
1075270refgcc -O2 -fomit-frame-pointer2016102320161009
1075450refgcc -march=k8 -O2 -fomit-frame-pointer2016102320161009
1080794refgcc -fno-schedule-insns -O2 -fomit-frame-pointer2016102320161009
1084354refgcc -O -fomit-frame-pointer2016102320161009
1084557refgcc -m64 -O -fomit-frame-pointer2016102320161009
1084597refgcc -fno-schedule-insns -O -fomit-frame-pointer2016102320161009
1088764refgcc -march=k8 -O -fomit-frame-pointer2016102320161009
1089031refgcc -m64 -march=k8 -O -fomit-frame-pointer2016102320161009
1090974refgcc -march=barcelona -O2 -fomit-frame-pointer2016102320161009
1091022refgcc -m64 -march=barcelona -O2 -fomit-frame-pointer2016102320161009
1093326refgcc -m64 -march=native -mtune=native -O2 -fomit-frame-pointer2016102320161009
1094317refgcc -m64 -march=barcelona -O -fomit-frame-pointer2016102320161009
1094518refgcc -march=barcelona -O -fomit-frame-pointer2016102320161009
1094591refgcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv2016102320161009
1099139refgcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv2016102320161009
1099776refgcc -m64 -march=native -mtune=native -O -fomit-frame-pointer2016102320161009
1116968refgcc -funroll-loops -m64 -march=nocona -Os -fomit-frame-pointer2016102320161009
1117162refgcc -funroll-loops -march=nocona -Os -fomit-frame-pointer2016102320161009
1127176refgcc -march=nocona -Os -fomit-frame-pointer2016102320161009
1127968refgcc -m64 -march=nocona -O -fomit-frame-pointer2016102320161009
1128069refgcc -march=nocona -O -fomit-frame-pointer2016102320161009
1128530refgcc -m64 -march=nocona -Os -fomit-frame-pointer2016102320161009
1143477refgcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv2016102320161009
1145386refgcc -funroll-loops -m64 -Os -fomit-frame-pointer2016102320161009
1145400refgcc -funroll-loops -m64 -march=k8 -Os -fomit-frame-pointer2016102320161009
1146618refgcc -funroll-loops -fno-schedule-insns -Os -fomit-frame-pointer2016102320161009
1147098refgcc -funroll-loops -Os -fomit-frame-pointer2016102320161009
1151419refgcc -funroll-loops -march=k8 -Os -fomit-frame-pointer2016102320161009
1168861refgcc -funroll-loops -march=barcelona -Os -fomit-frame-pointer2016102320161009
1168957refgcc -funroll-loops -m64 -march=barcelona -Os -fomit-frame-pointer2016102320161009
1173053refgcc -m64 -march=k8 -Os -fomit-frame-pointer2016102320161009
1174872refgcc -m64 -march=barcelona -Os -fomit-frame-pointer2016102320161009
1174888refgcc -march=barcelona -Os -fomit-frame-pointer2016102320161009
1175381refgcc -m64 -march=core2 -msse4 -Os -fomit-frame-pointer2016102320161009
1175403refgcc -m64 -march=native -mtune=native -Os -fomit-frame-pointer2016102320161009
1175886refgcc -m64 -march=core2 -Os -fomit-frame-pointer2016102320161009
1175920refgcc -Os -fomit-frame-pointer2016102320161009
1176240refgcc -fno-schedule-insns -Os -fomit-frame-pointer2016102320161009
1176272refgcc -m64 -Os -fomit-frame-pointer2016102320161009
1177109refgcc -m64 -march=core2 -msse4.1 -Os -fomit-frame-pointer2016102320161009
1177208refgcc -m64 -march=corei7 -Os -fomit-frame-pointer2016102320161009
1177544refgcc -march=k8 -Os -fomit-frame-pointer2016102320161009
5057624refgcc2016102320161009
5057979refgcc -funroll-loops2016102320161009
5074944refcc2016102320161009

Test failure

Implementation: crypto_aead/scream12v3/sse
Compiler: gcc -m64 -march=core2 -O2 -fomit-frame-pointer
error 111

Number of similar (compiler,implementation) pairs: 14, namely:
Compiler | Implementations
gcc -m64 -march=core2 -O2 -fomit-frame-pointer sse
gcc -m64 -march=core2 -O3 -fomit-frame-pointer ref sse
gcc -m64 -march=core2 -O -fomit-frame-pointer sse
gcc -m64 -march=core2 -Os -fomit-frame-pointer sse
gcc -m64 -march=core2 -msse4.1 -O2 -fomit-frame-pointer sse
gcc -m64 -march=core2 -msse4.1 -O -fomit-frame-pointer sse
gcc -m64 -march=core2 -msse4.1 -Os -fomit-frame-pointer sse
gcc -m64 -march=core2 -msse4 -O2 -fomit-frame-pointer sse
gcc -m64 -march=core2 -msse4 -O -fomit-frame-pointer sse
gcc -m64 -march=core2 -msse4 -Os -fomit-frame-pointer sse
gcc -m64 -march=corei7 -O2 -fomit-frame-pointer sse
gcc -m64 -march=corei7 -O -fomit-frame-pointer sse
gcc -m64 -march=corei7 -Os -fomit-frame-pointer sse

Compiler output

Implementation: crypto_aead/scream12v3/sse
Compiler: cc
scream.c: scream.c: In function 'LBox16P':
scream.c: scream.c:202:10: warning: implicit declaration of function '__builtin_ia32_pshufb128' [-Wimplicit-function-declaration]
scream.c: A = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:202:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: A = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:203:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: C = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:207:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: B = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:208:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: D = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:215:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: A ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^
scream.c: scream.c:216:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: C ^= __builtin_ia32_pshufb128(table, in[2]);
scream.c: ^
scream.c: scream.c:220:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: B ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^
scream.c: ...

Number of similar (compiler,implementation) pairs: 79, namely:
Compiler | Implementations
cc sse
gcc sse
gcc -O2 -fomit-frame-pointer sse
gcc -O3 -fomit-frame-pointer sse
gcc -O -fomit-frame-pointer sse
gcc -Os -fomit-frame-pointer sse
gcc -fno-schedule-insns -O2 -fomit-frame-pointer sse
gcc -fno-schedule-insns -O3 -fomit-frame-pointer sse
gcc -fno-schedule-insns -O -fomit-frame-pointer sse
gcc -fno-schedule-insns -Os -fomit-frame-pointer sse
gcc -funroll-loops sse
gcc -funroll-loops -O2 -fomit-frame-pointer sse
gcc -funroll-loops -O3 -fomit-frame-pointer sse
gcc -funroll-loops -O -fomit-frame-pointer sse
gcc -funroll-loops -Os -fomit-frame-pointer sse
gcc -funroll-loops -fno-schedule-insns -O2 -fomit-frame-pointer sse
gcc -funroll-loops -fno-schedule-insns -O3 -fomit-frame-pointer sse
gcc -funroll-loops -fno-schedule-insns -O -fomit-frame-pointer sse
gcc -funroll-loops -fno-schedule-insns -Os -fomit-frame-pointer sse
gcc -funroll-loops -m64 -O2 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -O3 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -O -fomit-frame-pointer sse
gcc -funroll-loops -m64 -Os -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=barcelona -O2 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=barcelona -O3 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=barcelona -O -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=barcelona -Os -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=k8 -O3 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=k8 -O -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=k8 -Os -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=nocona -O2 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=nocona -O3 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=nocona -O -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=nocona -Os -fomit-frame-pointer sse
gcc -funroll-loops -march=barcelona -O2 -fomit-frame-pointer sse
gcc -funroll-loops -march=barcelona -O3 -fomit-frame-pointer sse
gcc -funroll-loops -march=barcelona -O -fomit-frame-pointer sse
gcc -funroll-loops -march=barcelona -Os -fomit-frame-pointer sse
gcc -funroll-loops -march=k8 -O2 -fomit-frame-pointer sse
gcc -funroll-loops -march=k8 -O3 -fomit-frame-pointer sse
gcc -funroll-loops -march=k8 -O -fomit-frame-pointer sse
gcc -funroll-loops -march=k8 -Os -fomit-frame-pointer sse
gcc -funroll-loops -march=nocona -O2 -fomit-frame-pointer sse
gcc -funroll-loops -march=nocona -O3 -fomit-frame-pointer sse
gcc -funroll-loops -march=nocona -O -fomit-frame-pointer sse
gcc -funroll-loops -march=nocona -Os -fomit-frame-pointer sse
gcc -m64 -O2 -fomit-frame-pointer sse
gcc -m64 -O3 -fomit-frame-pointer sse
gcc -m64 -O -fomit-frame-pointer sse
gcc -m64 -Os -fomit-frame-pointer sse
gcc -m64 -march=k8 -O2 -fomit-frame-pointer sse
gcc -m64 -march=k8 -O3 -fomit-frame-pointer sse
gcc -m64 -march=k8 -O -fomit-frame-pointer sse
gcc -m64 -march=k8 -Os -fomit-frame-pointer sse
gcc -m64 -march=native -mtune=native -O2 -fomit-frame-pointer sse
gcc -m64 -march=native -mtune=native -O3 -fomit-frame-pointer sse
gcc -m64 -march=native -mtune=native -O -fomit-frame-pointer sse
gcc -m64 -march=native -mtune=native -Os -fomit-frame-pointer sse
gcc -m64 -march=nocona -O2 -fomit-frame-pointer sse
gcc -m64 -march=nocona -O3 -fomit-frame-pointer sse
gcc -m64 -march=nocona -O -fomit-frame-pointer sse
gcc -m64 -march=nocona -Os -fomit-frame-pointer sse
gcc -march=barcelona -O2 -fomit-frame-pointer sse
gcc -march=barcelona -O3 -fomit-frame-pointer sse
gcc -march=barcelona -O -fomit-frame-pointer sse
gcc -march=barcelona -Os -fomit-frame-pointer sse
gcc -march=k8 -O2 -fomit-frame-pointer sse
gcc -march=k8 -O3 -fomit-frame-pointer sse
gcc -march=k8 -O -fomit-frame-pointer sse
gcc -march=k8 -Os -fomit-frame-pointer sse
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv sse
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv sse
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv sse
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv sse
gcc -march=nocona -O2 -fomit-frame-pointer sse
gcc -march=nocona -O3 -fomit-frame-pointer sse
gcc -march=nocona -O -fomit-frame-pointer sse
gcc -march=nocona -Os -fomit-frame-pointer sse

Compiler output

Implementation: crypto_aead/scream12v3/sse
Compiler: clang -O3 -fomit-frame-pointer -Qunused-arguments
scream.c: scream.c:202:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: A = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:203:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: C = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:207:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: B = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:208:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: D = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:215:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: A ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^
scream.c: scream.c:216:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: C ^= __builtin_ia32_pshufb128(table, in[2]);
scream.c: ^
scream.c: scream.c:220:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: B ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^
scream.c: scream.c:221:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: D ^= __builtin_ia32_pshufb128(table, in[2]);
scream.c: ^
scream.c: scream.c:228:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: ...

Number of similar (compiler,implementation) pairs: 6, namely:
Compiler | Implementations
clang -O3 -fomit-frame-pointer -Qunused-arguments sse
clang -O3 -fwrapv -march=native -fomit-frame-pointer -Qunused-arguments sse
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments sse
clang -mcpu=cortex-a8 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments sse
clang -mcpu=cortex-a9 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments sse
clang -mcpu=native -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments sse

Compiler output

Implementation: crypto_aead/scream12v3/sse
Compiler: gcc -m64 -march=barcelona -O2 -fomit-frame-pointer
scream.c: scream.c: In function 'LBox16P':
scream.c: scream.c:202:10: warning: implicit declaration of function '__builtin_ia32_pshufb128' [-Wimplicit-function-declaration]
scream.c: A = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:202:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: A = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:203:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: C = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:207:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: B = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:208:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: D = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:215:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: A ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^
scream.c: scream.c:216:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: C ^= __builtin_ia32_pshufb128(table, in[2]);
scream.c: ^
scream.c: scream.c:220:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: B ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^
scream.c: ...
scream.c: scream.c: In function 'LBox16P':
scream.c: scream.c:202:10: warning: implicit declaration of function '__builtin_ia32_pshufb128' [-Wimplicit-function-declaration]
scream.c: A = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:202:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: A = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:203:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: C = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:207:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: B = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:208:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: D = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:215:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: A ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^
scream.c: scream.c:216:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: C ^= __builtin_ia32_pshufb128(table, in[2]);
scream.c: ^
scream.c: scream.c:220:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: B ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^
scream.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler | Implementations
gcc -m64 -march=barcelona -O2 -fomit-frame-pointer sse
gcc -m64 -march=barcelona -O3 -fomit-frame-pointer sse
gcc -m64 -march=barcelona -O -fomit-frame-pointer sse
gcc -m64 -march=barcelona -Os -fomit-frame-pointer sse