Implementation notes: amd64, hydra1, crypto_aead/scream10v3

Computer: hydra1
Architecture: amd64
CPU ID: AuthenticAMD-00100fa0-178bfbff
SUPERCOP version: 20161220
Operation: crypto_aead
Primitive: scream10v3
Time | Implementation | Compiler | Benchmark date | SUPERCOP version
436098refgcc -funroll-loops -m64 -march=k8 -O3 -fomit-frame-pointer2016102320161009
436314refgcc -funroll-loops -march=k8 -O3 -fomit-frame-pointer2016102320161009
436333refgcc -funroll-loops -m64 -march=barcelona -O3 -fomit-frame-pointer2016102320161009
436705refgcc -funroll-loops -march=k8 -O2 -fomit-frame-pointer2016102320161009
437109refgcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer2016102320161009
437318refgcc -funroll-loops -march=barcelona -O2 -fomit-frame-pointer2016102320161009
437357refgcc -funroll-loops -m64 -march=barcelona -O2 -fomit-frame-pointer2016102320161009
438193refgcc -funroll-loops -march=barcelona -O3 -fomit-frame-pointer2016102320161009
439909refgcc -funroll-loops -m64 -O3 -fomit-frame-pointer2016102320161009
440078refgcc -funroll-loops -O3 -fomit-frame-pointer2016102320161009
440198refgcc -funroll-loops -fno-schedule-insns -O3 -fomit-frame-pointer2016102320161009
440272refgcc -funroll-loops -fno-schedule-insns -O2 -fomit-frame-pointer2016102320161009
440584refgcc -funroll-loops -O2 -fomit-frame-pointer2016102320161009
440888refgcc -funroll-loops -m64 -O2 -fomit-frame-pointer2016102320161009
452768refgcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv2016102320161009
465533refgcc -O3 -fomit-frame-pointer2016102320161009
465792refgcc -m64 -O3 -fomit-frame-pointer2016102320161009
466518refgcc -march=k8 -O3 -fomit-frame-pointer2016102320161009
466970refgcc -fno-schedule-insns -O3 -fomit-frame-pointer2016102320161009
467427refgcc -m64 -march=k8 -O3 -fomit-frame-pointer2016102320161009
474008refgcc -m64 -march=barcelona -O3 -fomit-frame-pointer2016102320161009
474238refgcc -march=barcelona -O3 -fomit-frame-pointer2016102320161009
474625refgcc -m64 -march=native -mtune=native -O3 -fomit-frame-pointer2016102320161009
479529refgcc -funroll-loops -march=nocona -O3 -fomit-frame-pointer2016102320161009
479799refgcc -funroll-loops -m64 -march=nocona -O3 -fomit-frame-pointer2016102320161009
480967refgcc -funroll-loops -march=nocona -O2 -fomit-frame-pointer2016102320161009
480989refgcc -funroll-loops -m64 -march=nocona -O2 -fomit-frame-pointer2016102320161009
482238refgcc -funroll-loops -march=nocona -O -fomit-frame-pointer2016102320161009
483362refgcc -funroll-loops -m64 -march=nocona -O -fomit-frame-pointer2016102320161009
499475refgcc -m64 -march=nocona -O3 -fomit-frame-pointer2016102320161009
499990refgcc -march=nocona -O3 -fomit-frame-pointer2016102320161009
516613refgcc -funroll-loops -m64 -march=k8 -O -fomit-frame-pointer2016102320161009
517835refgcc -funroll-loops -m64 -march=barcelona -O -fomit-frame-pointer2016102320161009
517951refgcc -funroll-loops -march=k8 -O -fomit-frame-pointer2016102320161009
518155refgcc -funroll-loops -O -fomit-frame-pointer2016102320161009
518162refgcc -funroll-loops -fno-schedule-insns -O -fomit-frame-pointer2016102320161009
518736refgcc -funroll-loops -march=barcelona -O -fomit-frame-pointer2016102320161009
520253refgcc -funroll-loops -m64 -O -fomit-frame-pointer2016102320161009
681544refclang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2016102320161009
682557refclang -O3 -fomit-frame-pointer -Qunused-arguments2016102320161009
684299refclang -mcpu=cortex-a9 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2016102320161009
685970refclang -mcpu=cortex-a8 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2016102320161009
688371refclang -O3 -fwrapv -march=native -fomit-frame-pointer -Qunused-arguments2016102320161009
688901refclang -mcpu=native -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments2016102320161009
887245refgcc -march=nocona -O2 -fomit-frame-pointer2016102320161009
887776refgcc -m64 -march=nocona -O2 -fomit-frame-pointer2016102320161009
903264refgcc -m64 -O2 -fomit-frame-pointer2016102320161009
905453refgcc -fno-schedule-insns -O2 -fomit-frame-pointer2016102320161009
906747refgcc -march=k8 -O2 -fomit-frame-pointer2016102320161009
908815refgcc -m64 -march=corei7 -O2 -fomit-frame-pointer2016102320161009
908958refgcc -m64 -march=core2 -msse4 -O2 -fomit-frame-pointer2016102320161009
909216refgcc -m64 -march=k8 -O2 -fomit-frame-pointer2016102320161009
911667refgcc -O2 -fomit-frame-pointer2016102320161009
914944refgcc -m64 -march=core2 -msse4.1 -O -fomit-frame-pointer2016102320161009
915424refgcc -m64 -march=core2 -O -fomit-frame-pointer2016102320161009
915429refgcc -m64 -march=core2 -msse4.1 -O2 -fomit-frame-pointer2016102320161009
915436refgcc -m64 -march=core2 -msse4 -O -fomit-frame-pointer2016102320161009
915616refgcc -m64 -march=corei7 -O -fomit-frame-pointer2016102320161009
915976refgcc -m64 -march=core2 -O2 -fomit-frame-pointer2016102320161009
918045refgcc -fno-schedule-insns -O -fomit-frame-pointer2016102320161009
918100refgcc -O -fomit-frame-pointer2016102320161009
922080refgcc -m64 -march=barcelona -O2 -fomit-frame-pointer2016102320161009
922081refgcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv2016102320161009
924544refgcc -march=barcelona -O2 -fomit-frame-pointer2016102320161009
924696refgcc -m64 -O -fomit-frame-pointer2016102320161009
926090refgcc -m64 -march=native -mtune=native -O2 -fomit-frame-pointer2016102320161009
928310refgcc -m64 -march=barcelona -O -fomit-frame-pointer2016102320161009
928416refgcc -march=barcelona -O -fomit-frame-pointer2016102320161009
928469refgcc -m64 -march=native -mtune=native -O -fomit-frame-pointer2016102320161009
928754refgcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv2016102320161009
929056refgcc -march=k8 -O -fomit-frame-pointer2016102320161009
929781refgcc -m64 -march=k8 -O -fomit-frame-pointer2016102320161009
944910refgcc -funroll-loops -m64 -march=nocona -Os -fomit-frame-pointer2016102320161009
950892refgcc -funroll-loops -march=nocona -Os -fomit-frame-pointer2016102320161009
953380refgcc -m64 -march=nocona -Os -fomit-frame-pointer2016102320161009
954640refgcc -march=nocona -Os -fomit-frame-pointer2016102320161009
956154refgcc -m64 -march=nocona -O -fomit-frame-pointer2016102320161009
961683refgcc -march=nocona -O -fomit-frame-pointer2016102320161009
965678refgcc -funroll-loops -march=k8 -Os -fomit-frame-pointer2016102320161009
965894refgcc -funroll-loops -Os -fomit-frame-pointer2016102320161009
965960refgcc -funroll-loops -m64 -Os -fomit-frame-pointer2016102320161009
969675refgcc -funroll-loops -fno-schedule-insns -Os -fomit-frame-pointer2016102320161009
971373refgcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv2016102320161009
972666refgcc -funroll-loops -m64 -march=k8 -Os -fomit-frame-pointer2016102320161009
988784refgcc -funroll-loops -m64 -march=barcelona -Os -fomit-frame-pointer2016102320161009
989795refgcc -m64 -march=native -mtune=native -Os -fomit-frame-pointer2016102320161009
989866refgcc -funroll-loops -march=barcelona -Os -fomit-frame-pointer2016102320161009
991184refgcc -m64 -Os -fomit-frame-pointer2016102320161009
993371refgcc -march=barcelona -Os -fomit-frame-pointer2016102320161009
994157refgcc -m64 -march=core2 -Os -fomit-frame-pointer2016102320161009
994720refgcc -m64 -march=corei7 -Os -fomit-frame-pointer2016102320161009
994813refgcc -m64 -march=k8 -Os -fomit-frame-pointer2016102320161009
994952refgcc -Os -fomit-frame-pointer2016102320161009
995021refgcc -m64 -march=barcelona -Os -fomit-frame-pointer2016102320161009
995776refgcc -fno-schedule-insns -Os -fomit-frame-pointer2016102320161009
997968refgcc -m64 -march=core2 -msse4.1 -Os -fomit-frame-pointer2016102320161009
998408refgcc -march=k8 -Os -fomit-frame-pointer2016102320161009
999131refgcc -m64 -march=core2 -msse4 -Os -fomit-frame-pointer2016102320161009
4276082refgcc2016102320161009
4278387refcc2016102320161009
4282587refgcc -funroll-loops2016102320161009

Test failure

Implementation: crypto_aead/scream10v3/sse
Compiler: gcc -m64 -march=core2 -O2 -fomit-frame-pointer
error 111

Number of similar (compiler,implementation) pairs: 14, namely:
Compiler | Implementations
gcc -m64 -march=core2 -O2 -fomit-frame-pointer sse
gcc -m64 -march=core2 -O3 -fomit-frame-pointer ref sse
gcc -m64 -march=core2 -O -fomit-frame-pointer sse
gcc -m64 -march=core2 -Os -fomit-frame-pointer sse
gcc -m64 -march=core2 -msse4.1 -O2 -fomit-frame-pointer sse
gcc -m64 -march=core2 -msse4.1 -O -fomit-frame-pointer sse
gcc -m64 -march=core2 -msse4.1 -Os -fomit-frame-pointer sse
gcc -m64 -march=core2 -msse4 -O2 -fomit-frame-pointer sse
gcc -m64 -march=core2 -msse4 -O -fomit-frame-pointer sse
gcc -m64 -march=core2 -msse4 -Os -fomit-frame-pointer sse
gcc -m64 -march=corei7 -O2 -fomit-frame-pointer sse
gcc -m64 -march=corei7 -O -fomit-frame-pointer sse
gcc -m64 -march=corei7 -Os -fomit-frame-pointer sse

Compiler output

Implementation: crypto_aead/scream10v3/sse
Compiler: cc
scream.c: scream.c: In function 'LBox16P':
scream.c: scream.c:202:10: warning: implicit declaration of function '__builtin_ia32_pshufb128' [-Wimplicit-function-declaration]
scream.c: A = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:202:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: A = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:203:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: C = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:207:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: B = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:208:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: D = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:215:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: A ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^
scream.c: scream.c:216:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: C ^= __builtin_ia32_pshufb128(table, in[2]);
scream.c: ^
scream.c: scream.c:220:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: B ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^
scream.c: ...

Number of similar (compiler,implementation) pairs: 79, namely:
Compiler | Implementations
cc sse
gcc sse
gcc -O2 -fomit-frame-pointer sse
gcc -O3 -fomit-frame-pointer sse
gcc -O -fomit-frame-pointer sse
gcc -Os -fomit-frame-pointer sse
gcc -fno-schedule-insns -O2 -fomit-frame-pointer sse
gcc -fno-schedule-insns -O3 -fomit-frame-pointer sse
gcc -fno-schedule-insns -O -fomit-frame-pointer sse
gcc -fno-schedule-insns -Os -fomit-frame-pointer sse
gcc -funroll-loops sse
gcc -funroll-loops -O2 -fomit-frame-pointer sse
gcc -funroll-loops -O3 -fomit-frame-pointer sse
gcc -funroll-loops -O -fomit-frame-pointer sse
gcc -funroll-loops -Os -fomit-frame-pointer sse
gcc -funroll-loops -fno-schedule-insns -O2 -fomit-frame-pointer sse
gcc -funroll-loops -fno-schedule-insns -O3 -fomit-frame-pointer sse
gcc -funroll-loops -fno-schedule-insns -O -fomit-frame-pointer sse
gcc -funroll-loops -fno-schedule-insns -Os -fomit-frame-pointer sse
gcc -funroll-loops -m64 -O2 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -O3 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -O -fomit-frame-pointer sse
gcc -funroll-loops -m64 -Os -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=barcelona -O2 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=barcelona -O3 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=barcelona -O -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=barcelona -Os -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=k8 -O2 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=k8 -O3 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=k8 -O -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=k8 -Os -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=nocona -O2 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=nocona -O3 -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=nocona -O -fomit-frame-pointer sse
gcc -funroll-loops -m64 -march=nocona -Os -fomit-frame-pointer sse
gcc -funroll-loops -march=barcelona -O2 -fomit-frame-pointer sse
gcc -funroll-loops -march=barcelona -O3 -fomit-frame-pointer sse
gcc -funroll-loops -march=barcelona -O -fomit-frame-pointer sse
gcc -funroll-loops -march=barcelona -Os -fomit-frame-pointer sse
gcc -funroll-loops -march=k8 -O2 -fomit-frame-pointer sse
gcc -funroll-loops -march=k8 -O3 -fomit-frame-pointer sse
gcc -funroll-loops -march=k8 -O -fomit-frame-pointer sse
gcc -funroll-loops -march=k8 -Os -fomit-frame-pointer sse
gcc -funroll-loops -march=nocona -O2 -fomit-frame-pointer sse
gcc -funroll-loops -march=nocona -O3 -fomit-frame-pointer sse
gcc -funroll-loops -march=nocona -O -fomit-frame-pointer sse
gcc -funroll-loops -march=nocona -Os -fomit-frame-pointer sse
gcc -m64 -O2 -fomit-frame-pointer sse
gcc -m64 -O3 -fomit-frame-pointer sse
gcc -m64 -O -fomit-frame-pointer sse
gcc -m64 -Os -fomit-frame-pointer sse
gcc -m64 -march=k8 -O2 -fomit-frame-pointer sse
gcc -m64 -march=k8 -O3 -fomit-frame-pointer sse
gcc -m64 -march=k8 -O -fomit-frame-pointer sse
gcc -m64 -march=k8 -Os -fomit-frame-pointer sse
gcc -m64 -march=native -mtune=native -O2 -fomit-frame-pointer sse
gcc -m64 -march=native -mtune=native -O3 -fomit-frame-pointer sse
gcc -m64 -march=native -mtune=native -O -fomit-frame-pointer sse
gcc -m64 -march=native -mtune=native -Os -fomit-frame-pointer sse
gcc -m64 -march=nocona -O2 -fomit-frame-pointer sse
gcc -m64 -march=nocona -O3 -fomit-frame-pointer sse
gcc -m64 -march=nocona -O -fomit-frame-pointer sse
gcc -m64 -march=nocona -Os -fomit-frame-pointer sse
gcc -march=barcelona -O2 -fomit-frame-pointer sse
gcc -march=barcelona -O3 -fomit-frame-pointer sse
gcc -march=barcelona -O -fomit-frame-pointer sse
gcc -march=barcelona -Os -fomit-frame-pointer sse
gcc -march=k8 -O2 -fomit-frame-pointer sse
gcc -march=k8 -O3 -fomit-frame-pointer sse
gcc -march=k8 -O -fomit-frame-pointer sse
gcc -march=k8 -Os -fomit-frame-pointer sse
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv sse
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv sse
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv sse
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv sse
gcc -march=nocona -O2 -fomit-frame-pointer sse
gcc -march=nocona -O3 -fomit-frame-pointer sse
gcc -march=nocona -O -fomit-frame-pointer sse
gcc -march=nocona -Os -fomit-frame-pointer sse

Compiler output

Implementation: crypto_aead/scream10v3/sse
Compiler: clang -O3 -fomit-frame-pointer -Qunused-arguments
scream.c: scream.c:202:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: A = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:203:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: C = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:207:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: B = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:208:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: D = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:215:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: A ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^
scream.c: scream.c:216:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: C ^= __builtin_ia32_pshufb128(table, in[2]);
scream.c: ^
scream.c: scream.c:220:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: B ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^
scream.c: scream.c:221:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: D ^= __builtin_ia32_pshufb128(table, in[2]);
scream.c: ^
scream.c: scream.c:228:10: error: '__builtin_ia32_pshufb128' needs target feature ssse3
scream.c: ...

Number of similar (compiler,implementation) pairs: 6, namely:
Compiler | Implementations
clang -O3 -fomit-frame-pointer -Qunused-arguments sse
clang -O3 -fwrapv -march=native -fomit-frame-pointer -Qunused-arguments sse
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments sse
clang -mcpu=cortex-a8 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments sse
clang -mcpu=cortex-a9 -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments sse
clang -mcpu=native -mfpu=neon -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments sse

Compiler output

Implementation: crypto_aead/scream10v3/sse
Compiler: gcc -m64 -march=barcelona -O2 -fomit-frame-pointer
scream.c: scream.c: In function 'LBox16P':
scream.c: scream.c:202:10: warning: implicit declaration of function '__builtin_ia32_pshufb128' [-Wimplicit-function-declaration]
scream.c: A = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:202:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: A = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:203:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: C = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:207:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: B = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:208:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: D = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:215:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: A ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^
scream.c: scream.c:216:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: C ^= __builtin_ia32_pshufb128(table, in[2]);
scream.c: ^
scream.c: scream.c:220:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: B ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^
scream.c: ...
scream.c: scream.c: In function 'LBox16P':
scream.c: scream.c:202:10: warning: implicit declaration of function '__builtin_ia32_pshufb128' [-Wimplicit-function-declaration]
scream.c: A = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:202:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: A = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:203:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: C = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:207:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: B = __builtin_ia32_pshufb128(table, t0);
scream.c: ^
scream.c: scream.c:208:8: error: incompatible types when assigning to type 'v16qi {aka __vector(16) char}' from type 'int'
scream.c: D = __builtin_ia32_pshufb128(table, t1);
scream.c: ^
scream.c: scream.c:215:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: A ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^
scream.c: scream.c:216:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: C ^= __builtin_ia32_pshufb128(table, in[2]);
scream.c: ^
scream.c: scream.c:220:7: error: conversion of scalar 'int' to vector 'v16qi {aka __vector(16) char}' involves truncation
scream.c: B ^= __builtin_ia32_pshufb128(table, in[0]);
scream.c: ^
scream.c: ...

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler | Implementations
gcc -m64 -march=barcelona -O2 -fomit-frame-pointer sse
gcc -m64 -march=barcelona -O3 -fomit-frame-pointer sse
gcc -m64 -march=barcelona -O -fomit-frame-pointer sse
gcc -m64 -march=barcelona -Os -fomit-frame-pointer sse