Implementation notes: amd64, cel02, crypto_sign/rainbow1acyclicc363232

Computer: cel02
Architecture: amd64
CPU ID: GenuineIntel-00050657-bfebfbff
SUPERCOP version: 20201130
Operation: crypto_sign
Primitive: rainbow1acyclicc363232
Time | Object size | Test size | Implementation | Compiler | Benchmark date | SUPERCOP version
497652295315 0 0164864 896 1656T:refclang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2020121120201130
562836826925 0 096411 912 1656T:refgcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2020121120201130
570762287050 0 0157531 920 1688T:amd64gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2020121120201130
747241892491 8 0151075 928 1688T:ssse3gcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2020121120201130
770656251810 8 0116875 920 1656T:ssse3gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2020121120201130
875641054950 0 0125467 920 1688T:refgcc_-march=native_-mtune=native_-O2_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2020121120201130
888633260655 0 0129714 888 1656T:amd64clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2020121120201130
8929088143689 8 0182419 928 1720T:ssse3gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2020121120201130
912326689006 0 0159131 920 1688T:amd64gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2020121120201130
9150102109462 8 0167026 896 1656T:avx2clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2020121120201130
923133291353 8 0149147 928 1688T:ssse3gcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2020121120201130
9478468138183 0 0211059 920 1720T:amd64gcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2020121120201130
959708292456 8 0150746 896 1656T:ssse3clang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2020121120201130
972268455237 0 0125371 920 1688T:refgcc_-march=native_-mtune=native_-O_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2020121120201130
9907914103378 0 0173912 896 1656T:amd64clang_-mcpu=native_-O3_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2020121120201130
10216026106856 0 0180051 920 1720T:refgcc_-march=native_-mtune=native_-O3_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2020121120201130
1024909050978 0 0120018 888 1656T:refclang_-march=native_-Os_-fomit-frame-pointer_-fwrapv_-Qunused-arguments_-fPIC_-fPIE2020121120201130
1086858229916 0 099571 912 1656T:amd64gcc_-march=native_-mtune=native_-Os_-fomit-frame-pointer_-fwrapv_-fPIC_-fPIE2020121120201130

Compiler output

Implementation: T:amd64
Security model: timingleaks
Compiler: clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
try.c: fatal error: error in backend: Cannot select: 0x564854c46a20: v4i64 = X86ISD::VTRUNC 0x564854c468f0
try.c: 0x564854c468f0: v16i32 = vselect 0x564854c413f0, 0x564854be43d0, 0x564854c467c0
try.c: 0x564854c413f0: v4i1 = X86ISD::PCMPGTM 0x564854c3c7c0, 0x564854c38350
try.c: 0x564854c3c7c0: v4i64 = X86ISD::VBROADCAST 0x564854bf4f40
try.c: 0x564854bf4f40: i64,ch = load<LD8[%lsr.iv6971]> 0x564854b4d960, 0x564854c27cb0, undef:i64
try.c: 0x564854c27cb0: i64,ch = CopyFromReg 0x564854b4d960, Register:i64 %vreg50
try.c: 0x564854c385b0: i64 = Register %vreg50
try.c: 0x564854bf6410: i64 = undef
try.c: 0x564854c38350: v4i64,ch = CopyFromReg 0x564854b4d960, Register:v4i64 %vreg13
try.c: 0x564854c3d010: v4i64 = Register %vreg13
try.c: 0x564854be43d0: v16i32 = X86ISD::VBROADCAST 0x564854c3ca20
try.c: 0x564854c3ca20: i32,ch = load<LD4[ConstantPool]> 0x564854b4d960, 0x564854be7160, undef:i64
try.c: 0x564854be7160: i64 = X86ISD::WrapperRIP TargetConstantPool:i64<i32 1> 0
try.c: 0x564854b9e1c0: i64 = TargetConstantPool<i32 1> 0
try.c: 0x564854bf6410: i64 = undef
try.c: 0x564854c467c0: v16i32 = BUILD_VECTOR Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>
try.c: 0x564854c46690: i32 = Constant<0>
try.c: 0x564854c46690: i32 = Constant<0>
try.c: 0x564854c46690: i32 = Constant<0>
try.c: 0x564854c46690: i32 = Constant<0>
try.c: 0x564854c46690: i32 = Constant<0>
try.c: 0x564854c46690: i32 = Constant<0>
try.c: 0x564854c46690: i32 = Constant<0>
try.c: 0x564854c46690: i32 = Constant<0>
try.c: 0x564854c46690: i32 = Constant<0>
try.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler Implementations
clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE T:amd64

Compiler output

Implementation: T:amd64
Security model: timingleaks
Compiler: clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
try.c: fatal error: error in backend: Cannot select: 0x5637803d0690: v4i64 = X86ISD::VTRUNC 0x5637803d0560
try.c: 0x5637803d0560: v16i32 = vselect 0x5637803d4ea0, 0x5637803724d0, 0x5637803d0430
try.c: 0x5637803d4ea0: v4i1 = X86ISD::PCMPGTM 0x5637803c9050, 0x5637803c4be0
try.c: 0x5637803c9050: v4i64 = X86ISD::VBROADCAST 0x563780372990
try.c: 0x563780372990: i64,ch = load<LD8[%lsr.iv6971]> 0x5637802c2a40, 0x563780363a70, undef:i64
try.c: 0x563780363a70: i64,ch = CopyFromReg 0x5637802c2a40, Register:i64 %vreg50
try.c: 0x5637803c4e40: i64 = Register %vreg50
try.c: 0x563780360800: i64 = undef
try.c: 0x5637803c4be0: v4i64,ch = CopyFromReg 0x5637802c2a40, Register:v4i64 %vreg13
try.c: 0x5637803c98a0: v4i64 = Register %vreg13
try.c: 0x5637803724d0: v16i32 = X86ISD::VBROADCAST 0x5637803c92b0
try.c: 0x5637803c92b0: i32,ch = load<LD4[ConstantPool]> 0x5637802c2a40, 0x563780374e70, undef:i64
try.c: 0x563780374e70: i64 = X86ISD::WrapperRIP TargetConstantPool:i64<i32 1> 0
try.c: 0x563780361180: i64 = TargetConstantPool<i32 1> 0
try.c: 0x563780360800: i64 = undef
try.c: 0x5637803d0430: v16i32 = BUILD_VECTOR Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>
try.c: 0x5637803d0300: i32 = Constant<0>
try.c: 0x5637803d0300: i32 = Constant<0>
try.c: 0x5637803d0300: i32 = Constant<0>
try.c: 0x5637803d0300: i32 = Constant<0>
try.c: 0x5637803d0300: i32 = Constant<0>
try.c: 0x5637803d0300: i32 = Constant<0>
try.c: 0x5637803d0300: i32 = Constant<0>
try.c: 0x5637803d0300: i32 = Constant<0>
try.c: 0x5637803d0300: i32 = Constant<0>
try.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler Implementations
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE T:amd64

Compiler output

Implementation: T:amd64
Security model: timingleaks
Compiler: clang -march=native -O -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
try.c: fatal error: error in backend: Cannot select: 0x55e7daf0bd60: v4i64 = X86ISD::VTRUNC 0x55e7daf0bc30
try.c: 0x55e7daf0bc30: v16i32 = vselect 0x55e7daf06730, 0x55e7daebfb60, 0x55e7daf0bb00
try.c: 0x55e7daf06730: v4i1 = X86ISD::PCMPGTM 0x55e7daf05720, 0x55e7daf012b0
try.c: 0x55e7daf05720: v4i64 = X86ISD::VBROADCAST 0x55e7daeca9f0
try.c: 0x55e7daeca9f0: i64,ch = load<LD8[%lsr.iv6971]> 0x55e7dae16950, 0x55e7daeeaea0, undef:i64
try.c: 0x55e7daeeaea0: i64,ch = CopyFromReg 0x55e7dae16950, Register:i64 %vreg50
try.c: 0x55e7daf01510: i64 = Register %vreg50
try.c: 0x55e7daecbec0: i64 = undef
try.c: 0x55e7daf012b0: v4i64,ch = CopyFromReg 0x55e7dae16950, Register:v4i64 %vreg13
try.c: 0x55e7daf05f70: v4i64 = Register %vreg13
try.c: 0x55e7daebfb60: v16i32 = X86ISD::VBROADCAST 0x55e7daf05980
try.c: 0x55e7daf05980: i32,ch = load<LD4[ConstantPool]> 0x55e7dae16950, 0x55e7daea59e0, undef:i64
try.c: 0x55e7daea59e0: i64 = X86ISD::WrapperRIP TargetConstantPool:i64<i32 1> 0
try.c: 0x55e7daef08b0: i64 = TargetConstantPool<i32 1> 0
try.c: 0x55e7daecbec0: i64 = undef
try.c: 0x55e7daf0bb00: v16i32 = BUILD_VECTOR Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>
try.c: 0x55e7daf0b9d0: i32 = Constant<0>
try.c: 0x55e7daf0b9d0: i32 = Constant<0>
try.c: 0x55e7daf0b9d0: i32 = Constant<0>
try.c: 0x55e7daf0b9d0: i32 = Constant<0>
try.c: 0x55e7daf0b9d0: i32 = Constant<0>
try.c: 0x55e7daf0b9d0: i32 = Constant<0>
try.c: 0x55e7daf0b9d0: i32 = Constant<0>
try.c: 0x55e7daf0b9d0: i32 = Constant<0>
try.c: 0x55e7daf0b9d0: i32 = Constant<0>
try.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler Implementations
clang -march=native -O -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE T:amd64

Compiler output

Implementation: T:avx2
Security model: timingleaks
Compiler: clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
try.c: fatal error: error in backend: Cannot select: 0x55e1f72e3440: v4i64 = X86ISD::VTRUNC 0x55e1f72e3310
try.c: 0x55e1f72e3310: v16i32 = vselect 0x55e1f72d8f40, 0x55e1f7262ab0, 0x55e1f72e31e0
try.c: 0x55e1f72d8f40: v4i1 = X86ISD::PCMPGTM 0x55e1f72c0bc0, 0x55e1f72bc750
try.c: 0x55e1f72c0bc0: v4i64 = X86ISD::VBROADCAST 0x55e1f725fc50
try.c: 0x55e1f725fc50: i64,ch = load<LD8[%lsr.iv6971]> 0x55e1f71d19b0, 0x55e1f72b75b0, undef:i64
try.c: 0x55e1f72b75b0: i64,ch = CopyFromReg 0x55e1f71d19b0, Register:i64 %vreg50
try.c: 0x55e1f72bc9b0: i64 = Register %vreg50
try.c: 0x55e1f7261120: i64 = undef
try.c: 0x55e1f72bc750: v4i64,ch = CopyFromReg 0x55e1f71d19b0, Register:v4i64 %vreg13
try.c: 0x55e1f72c1410: v4i64 = Register %vreg13
try.c: 0x55e1f7262ab0: v16i32 = X86ISD::VBROADCAST 0x55e1f72c0e20
try.c: 0x55e1f72c0e20: i32,ch = load<LD4[ConstantPool]> 0x55e1f71d19b0, 0x55e1f7264ea0, undef:i64
try.c: 0x55e1f7264ea0: i64 = X86ISD::WrapperRIP TargetConstantPool:i64<i32 1> 0
try.c: 0x55e1f725c780: i64 = TargetConstantPool<i32 1> 0
try.c: 0x55e1f7261120: i64 = undef
try.c: 0x55e1f72e31e0: v16i32 = BUILD_VECTOR Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>
try.c: 0x55e1f72e30b0: i32 = Constant<0>
try.c: 0x55e1f72e30b0: i32 = Constant<0>
try.c: 0x55e1f72e30b0: i32 = Constant<0>
try.c: 0x55e1f72e30b0: i32 = Constant<0>
try.c: 0x55e1f72e30b0: i32 = Constant<0>
try.c: 0x55e1f72e30b0: i32 = Constant<0>
try.c: 0x55e1f72e30b0: i32 = Constant<0>
try.c: 0x55e1f72e30b0: i32 = Constant<0>
try.c: 0x55e1f72e30b0: i32 = Constant<0>
try.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler Implementations
clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE T:avx2

Compiler output

Implementation: T:avx2
Security model: timingleaks
Compiler: clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
try.c: fatal error: error in backend: Cannot select: 0x55bb0f7cd5a0: v4i64 = X86ISD::VTRUNC 0x55bb0f7cd470
try.c: 0x55bb0f7cd470: v16i32 = vselect 0x55bb0f7bc620, 0x55bb0f748fa0, 0x55bb0f7cd340
try.c: 0x55bb0f7bc620: v4i1 = X86ISD::PCMPGTM 0x55bb0f7b5480, 0x55bb0f7b2fa0
try.c: 0x55bb0f7b5480: v4i64 = X86ISD::VBROADCAST 0x55bb0f749460
try.c: 0x55bb0f749460: i64,ch = load<LD8[%lsr.iv6971]> 0x55bb0f6afa30, 0x55bb0f751f80, undef:i64
try.c: 0x55bb0f751f80: i64,ch = CopyFromReg 0x55bb0f6afa30, Register:i64 %vreg50
try.c: 0x55bb0f7b3200: i64 = Register %vreg50
try.c: 0x55bb0f761b70: i64 = undef
try.c: 0x55bb0f7b2fa0: v4i64,ch = CopyFromReg 0x55bb0f6afa30, Register:v4i64 %vreg13
try.c: 0x55bb0f7b5cd0: v4i64 = Register %vreg13
try.c: 0x55bb0f748fa0: v16i32 = X86ISD::VBROADCAST 0x55bb0f7b56e0
try.c: 0x55bb0f7b56e0: i32,ch = load<LD4[ConstantPool]> 0x55bb0f6afa30, 0x55bb0f750550, undef:i64
try.c: 0x55bb0f750550: i64 = X86ISD::WrapperRIP TargetConstantPool:i64<i32 1> 0
try.c: 0x55bb0f7624f0: i64 = TargetConstantPool<i32 1> 0
try.c: 0x55bb0f761b70: i64 = undef
try.c: 0x55bb0f7cd340: v16i32 = BUILD_VECTOR Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>
try.c: 0x55bb0f7cd210: i32 = Constant<0>
try.c: 0x55bb0f7cd210: i32 = Constant<0>
try.c: 0x55bb0f7cd210: i32 = Constant<0>
try.c: 0x55bb0f7cd210: i32 = Constant<0>
try.c: 0x55bb0f7cd210: i32 = Constant<0>
try.c: 0x55bb0f7cd210: i32 = Constant<0>
try.c: 0x55bb0f7cd210: i32 = Constant<0>
try.c: 0x55bb0f7cd210: i32 = Constant<0>
try.c: 0x55bb0f7cd210: i32 = Constant<0>
try.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler Implementations
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE T:avx2

Compiler output

Implementation: T:avx2
Security model: timingleaks
Compiler: clang -march=native -O -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
try.c: fatal error: error in backend: Cannot select: 0x558d87a9ab90: v4i64 = X86ISD::VTRUNC 0x558d87a9aa60
try.c: 0x558d87a9aa60: v16i32 = vselect 0x558d87a86610, 0x558d87a3af90, 0x558d87a9a930
try.c: 0x558d87a86610: v4i1 = X86ISD::PCMPGTM 0x558d87a7f200, 0x558d87a7ad90
try.c: 0x558d87a7f200: v4i64 = X86ISD::VBROADCAST 0x558d87a1fc40
try.c: 0x558d87a1fc40: i64,ch = load<LD8[%lsr.iv6971]> 0x558d8798f950, 0x558d87a72c20, undef:i64
try.c: 0x558d87a72c20: i64,ch = CopyFromReg 0x558d8798f950, Register:i64 %vreg50
try.c: 0x558d87a7aff0: i64 = Register %vreg50
try.c: 0x558d87a39600: i64 = undef
try.c: 0x558d87a7ad90: v4i64,ch = CopyFromReg 0x558d8798f950, Register:v4i64 %vreg13
try.c: 0x558d87a7fa50: v4i64 = Register %vreg13
try.c: 0x558d87a3af90: v16i32 = X86ISD::VBROADCAST 0x558d87a7f460
try.c: 0x558d87a7f460: i32,ch = load<LD4[ConstantPool]> 0x558d8798f950, 0x558d87a1f220, undef:i64
try.c: 0x558d87a1f220: i64 = X86ISD::WrapperRIP TargetConstantPool:i64<i32 1> 0
try.c: 0x558d87a63b90: i64 = TargetConstantPool<i32 1> 0
try.c: 0x558d87a39600: i64 = undef
try.c: 0x558d87a9a930: v16i32 = BUILD_VECTOR Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>
try.c: 0x558d87a9a800: i32 = Constant<0>
try.c: 0x558d87a9a800: i32 = Constant<0>
try.c: 0x558d87a9a800: i32 = Constant<0>
try.c: 0x558d87a9a800: i32 = Constant<0>
try.c: 0x558d87a9a800: i32 = Constant<0>
try.c: 0x558d87a9a800: i32 = Constant<0>
try.c: 0x558d87a9a800: i32 = Constant<0>
try.c: 0x558d87a9a800: i32 = Constant<0>
try.c: 0x558d87a9a800: i32 = Constant<0>
try.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler Implementations
clang -march=native -O -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE T:avx2

Compiler output

Implementation: T:avx2
Security model: timingleaks
Compiler: clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
blas_comm.c: In file included from blas_comm.c:6:
blas_comm.c: In file included from ./blas.h:25:
blas_comm.c: ./blas_avx2.h:88:17: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'gf256v_add_avx2' that is compiled without support for 'sse4.2'
blas_comm.c: __m256i inp = _mm256_loadu_si256( (__m256i*) (a+i*32) );
blas_comm.c: ^
blas_comm.c: ./blas_avx2.h:89:17: error: always_inline function '_mm256_loadu_si256' requires target feature 'sse4.2', but would be inlined into function 'gf256v_add_avx2' that is compiled without support for 'sse4.2'
blas_comm.c: __m256i out = _mm256_loadu_si256( (__m256i*) (accu_b+i*32) );
blas_comm.c: ^
blas_comm.c: ./blas_avx2.h:91:3: error: always_inline function '_mm256_storeu_si256' requires target feature 'sse4.2', but would be inlined into function 'gf256v_add_avx2' that is compiled without support for 'sse4.2'
blas_comm.c: _mm256_storeu_si256( (__m256i*) (accu_b+i*32) , out );
blas_comm.c: ^
blas_comm.c: fatal error: error in backend: Do not know how to split this operator's operand!
blas_comm.c:
blas_comm.c: clang: error: clang frontend command failed with exit code 70 (use -v to see invocation)
blas_comm.c: clang version 3.8.1-24 (tags/RELEASE_381/final)
blas_comm.c: Target: x86_64-pc-linux-gnu
blas_comm.c: Thread model: posix
blas_comm.c: InstalledDir: /usr/bin
blas_comm.c: clang: note: diagnostic msg: PLEASE submit a bug report to http://llvm.org/bugs/ and include the crash backtrace, preprocessed source, and associated run script.
blas_comm.c: clang: note: diagnostic msg:
blas_comm.c: ********************
blas_comm.c:
blas_comm.c: PLEASE ATTACH THE FOLLOWING FILES TO THE BUG REPORT:
blas_comm.c: Preprocessed source(s) and associated run script(s) are located at:
blas_comm.c: clang: note: diagnostic msg: /tmp/blas_comm-dcffb4.c
blas_comm.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler Implementations
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE T:avx2

Compiler output

Implementation: T:avx2
Security model: timingleaks
Compiler: gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE
blas_matrix_avx2.c: blas_matrix_avx2.c: In function '_gf256mat_gauss_elim_avx2_32x':
blas_matrix_avx2.c: blas_matrix_avx2.c:479:19: warning: implicit declaration of function '_mm256_setr_m128i' [-Wimplicit-function-declaration]
blas_matrix_avx2.c: __m256i mask = _mm256_setr_m128i( add_mask , add_mask );
blas_matrix_avx2.c: ^~~~~~~~~~~~~~~~~
blas_matrix_avx2.c: blas_matrix_avx2.c:479:19: error: incompatible types when initializing type '__m256i {aka __vector(4) long long int}' using type 'int'
blas_matrix_avx2.c: blas_matrix_avx2.c:495:17: error: incompatible types when initializing type '__m256i {aka __vector(4) long long int}' using type 'int'
blas_matrix_avx2.c: __m256i pi2 = _mm256_setr_m128i( pi1 , pi1 );
blas_matrix_avx2.c: ^~~~~~~~~~~~~~~~~

Number of similar (compiler,implementation) pairs: 4, namely:
Compiler Implementations
gcc -march=native -mtune=native -O2 -fomit-frame-pointer -fwrapv -fPIC -fPIE T:avx2
gcc -march=native -mtune=native -O3 -fomit-frame-pointer -fwrapv -fPIC -fPIE T:avx2
gcc -march=native -mtune=native -O -fomit-frame-pointer -fwrapv -fPIC -fPIE T:avx2
gcc -march=native -mtune=native -Os -fomit-frame-pointer -fwrapv -fPIC -fPIE T:avx2

Compiler output

Implementation: T:ref
Security model: timingleaks
Compiler: clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
try.c: fatal error: error in backend: Cannot select: 0x561fba478360: v4i64 = X86ISD::VTRUNC 0x561fba478230
try.c: 0x561fba478230: v16i32 = vselect 0x561fba4822b0, 0x561fba3ffd90, 0x561fba478100
try.c: 0x561fba4822b0: v4i1 = X86ISD::PCMPGTM 0x561fba45e9f0, 0x561fba45a580
try.c: 0x561fba45e9f0: v4i64 = X86ISD::VBROADCAST 0x561fba3fcf30
try.c: 0x561fba3fcf30: i64,ch = load<LD8[%lsr.iv6971]> 0x561fba36f900, 0x561fba4553e0, undef:i64
try.c: 0x561fba4553e0: i64,ch = CopyFromReg 0x561fba36f900, Register:i64 %vreg50
try.c: 0x561fba45a7e0: i64 = Register %vreg50
try.c: 0x561fba3fe400: i64 = undef
try.c: 0x561fba45a580: v4i64,ch = CopyFromReg 0x561fba36f900, Register:v4i64 %vreg13
try.c: 0x561fba45f240: v4i64 = Register %vreg13
try.c: 0x561fba3ffd90: v16i32 = X86ISD::VBROADCAST 0x561fba45ec50
try.c: 0x561fba45ec50: i32,ch = load<LD4[ConstantPool]> 0x561fba36f900, 0x561fba407be0, undef:i64
try.c: 0x561fba407be0: i64 = X86ISD::WrapperRIP TargetConstantPool:i64<i32 1> 0
try.c: 0x561fba4442c0: i64 = TargetConstantPool<i32 1> 0
try.c: 0x561fba3fe400: i64 = undef
try.c: 0x561fba478100: v16i32 = BUILD_VECTOR Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>
try.c: 0x561fba477fd0: i32 = Constant<0>
try.c: 0x561fba477fd0: i32 = Constant<0>
try.c: 0x561fba477fd0: i32 = Constant<0>
try.c: 0x561fba477fd0: i32 = Constant<0>
try.c: 0x561fba477fd0: i32 = Constant<0>
try.c: 0x561fba477fd0: i32 = Constant<0>
try.c: 0x561fba477fd0: i32 = Constant<0>
try.c: 0x561fba477fd0: i32 = Constant<0>
try.c: 0x561fba477fd0: i32 = Constant<0>
try.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler Implementations
clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE T:ref

Compiler output

Implementation: T:ref
Security model: timingleaks
Compiler: clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
try.c: fatal error: error in backend: Cannot select: 0x55a9fb858230: v4i64 = X86ISD::VTRUNC 0x55a9fb858100
try.c: 0x55a9fb858100: v16i32 = vselect 0x55a9fb8687a0, 0x55a9fb7e53c0, 0x55a9fb857fd0
try.c: 0x55a9fb8687a0: v4i1 = X86ISD::PCMPGTM 0x55a9fb84fbe0, 0x55a9fb84b770
try.c: 0x55a9fb84fbe0: v4i64 = X86ISD::VBROADCAST 0x55a9fb7e5880
try.c: 0x55a9fb7e5880: i64,ch = load<LD8[%lsr.iv6971]> 0x55a9fb749a30, 0x55a9fb7ef950, undef:i64
try.c: 0x55a9fb7ef950: i64,ch = CopyFromReg 0x55a9fb749a30, Register:i64 %vreg50
try.c: 0x55a9fb84b9d0: i64 = Register %vreg50
try.c: 0x55a9fb7c8260: i64 = undef
try.c: 0x55a9fb84b770: v4i64,ch = CopyFromReg 0x55a9fb749a30, Register:v4i64 %vreg13
try.c: 0x55a9fb850430: v4i64 = Register %vreg13
try.c: 0x55a9fb7e53c0: v16i32 = X86ISD::VBROADCAST 0x55a9fb84fe40
try.c: 0x55a9fb84fe40: i32,ch = load<LD4[ConstantPool]> 0x55a9fb749a30, 0x55a9fb7ebbb0, undef:i64
try.c: 0x55a9fb7ebbb0: i64 = X86ISD::WrapperRIP TargetConstantPool:i64<i32 1> 0
try.c: 0x55a9fb7c8be0: i64 = TargetConstantPool<i32 1> 0
try.c: 0x55a9fb7c8260: i64 = undef
try.c: 0x55a9fb857fd0: v16i32 = BUILD_VECTOR Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>
try.c: 0x55a9fb857ea0: i32 = Constant<0>
try.c: 0x55a9fb857ea0: i32 = Constant<0>
try.c: 0x55a9fb857ea0: i32 = Constant<0>
try.c: 0x55a9fb857ea0: i32 = Constant<0>
try.c: 0x55a9fb857ea0: i32 = Constant<0>
try.c: 0x55a9fb857ea0: i32 = Constant<0>
try.c: 0x55a9fb857ea0: i32 = Constant<0>
try.c: 0x55a9fb857ea0: i32 = Constant<0>
try.c: 0x55a9fb857ea0: i32 = Constant<0>
try.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler Implementations
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE T:ref

Compiler output

Implementation: T:ref
Security model: timingleaks
Compiler: clang -march=native -O -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
try.c: fatal error: error in backend: Cannot select: 0x557458278f00: v4i64 = X86ISD::VTRUNC 0x557458278dd0
try.c: 0x557458278dd0: v16i32 = vselect 0x55745827ca20, 0x5574581f7f90, 0x557458278ca0
try.c: 0x55745827ca20: v4i1 = X86ISD::PCMPGTM 0x5574582758f0, 0x557458271480
try.c: 0x5574582758f0: v4i64 = X86ISD::VBROADCAST 0x557458231980
try.c: 0x557458231980: i64,ch = load<LD8[%lsr.iv6971]> 0x557458186950, 0x557458268910, undef:i64
try.c: 0x557458268910: i64,ch = CopyFromReg 0x557458186950, Register:i64 %vreg50
try.c: 0x5574582716e0: i64 = Register %vreg50
try.c: 0x5574581f6600: i64 = undef
try.c: 0x557458271480: v4i64,ch = CopyFromReg 0x557458186950, Register:v4i64 %vreg13
try.c: 0x557458276140: v4i64 = Register %vreg13
try.c: 0x5574581f7f90: v16i32 = X86ISD::VBROADCAST 0x557458275b50
try.c: 0x557458275b50: i32,ch = load<LD4[ConstantPool]> 0x557458186950, 0x557458230f60, undef:i64
try.c: 0x557458230f60: i64 = X86ISD::WrapperRIP TargetConstantPool:i64<i32 1> 0
try.c: 0x55745825f5a0: i64 = TargetConstantPool<i32 1> 0
try.c: 0x5574581f6600: i64 = undef
try.c: 0x557458278ca0: v16i32 = BUILD_VECTOR Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>
try.c: 0x557458278b70: i32 = Constant<0>
try.c: 0x557458278b70: i32 = Constant<0>
try.c: 0x557458278b70: i32 = Constant<0>
try.c: 0x557458278b70: i32 = Constant<0>
try.c: 0x557458278b70: i32 = Constant<0>
try.c: 0x557458278b70: i32 = Constant<0>
try.c: 0x557458278b70: i32 = Constant<0>
try.c: 0x557458278b70: i32 = Constant<0>
try.c: 0x557458278b70: i32 = Constant<0>
try.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler Implementations
clang -march=native -O -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE T:ref

Compiler output

Implementation: T:ssse3
Security model: timingleaks
Compiler: clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
try.c: fatal error: error in backend: Cannot select: 0x55d18e5d7b60: v4i64 = X86ISD::VTRUNC 0x55d18e5d7a30
try.c: 0x55d18e5d7a30: v16i32 = vselect 0x55d18e5d4540, 0x55d18e57b8a0, 0x55d18e5d7900
try.c: 0x55d18e5d4540: v4i1 = X86ISD::PCMPGTM 0x55d18e5cf8f0, 0x55d18e5cb480
try.c: 0x55d18e5cf8f0: v4i64 = X86ISD::VBROADCAST 0x55d18e5956a0
try.c: 0x55d18e5956a0: i64,ch = load<LD8[%lsr.iv6971]> 0x55d18e4e0950, 0x55d18e5b6f70, undef:i64
try.c: 0x55d18e5b6f70: i64,ch = CopyFromReg 0x55d18e4e0950, Register:i64 %vreg50
try.c: 0x55d18e5cb6e0: i64 = Register %vreg50
try.c: 0x55d18e596b70: i64 = undef
try.c: 0x55d18e5cb480: v4i64,ch = CopyFromReg 0x55d18e4e0950, Register:v4i64 %vreg13
try.c: 0x55d18e5d0140: v4i64 = Register %vreg13
try.c: 0x55d18e57b8a0: v16i32 = X86ISD::VBROADCAST 0x55d18e5cfb50
try.c: 0x55d18e5cfb50: i32,ch = load<LD4[ConstantPool]> 0x55d18e4e0950, 0x55d18e578610, undef:i64
try.c: 0x55d18e578610: i64 = X86ISD::WrapperRIP TargetConstantPool:i64<i32 1> 0
try.c: 0x55d18e5b8f20: i64 = TargetConstantPool<i32 1> 0
try.c: 0x55d18e596b70: i64 = undef
try.c: 0x55d18e5d7900: v16i32 = BUILD_VECTOR Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>
try.c: 0x55d18e5d77d0: i32 = Constant<0>
try.c: 0x55d18e5d77d0: i32 = Constant<0>
try.c: 0x55d18e5d77d0: i32 = Constant<0>
try.c: 0x55d18e5d77d0: i32 = Constant<0>
try.c: 0x55d18e5d77d0: i32 = Constant<0>
try.c: 0x55d18e5d77d0: i32 = Constant<0>
try.c: 0x55d18e5d77d0: i32 = Constant<0>
try.c: 0x55d18e5d77d0: i32 = Constant<0>
try.c: 0x55d18e5d77d0: i32 = Constant<0>
try.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler Implementations
clang -march=native -O2 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE T:ssse3

Compiler output

Implementation: T:ssse3
Security model: timingleaks
Compiler: clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
try.c: fatal error: error in backend: Cannot select: 0x563a4d4f9670: v4i64 = X86ISD::VTRUNC 0x563a4d4f9540
try.c: 0x563a4d4f9540: v16i32 = vselect 0x563a4d4da2b0, 0x563a4d476420, 0x563a4d4f9410
try.c: 0x563a4d4da2b0: v4i1 = X86ISD::PCMPGTM 0x563a4d4d32b0, 0x563a4d4cf840
try.c: 0x563a4d4d32b0: v4i64 = X86ISD::VBROADCAST 0x563a4d4768e0
try.c: 0x563a4d4768e0: i64,ch = load<LD8[%lsr.iv6971]> 0x563a4d3cda40, 0x563a4d485a30, undef:i64
try.c: 0x563a4d485a30: i64,ch = CopyFromReg 0x563a4d3cda40, Register:i64 %vreg50
try.c: 0x563a4d4cfaa0: i64 = Register %vreg50
try.c: 0x563a4d44bbf0: i64 = undef
try.c: 0x563a4d4cf840: v4i64,ch = CopyFromReg 0x563a4d3cda40, Register:v4i64 %vreg13
try.c: 0x563a4d4d3b00: v4i64 = Register %vreg13
try.c: 0x563a4d476420: v16i32 = X86ISD::VBROADCAST 0x563a4d4d3510
try.c: 0x563a4d4d3510: i32,ch = load<LD4[ConstantPool]> 0x563a4d3cda40, 0x563a4d46c450, undef:i64
try.c: 0x563a4d46c450: i64 = X86ISD::WrapperRIP TargetConstantPool:i64<i32 1> 0
try.c: 0x563a4d44c570: i64 = TargetConstantPool<i32 1> 0
try.c: 0x563a4d44bbf0: i64 = undef
try.c: 0x563a4d4f9410: v16i32 = BUILD_VECTOR Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>
try.c: 0x563a4d4f92e0: i32 = Constant<0>
try.c: 0x563a4d4f92e0: i32 = Constant<0>
try.c: 0x563a4d4f92e0: i32 = Constant<0>
try.c: 0x563a4d4f92e0: i32 = Constant<0>
try.c: 0x563a4d4f92e0: i32 = Constant<0>
try.c: 0x563a4d4f92e0: i32 = Constant<0>
try.c: 0x563a4d4f92e0: i32 = Constant<0>
try.c: 0x563a4d4f92e0: i32 = Constant<0>
try.c: 0x563a4d4f92e0: i32 = Constant<0>
try.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler Implementations
clang -march=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE T:ssse3

Compiler output

Implementation: T:ssse3
Security model: timingleaks
Compiler: clang -march=native -O -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
try.c: fatal error: error in backend: Cannot select: 0x564e52c1c860: v4i64 = X86ISD::VTRUNC 0x564e52c1c730
try.c: 0x564e52c1c730: v16i32 = vselect 0x564e52c00050, 0x564e52ba9870, 0x564e52c1c600
try.c: 0x564e52c00050: v4i1 = X86ISD::PCMPGTM 0x564e52c01090, 0x564e52bfda70
try.c: 0x564e52c01090: v4i64 = X86ISD::VBROADCAST 0x564e52ba2860
try.c: 0x564e52ba2860: i64,ch = load<LD8[%lsr.iv6971]> 0x564e52b13960, 0x564e52be71a0, undef:i64
try.c: 0x564e52be71a0: i64,ch = CopyFromReg 0x564e52b13960, Register:i64 %vreg50
try.c: 0x564e52bfdcd0: i64 = Register %vreg50
try.c: 0x564e52ba3d30: i64 = undef
try.c: 0x564e52bfda70: v4i64,ch = CopyFromReg 0x564e52b13960, Register:v4i64 %vreg13
try.c: 0x564e52c018e0: v4i64 = Register %vreg13
try.c: 0x564e52ba9870: v16i32 = X86ISD::VBROADCAST 0x564e52c012f0
try.c: 0x564e52c012f0: i32,ch = load<LD4[ConstantPool]> 0x564e52b13960, 0x564e52ba1e40, undef:i64
try.c: 0x564e52ba1e40: i64 = X86ISD::WrapperRIP TargetConstantPool:i64<i32 1> 0
try.c: 0x564e52b9f070: i64 = TargetConstantPool<i32 1> 0
try.c: 0x564e52ba3d30: i64 = undef
try.c: 0x564e52c1c600: v16i32 = BUILD_VECTOR Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>
try.c: 0x564e52c1c4d0: i32 = Constant<0>
try.c: 0x564e52c1c4d0: i32 = Constant<0>
try.c: 0x564e52c1c4d0: i32 = Constant<0>
try.c: 0x564e52c1c4d0: i32 = Constant<0>
try.c: 0x564e52c1c4d0: i32 = Constant<0>
try.c: 0x564e52c1c4d0: i32 = Constant<0>
try.c: 0x564e52c1c4d0: i32 = Constant<0>
try.c: 0x564e52c1c4d0: i32 = Constant<0>
try.c: 0x564e52c1c4d0: i32 = Constant<0>
try.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler Implementations
clang -march=native -O -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE T:ssse3

Compiler output

Implementation: T:ssse3
Security model: timingleaks
Compiler: clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE
blas_matrix_ref.c: In file included from blas_matrix_ref.c:6:
blas_matrix_ref.c: In file included from ./blas.h:25:
blas_matrix_ref.c: In file included from ./blas_sse.h:16:
blas_matrix_ref.c: ./gf16_sse.h:34:9: error: always_inline function '_mm_shuffle_epi8' requires target feature 'ssse3', but would be inlined into function 'linear_transform_8x8_128b' that is compiled without support for 'ssse3'
blas_matrix_ref.c: return _mm_shuffle_epi8(tab_l,v&mask_f)^_mm_shuffle_epi8(tab_h,_mm_srli_epi16(v,4)&mask_f);
blas_matrix_ref.c: ^
blas_matrix_ref.c: ./gf16_sse.h:34:42: error: always_inline function '_mm_shuffle_epi8' requires target feature 'ssse3', but would be inlined into function 'linear_transform_8x8_128b' that is compiled without support for 'ssse3'
blas_matrix_ref.c: return _mm_shuffle_epi8(tab_l,v&mask_f)^_mm_shuffle_epi8(tab_h,_mm_srli_epi16(v,4)&mask_f);
blas_matrix_ref.c: ^
blas_matrix_ref.c: fatal error: error in backend: Cannot select: 0x562e76f2ab20: v16i8 = X86ISD::PSHUFB 0x562e76ea91b0, 0x562e76f6ad30
blas_matrix_ref.c: 0x562e76ea91b0: v16i8,ch = CopyFromReg 0x562e76e275b0, Register:v16i8 %vreg15
blas_matrix_ref.c: 0x562e76f6a150: v16i8 = Register %vreg15
blas_matrix_ref.c: 0x562e76f6ad30: v16i8 = bitcast 0x562e76f68e10
blas_matrix_ref.c: 0x562e76f68e10: v2i64 = and 0x562e76f57cc0, 0x562e76f69790
blas_matrix_ref.c: 0x562e76f57cc0: v2i64 = bitcast 0x562e76f57340
blas_matrix_ref.c: 0x562e76f57340: v8i16 = X86ISD::VSRLI 0x562e76ea9670, Constant:i8<4>
blas_matrix_ref.c: 0x562e76ea9670: v8i16 = bitcast 0x562e76f57800
blas_matrix_ref.c: 0x562e76f57800: v2i64,ch = load<LD16[%uglygep1617](align=1)(tbaa=<0x562e76aac628>)> 0x562e76e275b0, 0x562e76f68950, undef:i64
blas_matrix_ref.c: 0x562e76f68950: i64 = add 0x562e76f57470, 0x562e76f57f20
blas_matrix_ref.c: 0x562e76f57470: i64,ch = CopyFromReg 0x562e76e275b0, Register:i64 %vreg12
blas_matrix_ref.c: 0x562e76f58180: i64 = Register %vreg12
blas_matrix_ref.c: 0x562e76f57f20: i64,ch = CopyFromReg 0x562e76e275b0, Register:i64 %vreg17
blas_matrix_ref.c: 0x562e76f57a60: i64 = Register %vreg17
blas_matrix_ref.c: 0x562e76ea97a0: i64 = undef
blas_matrix_ref.c: 0x562e76f6ac00: i8 = Constant<4>
blas_matrix_ref.c: ...

Number of similar (compiler,implementation) pairs: 1, namely:
Compiler Implementations
clang -mcpu=native -O3 -fomit-frame-pointer -fwrapv -Qunused-arguments -fPIC -fPIE T:ssse3