Skip to content
2 changes: 1 addition & 1 deletion src/coreclr/jit/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ function(create_standalone_jit)
endif ()
endfunction()

if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR (CLR_CMAKE_TARGET_ARCH_I386 AND NOT CLR_CMAKE_HOST_UNIX))
if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_I386)
add_compile_definitions($<$<NOT:$<BOOL:$<TARGET_PROPERTY:IGNORE_DEFAULT_TARGET_ARCH>>>:FEATURE_SIMD>)
add_compile_definitions($<$<NOT:$<BOOL:$<TARGET_PROPERTY:IGNORE_DEFAULT_TARGET_ARCH>>>:FEATURE_HW_INTRINSICS>)
add_compile_definitions($<$<NOT:$<BOOL:$<TARGET_PROPERTY:IGNORE_DEFAULT_TARGET_ARCH>>>:FEATURE_MASKED_HW_INTRINSICS>)
Expand Down
5 changes: 5 additions & 0 deletions src/coreclr/jit/codegenlinear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2255,8 +2255,13 @@ void CodeGen::genCodeForCast(GenTreeOp* tree)
}
else if (varTypeIsFloating(tree->gtOp1))
{
#ifdef TARGET_XARCH
// These casts should have been lowered to HWIntrinsics
unreached();
#else
// Casts float/double --> int32/int64
genFloatToIntCast(tree);
#endif
}
else if (varTypeIsFloating(targetType))
{
Expand Down
68 changes: 0 additions & 68 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7342,74 +7342,6 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode)
genProduceReg(treeNode);
}

//------------------------------------------------------------------------
// genFloatToIntCast: Emit the instruction sequence for a floating-point
//                    to integer GT_CAST.
//
// Arguments:
//    treeNode - The GT_CAST node being generated
//
// Return Value:
//    None.
//
// Assumptions:
//    The cast does not check for overflow.
//    An integer register has been assigned to treeNode.
//    The source is float/double; the destination is int32/uint32/int64/uint64.
//
// TODO-XArch-CQ: (Low-pri) - generate in-line code when DstType = uint64
//
void CodeGen::genFloatToIntCast(GenTree* treeNode)
{
    // Overflow-checking float/double --> int conversions are expected to have
    // been turned into helper calls by the front-end, so only the plain
    // (non-overflow) form may reach this point.
    assert(treeNode->OperIs(GT_CAST));
    assert(!treeNode->gtOverflow());

    regNumber resultReg = treeNode->GetRegNum();
    assert(genIsValidIntReg(resultReg));

    GenTree* source = treeNode->AsOp()->gtOp1;

    // When the operand is consumed from a register, it has to be a float register.
    assert(!source->isUsedFromReg() || genIsValidFloatReg(source->GetRegNum()));

    var_types toType   = treeNode->CastToType();
    var_types fromType = source->TypeGet();
    assert(varTypeIsFloating(fromType) && !varTypeIsFloating(toType));

    // Only int-sized and long-sized destinations are legal here. A conversion
    // to byte/sbyte/int16/uint16 is expected to arrive as two stacked casts:
    // float/double --> int32/uint32 first, then a narrowing integer cast.
    const emitAttr intSize  = EA_ATTR(genTypeSize(TYP_INT));
    const emitAttr longSize = EA_ATTR(genTypeSize(TYP_LONG));
    const emitAttr toSize   = EA_ATTR(genTypeSize(toType));
    noway_assert((toSize == intSize) || (toSize == longSize));

    // A uint64 destination should already have become a helper call in the
    // front-end or lowering, unless AVX512 accelerated conversions are usable.
    assert(!(varTypeIsUnsigned(toType) && (toSize == longSize)) || compiler->canUseEvexEncodingDebugOnly());

    // For TYP_UINT without EVEX encoding, convert as though the destination
    // were a long: the value then has all 32 low bits available and the sign
    // bit sits at bit 33 or above.
    const bool widenToLong = varTypeIsUnsigned(toType) && (toSize == intSize) && !compiler->canUseEvexEncoding();
    if (widenToLong)
    {
        toType = TYP_LONG;
    }

    // The (possibly widened) destination type selects both the size of the
    // destination integer register and whether a rex.w prefix is needed.
    genConsumeOperands(treeNode->AsOp());

    const instruction convertIns = ins_FloatConv(toType, fromType);
    const emitAttr    emitSize   = emitTypeSize(toType);
    GetEmitter()->emitInsBinary(convertIns, emitSize, treeNode, source);

    genProduceReg(treeNode);
}

//------------------------------------------------------------------------
// genCkfinite: Generate code for ckfinite opcode.
//
Expand Down
8 changes: 4 additions & 4 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21832,19 +21832,19 @@ GenTree* Compiler::gtNewSimdCvtNode(var_types type,
switch (simdTargetBaseType)
{
case TYP_INT:
cvtIntrinsic = NI_AVX10v2_ConvertToVectorInt32WithTruncationSaturation;
cvtIntrinsic = NI_AVX10v2_ConvertToVectorInt32WithTruncatedSaturation;
break;

case TYP_UINT:
cvtIntrinsic = NI_AVX10v2_ConvertToVectorUInt32WithTruncationSaturation;
cvtIntrinsic = NI_AVX10v2_ConvertToVectorUInt32WithTruncatedSaturation;
break;

case TYP_LONG:
cvtIntrinsic = NI_AVX10v2_ConvertToVectorInt64WithTruncationSaturation;
cvtIntrinsic = NI_AVX10v2_ConvertToVectorInt64WithTruncatedSaturation;
break;

case TYP_ULONG:
cvtIntrinsic = NI_AVX10v2_ConvertToVectorUInt64WithTruncationSaturation;
cvtIntrinsic = NI_AVX10v2_ConvertToVectorUInt64WithTruncatedSaturation;
break;

default:
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -978,7 +978,7 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = {
{ NI_Illegal, NI_Illegal }, // AVX512v2_X64
{ NI_Illegal, NI_Illegal }, // AVX512v3_X64
{ NI_Illegal, NI_Illegal }, // AVX10v1_X64
{ NI_Illegal, NI_Illegal }, // AVX10v2_X64
{ FIRST_NI_AVX10v2_X64, LAST_NI_AVX10v2_X64 }, // AVX10v2_X64
{ NI_Illegal, NI_Illegal }, // AES_X64
{ NI_Illegal, NI_Illegal }, // AVX512VP2INTERSECT_X64
{ NI_Illegal, NI_Illegal }, // AVXIFMA_X64
Expand Down
8 changes: 7 additions & 1 deletion src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1011,6 +1011,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
case InstructionSet_AVX512:
case InstructionSet_AVX512_X64:
case InstructionSet_AVX512v2:
case InstructionSet_AVX10v2:
case InstructionSet_AVX10v2_X64:
case InstructionSet_AVXVNNIINT:
case InstructionSet_AVXVNNIINT_V512:
{
Expand Down Expand Up @@ -3502,6 +3504,10 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption
case NI_AVX512_X64_ConvertToInt64:
case NI_AVX512_X64_ConvertToUInt64:
case NI_AVX512_X64_ConvertToUInt64WithTruncation:
case NI_AVX10v2_ConvertToInt32WithTruncatedSaturation:
case NI_AVX10v2_ConvertToUInt32WithTruncatedSaturation:
case NI_AVX10v2_X64_ConvertToInt64WithTruncatedSaturation:
case NI_AVX10v2_X64_ConvertToUInt64WithTruncatedSaturation:
{
assert(baseType == TYP_DOUBLE || baseType == TYP_FLOAT);
emitAttr attr = emitTypeSize(targetType);
Expand All @@ -3512,7 +3518,6 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption
}

case NI_AVX512_ConvertToVector128UInt32:
case NI_AVX512_ConvertToVector128UInt32WithSaturation:
case NI_AVX512_ConvertToVector256Int32:
case NI_AVX512_ConvertToVector256UInt32:
{
Expand All @@ -3535,6 +3540,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption
case NI_AVX512_ConvertToVector128SByteWithSaturation:
case NI_AVX512_ConvertToVector128UInt16:
case NI_AVX512_ConvertToVector128UInt16WithSaturation:
case NI_AVX512_ConvertToVector128UInt32WithSaturation:
case NI_AVX512_ConvertToVector256Byte:
case NI_AVX512_ConvertToVector256ByteWithSaturation:
case NI_AVX512_ConvertToVector256Int16:
Expand Down
20 changes: 16 additions & 4 deletions src/coreclr/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -1077,19 +1077,31 @@ HARDWARE_INTRINSIC(AVX512v3, ExpandLoad,
#define FIRST_NI_AVX10v2 NI_AVX10v2_ConvertToByteWithSaturationAndZeroExtendToInt32
HARDWARE_INTRINSIC(AVX10v2, ConvertToByteWithSaturationAndZeroExtendToInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2iubs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible)
HARDWARE_INTRINSIC(AVX10v2, ConvertToByteWithTruncatedSaturationAndZeroExtendToInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2iubs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX10v2, ConvertToInt32WithTruncatedSaturation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2sis32, INS_vcvttsd2sis32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX10v2, ConvertToSByteWithSaturationAndZeroExtendToInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2ibs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible)
HARDWARE_INTRINSIC(AVX10v2, ConvertToSByteWithTruncatedSaturationAndZeroExtendToInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2ibs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorInt32WithTruncationSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2dqs, INS_vcvttpd2dqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorInt64WithTruncationSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qqs, INS_vcvttpd2qqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorUInt32WithTruncationSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udqs, INS_vcvttpd2udqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorUInt64WithTruncationSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqqs, INS_vcvttpd2uqqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX10v2, ConvertToUInt32WithTruncatedSaturation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usis32, INS_vcvttsd2usis32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorInt32WithTruncatedSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2dqs, INS_vcvttpd2dqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorInt64WithTruncatedSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qqs, INS_vcvttpd2qqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorUInt32WithTruncatedSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udqs, INS_vcvttpd2udqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorUInt64WithTruncatedSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqqs, INS_vcvttpd2uqqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX10v2, MinMax, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vminmaxps, INS_vminmaxpd}, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX10v2, MinMaxScalar, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vminmaxss, INS_vminmaxsd}, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX10v2, MoveScalar, 16, -1, {INS_invalid, INS_invalid, INS_vmovw_simd, INS_vmovw_simd, INS_vmovd_simd, INS_vmovd_simd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoContainment)
HARDWARE_INTRINSIC(AVX10v2, MultipleSumAbsoluteDifferences, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vmpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX10v2, StoreScalar, 16, 2, {INS_invalid, INS_invalid, INS_vmovw_simd, INS_vmovw_simd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
#define LAST_NI_AVX10v2 NI_AVX10v2_StoreScalar

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// 64-bit only intrinsics for AVX10v2
#define FIRST_NI_AVX10v2_X64 NI_AVX10v2_X64_ConvertToInt64WithTruncatedSaturation
HARDWARE_INTRINSIC(AVX10v2_X64, ConvertToInt64WithTruncatedSaturation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2sis64, INS_vcvttsd2sis64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX10v2_X64, ConvertToUInt64WithTruncatedSaturation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usis64, INS_vcvttsd2usis64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
#define LAST_NI_AVX10v2_X64 NI_AVX10v2_X64_ConvertToUInt64WithTruncatedSaturation

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
Expand Down
Loading
Loading