Merged

Changes from 21 commits (52 commits in total)

Commits
0d437e3
JIT ARM64-SVE: Add Sve.Abs() and Sve.Add()
a74nh Mar 11, 2024
e9fa735
Fix sve scaling in enitIns_R_S/S_R
a74nh Mar 22, 2024
5acc122
Revert "Fix sve scaling in enitIns_R_S/S_R"
a74nh Mar 23, 2024
15e893d
Fix sve scaling in enitIns_R_S/S_R
a74nh Mar 23, 2024
c22f458
Restore testing
a74nh Mar 24, 2024
508a52a
Use NaturalScale_helper for vector load/stores
a74nh Mar 25, 2024
5825c20
Merge remote-tracking branch 'origin/main' into api_abs_github
kunalspathak Mar 28, 2024
9081fa3
Merge remote-tracking branch 'origin/main' into api_abs_github
kunalspathak Apr 3, 2024
d4f3c86
wip
kunalspathak Apr 3, 2024
20defbd
Add ConditionalSelect() APIs
kunalspathak Apr 4, 2024
c70367f
Handle ConditionalSelect in JIT
kunalspathak Apr 4, 2024
1896b21
Add test coverage
kunalspathak Apr 4, 2024
2e0d022
Update the test cases
kunalspathak Apr 6, 2024
185fcfa
jit format
kunalspathak Apr 6, 2024
88e8f87
Merge branch 'sve-conditional-select' into abi_abs_conditional
kunalspathak Apr 6, 2024
ebcb174
fix merge conflicts
kunalspathak Apr 6, 2024
28aa7b7
Make predicated/unpredicated work with ConditionalSelect
kunalspathak Apr 7, 2024
1c7b4b2
Misc. changes
kunalspathak Apr 7, 2024
6adf6c6
jit format
kunalspathak Apr 7, 2024
ecf4e01
jit format
kunalspathak Apr 7, 2024
c6fef82
Merge remote-tracking branch 'origin/main' into abi_abs_conditional
kunalspathak Apr 7, 2024
d780156
Handle all the conditions correctly
kunalspathak Apr 8, 2024
eb4d1dc
jit format
kunalspathak Apr 9, 2024
00aacab
fix some spacing
kunalspathak Apr 9, 2024
f716958
Removed the assert
kunalspathak Apr 9, 2024
5eadcf9
fix the largest vector size to 64 to fix #100366
kunalspathak Apr 9, 2024
bc6e58d
review feedback
kunalspathak Apr 11, 2024
19ec4b6
wip
kunalspathak Apr 11, 2024
ed7c781
Add SVE feature detection for Windows
kunalspathak Apr 11, 2024
f966b5e
fix the check for invalid alignment
kunalspathak Apr 11, 2024
f0c81f1
Revert "Add SVE feature detection for Windows"
kunalspathak Apr 11, 2024
89ede7d
Handle case where Abs() is wrapped in another conditionalSelect
kunalspathak Apr 11, 2024
1f44b2f
jit format
kunalspathak Apr 12, 2024
9712a16
fix the size comparison
kunalspathak Apr 12, 2024
0ad2e64
HW_Flag_MaskedPredicatedOnlyOperation
kunalspathak Apr 12, 2024
f76e324
Revert the change in emitarm64.cpp around INS_sve_ldr_mask/INS_sve_st…
kunalspathak Apr 12, 2024
8af7108
Fix the condition for lowering
kunalspathak Apr 14, 2024
e934e26
address review feedback for movprfx
kunalspathak Apr 22, 2024
0daeac7
Move the special handling of Vector<>.Zero from lowerer to importer
kunalspathak Apr 22, 2024
635a7d7
Rename IsEmbeddedMaskedOperation/IsOptionalEmbeddedMaskedOperation
kunalspathak Apr 22, 2024
19da982
Add more test coverage for conditionalSelect
kunalspathak Apr 22, 2024
f1b8b17
Rename test method name
kunalspathak Apr 23, 2024
409a039
Add more test coverage for conditionalSelect:Abs
kunalspathak Apr 23, 2024
0c8a14d
jit format
kunalspathak Apr 23, 2024
53c5eb3
Add logging on test methods
kunalspathak Apr 23, 2024
868229f
Merge remote-tracking branch 'origin/main' into abi_abs_conditional
kunalspathak Apr 23, 2024
9ae3c78
Add the missing movprfx for abs
kunalspathak Apr 23, 2024
c8244eb
Add few more scenarios where falseVal is zero
kunalspathak Apr 23, 2024
f6eb1fe
Make sure LoadVector is marked as explicit needing mask
kunalspathak Apr 23, 2024
83e1b1b
revisit the codegen logic
kunalspathak Apr 24, 2024
c78e0c7
Remove commented code and add some other comments
kunalspathak Apr 24, 2024
6aa2386
jit format
kunalspathak Apr 24, 2024
2 changes: 0 additions & 2 deletions src/coreclr/jit/codegenlinear.cpp
@@ -1654,15 +1654,13 @@ void CodeGen::genConsumeRegs(GenTree* tree)
// Update the life of the lcl var.
genUpdateLife(tree);
}
#ifdef TARGET_XARCH
#ifdef FEATURE_HW_INTRINSICS
else if (tree->OperIs(GT_HWINTRINSIC))
{
GenTreeHWIntrinsic* hwintrinsic = tree->AsHWIntrinsic();
genConsumeMultiOpOperands(hwintrinsic);
}
#endif // FEATURE_HW_INTRINSICS
#endif // TARGET_XARCH
else if (tree->OperIs(GT_BITCAST, GT_NEG, GT_CAST, GT_LSH, GT_RSH, GT_RSZ, GT_BSWAP, GT_BSWAP16))
{
genConsumeRegs(tree->gtGetOp1());
1 change: 1 addition & 0 deletions src/coreclr/jit/compiler.h
@@ -3468,6 +3468,7 @@ class Compiler
#if defined(TARGET_ARM64)
GenTree* gtNewSimdConvertVectorToMaskNode(var_types type, GenTree* node, CorInfoType simdBaseJitType, unsigned simdSize);
GenTree* gtNewSimdConvertMaskToVectorNode(GenTreeHWIntrinsic* node, var_types type);
GenTree* gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType, unsigned simdSize);
#endif

//------------------------------------------------------------------------
4 changes: 2 additions & 2 deletions src/coreclr/jit/emitarm64.cpp
@@ -7875,7 +7875,7 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va

// TODO-SVE: Don't assume 128bit vectors
// Predicate size is vector length / 8
scale = NaturalScale_helper(EA_2BYTE);
scale = 2;
ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate

if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale)))
@@ -8154,7 +8154,7 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va

// TODO-SVE: Don't assume 128bit vectors
// Predicate size is vector length / 8
scale = NaturalScale_helper(EA_2BYTE);
scale = 2;
ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate

if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale)))
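
A minimal sketch of the size relationship the "Predicate size is vector length / 8" comment relies on (illustrative only, not part of this diff): an SVE predicate register holds one bit per byte of the vector, so for the 128-bit vectors currently assumed the predicate occupies 2 bytes.

    // Illustrative only: predicate size for the 128-bit vector length assumed above.
    constexpr unsigned vectorLengthBytes    = 16;                    // 128-bit vector
    constexpr unsigned predicateLengthBytes = vectorLengthBytes / 8; // one predicate bit per vector byte
    static_assert(predicateLengthBytes == 2, "predicate storage is vector length / 8");
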
2 changes: 1 addition & 1 deletion src/coreclr/jit/emitloongarch64.h
@@ -333,7 +333,7 @@ enum EmitCallType

EC_FUNC_TOKEN, // Direct call to a helper/static/nonvirtual/global method
// EC_FUNC_TOKEN_INDIR, // Indirect call to a helper/static/nonvirtual/global method
// EC_FUNC_ADDR, // Direct call to an absolute address
// EC_FUNC_ADDR, // Direct call to an absolute address

EC_INDIR_R, // Indirect call via register

2 changes: 1 addition & 1 deletion src/coreclr/jit/emitriscv64.h
@@ -310,7 +310,7 @@ enum EmitCallType

EC_FUNC_TOKEN, // Direct call to a helper/static/nonvirtual/global method
// EC_FUNC_TOKEN_INDIR, // Indirect call to a helper/static/nonvirtual/global method
// EC_FUNC_ADDR, // Direct call to an absolute address
// EC_FUNC_ADDR, // Direct call to an absolute address

// EC_FUNC_VIRTUAL, // Call to a virtual method (using the vtable)
EC_INDIR_R, // Indirect call via register
4 changes: 4 additions & 0 deletions src/coreclr/jit/gentree.cpp
@@ -18008,7 +18008,11 @@ bool GenTree::canBeContained() const
}
else if (OperIsHWIntrinsic() && !isContainableHWIntrinsic())
{
#ifdef TARGET_XARCH
return isEvexEmbeddedMaskingCompatibleHWIntrinsic();
#elif TARGET_ARM64
return HWIntrinsicInfo::IsEmbeddedMaskedOperation(AsHWIntrinsic()->GetHWIntrinsicId());
#endif
}

return true;
8 changes: 4 additions & 4 deletions src/coreclr/jit/gentree.h
@@ -557,9 +557,9 @@ enum GenTreeFlags : unsigned int

GTF_MDARRLOWERBOUND_NONFAULTING = 0x20000000, // GT_MDARR_LOWER_BOUND -- An MD array lower bound operation that cannot fault. Same as GT_IND_NONFAULTING.

#if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS)
#ifdef FEATURE_HW_INTRINSICS
GTF_HW_EM_OP = 0x10000000, // GT_HWINTRINSIC -- node is used as an operand to an embedded mask
#endif // TARGET_XARCH && FEATURE_HW_INTRINSICS
#endif // FEATURE_HW_INTRINSICS
};

inline constexpr GenTreeFlags operator ~(GenTreeFlags a)
@@ -2223,7 +2223,7 @@ struct GenTree
gtFlags &= ~GTF_ICON_HDL_MASK;
}

#if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS)
#ifdef FEATURE_HW_INTRINSICS

bool IsEmbMaskOp()
{
@@ -2237,7 +2237,7 @@ struct GenTree
gtFlags |= GTF_HW_EM_OP;
}

#endif // TARGET_XARCH && FEATURE_HW_INTRINSICS
#endif // FEATURE_HW_INTRINSICS

static bool HandleKindDataIsInvariant(GenTreeFlags flags);

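
A minimal sketch of how IsEmbMaskOp/MakeEmbMaskOp are intended to be consumed (hypothetical call site, not part of this diff; the real containment logic lives in lowering): when the true-value operand of Sve.ConditionalSelect is an intrinsic that requires an embedded mask, it can be contained and tagged so that codegen later emits the predicated encoding.

    // Hypothetical lowering-side sketch (names of locals are illustrative).
    void TryContainEmbeddedMaskedOp(GenTreeHWIntrinsic* select, GenTree* trueVal)
    {
        assert(select->GetHWIntrinsicId() == NI_Sve_ConditionalSelect);

        if (trueVal->OperIsHWIntrinsic() &&
            HWIntrinsicInfo::IsEmbeddedMaskedOperation(trueVal->AsHWIntrinsic()->GetHWIntrinsicId()))
        {
            trueVal->SetContained();  // fold the operation into the select
            trueVal->MakeEmbMaskOp(); // sets GTF_HW_EM_OP so codegen picks the predicated form
        }
    }
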
58 changes: 31 additions & 27 deletions src/coreclr/jit/hwintrinsic.cpp
@@ -1396,6 +1396,37 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
GenTree* op3 = nullptr;
GenTree* op4 = nullptr;

switch (numArgs)
{
case 4:
op4 = getArgForHWIntrinsic(sigReader.GetOp4Type(), sigReader.op4ClsHnd);
op4 = addRangeCheckIfNeeded(intrinsic, op4, mustExpand, immLowerBound, immUpperBound);
op3 = getArgForHWIntrinsic(sigReader.GetOp3Type(), sigReader.op3ClsHnd);
op2 = getArgForHWIntrinsic(sigReader.GetOp2Type(), sigReader.op2ClsHnd);
op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd);
break;

case 3:
op3 = getArgForHWIntrinsic(sigReader.GetOp3Type(), sigReader.op3ClsHnd);
op2 = getArgForHWIntrinsic(sigReader.GetOp2Type(), sigReader.op2ClsHnd);
op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd);
assert(addRangeCheckIfNeeded(intrinsic, op3, mustExpand, immLowerBound, immUpperBound) == op3);
break;

case 2:
op2 = getArgForHWIntrinsic(sigReader.GetOp2Type(), sigReader.op2ClsHnd);
op2 = addRangeCheckIfNeeded(intrinsic, op2, mustExpand, immLowerBound, immUpperBound);
op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd);
break;

case 1:
op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd);
break;

default:
break;
}

switch (numArgs)
{
case 0:
@@ -1407,8 +1438,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,

case 1:
{
op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd);

if ((category == HW_Category_MemoryLoad) && op1->OperIs(GT_CAST))
{
// Although the API specifies a pointer, if what we have is a BYREF, that's what
@@ -1467,10 +1496,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,

case 2:
{
op2 = getArgForHWIntrinsic(sigReader.GetOp2Type(), sigReader.op2ClsHnd);
op2 = addRangeCheckIfNeeded(intrinsic, op2, mustExpand, immLowerBound, immUpperBound);
op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd);

retNode = isScalar
? gtNewScalarHWIntrinsicNode(nodeRetType, op1, op2, intrinsic)
: gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, intrinsic, simdBaseJitType, simdSize);
@@ -1524,10 +1549,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,

case 3:
{
op3 = getArgForHWIntrinsic(sigReader.GetOp3Type(), sigReader.op3ClsHnd);
op2 = getArgForHWIntrinsic(sigReader.GetOp2Type(), sigReader.op2ClsHnd);
op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd);

#ifdef TARGET_ARM64
if (intrinsic == NI_AdvSimd_LoadAndInsertScalar)
{
@@ -1569,12 +1590,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,

case 4:
{
op4 = getArgForHWIntrinsic(sigReader.GetOp4Type(), sigReader.op4ClsHnd);
op4 = addRangeCheckIfNeeded(intrinsic, op4, mustExpand, immLowerBound, immUpperBound);
op3 = getArgForHWIntrinsic(sigReader.GetOp3Type(), sigReader.op3ClsHnd);
op2 = getArgForHWIntrinsic(sigReader.GetOp2Type(), sigReader.op2ClsHnd);
op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd);

assert(!isScalar);
retNode =
gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, op4, intrinsic, simdBaseJitType, simdSize);
@@ -1591,17 +1606,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
}

#if defined(TARGET_ARM64)
if (HWIntrinsicInfo::IsMaskedOperation(intrinsic))
{
assert(numArgs > 0);
GenTree* op1 = retNode->AsHWIntrinsic()->Op(1);
if (!varTypeIsMask(op1))
{
// Op1 input is a vector. HWInstrinsic requires a mask.
retNode->AsHWIntrinsic()->Op(1) = gtNewSimdConvertVectorToMaskNode(retType, op1, simdBaseJitType, simdSize);
}
}

if (retType != nodeRetType)
{
// HWInstrinsic returns a mask, but all returns must be vectors, so convert mask to vector.
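
The change above pops every operand in a single up-front switch instead of inside each per-arity case. The order is significant: arguments come off the IL evaluation stack last-to-first, which is why op4 is fetched before op3, and so on. A minimal sketch of the equivalent stack behaviour (illustrative; getArgForHWIntrinsic wraps the impPopStack/impSIMDPopStack helpers):

    // Illustrative only: for a three-argument intrinsic the last argument is on top of the stack.
    GenTree* op3 = impPopStack().val; // topmost entry        -> third argument
    GenTree* op2 = impPopStack().val; //                      -> second argument
    GenTree* op1 = impPopStack().val; // deepest of the three -> first argument
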
11 changes: 10 additions & 1 deletion src/coreclr/jit/hwintrinsic.h
@@ -191,6 +191,9 @@ enum HWIntrinsicFlag : unsigned int
// The intrinsic uses a mask in arg1 to select elements present in the result, and must use a low register.
HW_Flag_LowMaskedOperation = 0x40000,

// The intrinsic uses a mask in arg1 to select elements present in the result, which is not present in the API call
HW_Flag_EmbeddedMaskedOperation = 0x80000,

#else
#error Unsupported platform
#endif
@@ -872,7 +875,7 @@ struct HWIntrinsicInfo
static bool IsMaskedOperation(NamedIntrinsic id)
{
const HWIntrinsicFlag flags = lookupFlags(id);
return ((flags & HW_Flag_MaskedOperation) != 0) || IsLowMaskedOperation(id);
return ((flags & HW_Flag_MaskedOperation) != 0) || IsLowMaskedOperation(id) || IsEmbeddedMaskedOperation(id);
}

static bool IsLowMaskedOperation(NamedIntrinsic id)
@@ -881,6 +884,12 @@ struct HWIntrinsicInfo
return (flags & HW_Flag_LowMaskedOperation) != 0;
}

static bool IsEmbeddedMaskedOperation(NamedIntrinsic id)
{
const HWIntrinsicFlag flags = lookupFlags(id);
return (flags & HW_Flag_EmbeddedMaskedOperation) != 0;
}

#endif // TARGET_ARM64

static bool HasSpecialSideEffect(NamedIntrinsic id)
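
To summarise the three mask-related flags (illustrative; the concrete per-intrinsic assignments live in the intrinsic tables): HW_Flag_MaskedOperation and HW_Flag_LowMaskedOperation describe intrinsics whose governing predicate is an explicit arg1 in the API (the latter additionally requiring a low predicate register), whereas HW_Flag_EmbeddedMaskedOperation describes intrinsics with no mask parameter at all, which only become predicated when consumed by Sve.ConditionalSelect. After this change, IsMaskedOperation() is the umbrella query over all three:

    // Illustrative only, for any NamedIntrinsic id:
    const bool explicitMask = (HWIntrinsicInfo::lookupFlags(id) & HW_Flag_MaskedOperation) != 0;
    const bool lowRegMask   = HWIntrinsicInfo::IsLowMaskedOperation(id);      // explicit mask, low predicate register
    const bool embeddedMask = HWIntrinsicInfo::IsEmbeddedMaskedOperation(id); // mask supplied via ConditionalSelect
    assert(HWIntrinsicInfo::IsMaskedOperation(id) == (explicitMask || lowRegMask || embeddedMask));
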
20 changes: 17 additions & 3 deletions src/coreclr/jit/hwintrinsicarm64.cpp
@@ -2222,9 +2222,8 @@ GenTree* Compiler::gtNewSimdConvertVectorToMaskNode(var_types type,
assert(varTypeIsSIMD(node));

// ConvertVectorToMask uses cmpne which requires an embedded mask.
GenTree* embeddedMask = gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_CreateTrueMaskAll, simdBaseJitType, simdSize);
return gtNewSimdHWIntrinsicNode(TYP_MASK, embeddedMask, node, NI_Sve_ConvertVectorToMask, simdBaseJitType,
simdSize);
GenTree* trueMask = gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize);
return gtNewSimdHWIntrinsicNode(TYP_MASK, trueMask, node, NI_Sve_ConvertVectorToMask, simdBaseJitType, simdSize);
}

//------------------------------------------------------------------------
@@ -2246,4 +2245,19 @@ GenTree* Compiler::gtNewSimdConvertMaskToVectorNode(GenTreeHWIntrinsic* node, va
node->GetSimdSize());
}

//------------------------------------------------------------------------
// gtNewSimdEmbeddedMaskNode: Create an embedded mask
//
// Arguments:
// simdBaseJitType -- the base jit type of the nodes being masked
// simdSize -- the simd size of the nodes being masked
//
// Return Value:
// The mask
//
GenTree* Compiler::gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType, unsigned simdSize)
{
return gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_CreateTrueMaskAll, simdBaseJitType, simdSize);
}

#endif // FEATURE_HW_INTRINSICS
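
One further use the new helper makes possible (hypothetical sketch, not part of this diff, reusing the importer's locals retType, op1, simdBaseJitType and simdSize): an unmasked call such as Sve.Abs(x) could be wrapped in a ConditionalSelect whose governing predicate is all-true, so the embedded-mask operation still receives a mask.

    // Hypothetical importer-side sketch: predicate an embedded-mask intrinsic with an all-true mask.
    GenTree* trueMask = gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize);
    GenTree* absNode  = gtNewSimdHWIntrinsicNode(retType, op1, NI_Sve_Abs, simdBaseJitType, simdSize);
    GenTree* falseVal = gtNewZeroConNode(retType);
    retNode           = gtNewSimdHWIntrinsicNode(retType, trueMask, absNode, falseVal, NI_Sve_ConditionalSelect,
                                                 simdBaseJitType, simdSize);
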
90 changes: 83 additions & 7 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
@@ -401,6 +401,67 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
unreached();
}
}
else if (intrin.numOperands >= 2 && intrin.op2->IsEmbMaskOp())
{
// Handle case where op2 is operation that needs embedded mask
GenTree* op2 = intrin.op2;
const HWIntrinsic intrinOp2(op2->AsHWIntrinsic());
instruction insOp2 = HWIntrinsicInfo::lookupIns(intrinOp2.id, intrinOp2.baseType);

assert(intrin.id == NI_Sve_ConditionalSelect);
assert(op2->isContained());
assert(op2->OperIsHWIntrinsic());

// if (isRMW)
//{
// op1Reg contains a mask, op2Reg contains the RMW register.

if (targetReg != op1Reg)
{
assert(targetReg != op3Reg);
GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true);
}

switch (intrin.numOperands)
{
case 2:
GetEmitter()->emitIns_R_R(insOp2, emitSize, targetReg, op1Reg, opt);
break;

case 3:
assert(targetReg != op3Reg);
GetEmitter()->emitIns_R_R_R(insOp2, emitSize, targetReg, op1Reg, op3Reg, opt);
break;

default:
unreached();
}
//}
// else
//{
//// op1Reg contains the RMW register.
// if (targetReg != op1Reg)
//{
// assert(targetReg != op2Reg);
// assert(targetReg != op3Reg);
// GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true);
// }

// switch (intrin.numOperands)
//{
// case 2:
// GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op2Reg, opt);
// break;

// case 3:
// GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op2Reg, op3Reg, opt);
// break;

// default:
// unreached();
//}
//}
}
else
{
assert(!hasImmediateOperand);
@@ -419,6 +480,14 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
{
GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt);
}
else if (HWIntrinsicInfo::IsScalable(intrin.id))
{
assert(!node->IsEmbMaskOp());
// This generates unpredicated version
// Predicated should be taken care above `intrin.op2->IsEmbMaskOp()`
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt,
INS_SCALABLE_OPTS_UNPREDICATED);
}
else if (isRMW)
{
if (targetReg != op1Reg)
@@ -437,17 +506,24 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
break;

case 3:
assert(isRMW);
if (targetReg != op1Reg)
if (isRMW)
{
assert(targetReg != op2Reg);
assert(targetReg != op3Reg);
if (targetReg != op1Reg)
{
assert(targetReg != op2Reg);
assert(targetReg != op3Reg);

GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true);
GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg,
/* canSkip */ true);
}
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op2Reg, op3Reg, opt);
}
else
{
GetEmitter()->emitIns_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, op3Reg, opt,
INS_SCALABLE_OPTS_UNPREDICATED);
}
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op2Reg, op3Reg, opt);
break;

default:
unreached();
}