@@ -406,8 +406,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
406
406
// Handle the case where op2 is an operation that needs an embedded mask
407
407
GenTree* op2 = intrin.op2 ;
408
408
assert (intrin.id == NI_Sve_ConditionalSelect);
409
- assert (op2->isContained ());
410
409
assert (op2->OperIsHWIntrinsic ());
410
+ assert (op2->isContained ());
411
411
412
412
// Get the registers and intrinsics that need an embedded mask
413
413
const HWIntrinsic intrinEmbMask (op2->AsHWIntrinsic ());
@@ -439,10 +439,54 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
439
439
{
440
440
case 1 :
441
441
assert (!instrIsRMW);
442
+
442
443
if (targetReg != falseReg)
443
444
{
444
- GetEmitter ()->emitIns_R_R (INS_sve_movprfx, EA_SCALABLE, targetReg, falseReg);
445
+ // If targetReg is not the same as `falseReg` then we need to move
446
+ // the `falseReg` to `targetReg`.
447
+
448
+ if (intrin.op3 ->isContained ())
449
+ {
450
+ assert (intrin.op3 ->IsVectorZero ());
451
+ if (intrin.op1 ->isContained ())
452
+ {
453
+ // We already skip importing ConditionalSelect if op1 == trueAll, however
454
+ // if we still see it here, it is because we wrapped the predicated instruction
455
+ // inside ConditionalSelect.
456
+ // As such, no need to move the `falseReg` to `targetReg`
457
+ // because the predicated instruction will eventually set it.
458
+ assert (intrin.op1 ->IsMaskAllBitsSet ());
459
+ }
460
+ else
461
+ {
462
+ // If falseValue is zero, just zero out those lanes of targetReg using `movprfx`
463
+ // and /Z
464
+ GetEmitter ()->emitIns_R_R_R (INS_sve_movprfx, emitSize, targetReg, maskReg, targetReg,
465
+ opt);
466
+ }
467
+ }
468
+ else if (targetReg == embMaskOp1Reg)
469
+ {
470
+ // target != falseValue, but target holds `embMaskOp1Reg`, which we do not want to overwrite.
471
+ // We will first do the predicated operation and then use `sel` to take the inactive
472
+ // elements from falseValue
473
+
474
+ // We cannot use `movprfx` here to move falseReg to targetReg because that will
475
+ // overwrite the value of embMaskOp1Reg which is present in targetReg.
476
+ GetEmitter ()->emitIns_R_R_R (insEmbMask, emitSize, targetReg, maskReg, embMaskOp1Reg, opt);
477
+
478
+ GetEmitter ()->emitIns_R_R_R_R (INS_sve_sel, emitSize, targetReg, maskReg, targetReg,
479
+ falseReg, opt, INS_SCALABLE_OPTS_UNPREDICATED);
480
+ break ;
481
+ }
482
+ else
483
+ {
484
+ // At this point, target != embMaskOp1Reg != falseReg, so just go ahead
485
+ // and move the falseReg unpredicated into targetReg.
486
+ GetEmitter ()->emitIns_R_R (INS_sve_movprfx, EA_SCALABLE, targetReg, falseReg);
487
+ }
445
488
}
489
+
446
490
GetEmitter ()->emitIns_R_R_R (insEmbMask, emitSize, targetReg, maskReg, embMaskOp1Reg, opt);
447
491
break ;
448
492
0 commit comments