Skip to content

Commit 6d0e3e5

Browse files
authored
JIT: Unspill normalize-on-load variables using exact small type (#90318)
The existing logic would sometimes unspill using the type of the local that is being unspilled. This type is often wider than the exact small type in the LclVarDsc, since NOL locals are normally expanded as CAST(<small type>, LCL_VAR<int>). This causes problems since morph will in some cases avoid inserting normalization for NOL locals when it has a subrange assertion available. This optimization relies on the backend always ensuring that the local will be normalized as part of unspilling and args homing. Size-wise regressions are expected on xarch since the encoding of the normalizing load is larger. However, as we have seen before, using wide loads can cause significant store-forwarding stalls which can have large negative impact on performance, so overall there is an expected perf benefit of using the small loads (in addition to the correctness fix). Fix #90219
1 parent 1d09abf commit 6d0e3e5

File tree

4 files changed

+170
-16
lines changed

4 files changed

+170
-16
lines changed

src/coreclr/jit/codegenlinear.cpp

Lines changed: 42 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1211,39 +1211,66 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
12111211
// Reset spilled flag, since we are going to load a local variable from its home location.
12121212
unspillTree->gtFlags &= ~GTF_SPILLED;
12131213

1214-
GenTreeLclVar* lcl = unspillTree->AsLclVar();
1215-
LclVarDsc* varDsc = compiler->lvaGetDesc(lcl);
1216-
var_types spillType = varDsc->GetRegisterType(lcl);
1217-
assert(spillType != TYP_UNDEF);
1214+
GenTreeLclVar* lcl = unspillTree->AsLclVar();
1215+
LclVarDsc* varDsc = compiler->lvaGetDesc(lcl);
1216+
var_types unspillType = varDsc->GetRegisterType(lcl);
1217+
assert(unspillType != TYP_UNDEF);
12181218

12191219
// TODO-Cleanup: The following code could probably be further merged and cleaned up.
12201220
#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
1221-
// Load local variable from its home location.
1222-
// Never allow truncating the locals here, otherwise a subsequent
1223-
// use of the local with a wider type would see the truncated
1224-
// value. We do allow wider loads as those can be efficient even
1225-
// when unaligned and might be smaller encoding wise (on xarch).
1226-
var_types lclLoadType = varDsc->lvNormalizeOnLoad() ? varDsc->TypeGet() : varDsc->GetStackSlotHomeType();
1227-
assert(lclLoadType != TYP_UNDEF);
1228-
if (genTypeSize(spillType) < genTypeSize(lclLoadType))
1221+
1222+
// Pick type to reload register from stack with. Note that in
1223+
// general, the type of 'lcl' does not have any relation to the
1224+
// type of 'varDsc':
1225+
//
1226+
// * For normalize-on-load (NOL) locals it is wider under normal
1227+
// circumstances, where morph has added a cast on top. In some
1228+
// cases it is the same, when morph has used a subrange assertion
1229+
// to avoid normalizing.
1230+
//
1231+
// * For all locals it can be narrower in some cases, when
1232+
// lowering optimizes to use a smaller typed `cmp` (e.g. 32-bit cmp
1233+
// for 64-bit local, or 8-bit cmp for 16-bit local).
1234+
//
1235+
// * For byrefs it can differ in GC-ness (TYP_I_IMPL vs TYP_BYREF).
1236+
//
1237+
// In the NOL case the potential use of subrange assertions means
1238+
// we always have to normalize, even if 'lcl' is wide; we could
1239+
// have a GTF_SPILLED LCL_VAR<int>(NOL local) with a future
1240+
// LCL_VAR<ushort>(same NOL local), where the latter local then
1241+
// relies on the normalization to have happened here as part of
1242+
// unspilling.
1243+
//
1244+
if (varDsc->lvNormalizeOnLoad())
12291245
{
1230-
spillType = lclLoadType;
1246+
unspillType = varDsc->TypeGet();
1247+
}
1248+
else
1249+
{
1250+
// Potentially narrower -- see if we should widen.
1251+
var_types lclLoadType = varDsc->GetStackSlotHomeType();
1252+
assert(lclLoadType != TYP_UNDEF);
1253+
if (genTypeSize(unspillType) < genTypeSize(lclLoadType))
1254+
{
1255+
unspillType = lclLoadType;
1256+
}
12311257
}
12321258

12331259
#if defined(TARGET_LOONGARCH64)
12341260
if (varTypeIsFloating(spillType) && emitter::isGeneralRegister(tree->GetRegNum()))
12351261
{
1236-
spillType = spillType == TYP_FLOAT ? TYP_INT : TYP_LONG;
1262+
unspillType = unspillType == TYP_FLOAT ? TYP_INT : TYP_LONG;
12371263
}
12381264
#endif
12391265
#elif defined(TARGET_ARM)
12401266
// No normalizing for ARM
12411267
#else
12421268
NYI("Unspilling not implemented for this target architecture.");
12431269
#endif
1270+
12441271
bool reSpill = ((unspillTree->gtFlags & GTF_SPILL) != 0);
12451272
bool isLastUse = lcl->IsLastUse(0);
1246-
genUnspillLocal(lcl->GetLclNum(), spillType, lcl->AsLclVar(), tree->GetRegNum(), reSpill, isLastUse);
1273+
genUnspillLocal(lcl->GetLclNum(), unspillType, lcl->AsLclVar(), tree->GetRegNum(), reSpill, isLastUse);
12471274
}
12481275
else if (unspillTree->IsMultiRegLclVar())
12491276
{
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using Xunit;
5+
6+
// Generated by Fuzzlyn v1.5 on 2023-08-05 16:08:41
7+
// Run on X86 Windows
8+
// Seed: 7610693270284065118
9+
// Reduced from 12.0 KiB to 2.2 KiB in 00:01:06
10+
// Debug: Outputs 1
11+
// Release: Outputs 2
12+
public interface I0
13+
{
14+
}
15+
16+
public class C0 : I0
17+
{
18+
public ulong F0;
19+
public uint F4;
20+
public C0(ulong f0, uint f4)
21+
{
22+
F0 = f0;
23+
F4 = f4;
24+
}
25+
}
26+
27+
public class Runtime_90219
28+
{
29+
public static IRuntime s_rt;
30+
public static I0 s_1;
31+
public static uint s_2;
32+
public static byte[] s_4 = new byte[]{0};
33+
public static byte Result;
34+
35+
[Fact]
36+
public static int TestEntryPoint()
37+
{
38+
CollectibleALC alc = new CollectibleALC();
39+
System.Reflection.Assembly asm = alc.LoadFromAssemblyPath(System.Reflection.Assembly.GetExecutingAssembly().Location);
40+
System.Reflection.MethodInfo mi = asm.GetType(typeof(Runtime_90219).FullName).GetMethod(nameof(MainInner));
41+
System.Type runtimeTy = asm.GetType(typeof(Runtime).FullName);
42+
return (int)mi.Invoke(null, new object[]{System.Activator.CreateInstance(runtimeTy)});
43+
}
44+
45+
public static int MainInner(IRuntime rt)
46+
{
47+
s_rt = rt;
48+
var vr8 = new C0(0, 0);
49+
var vr12 = s_4[0];
50+
vr8.F0 *= M2(vr12);
51+
long[] vr9 = new long[]{0};
52+
bool vr10 = (int)M2(0) <= vr9[0];
53+
return Result == 1 ? 100 : 101;
54+
}
55+
56+
public static ulong M2(byte arg0)
57+
{
58+
for (int var0 = 0; var0 < 2; var0++)
59+
{
60+
var vr1 = new C0(0, 0);
61+
var vr3 = new C0(0, 0);
62+
uint vr14 = s_2;
63+
s_rt.WriteLine("c_0", vr14);
64+
uint vr15 = vr3.F4;
65+
s_2 = M3(vr1, vr15);
66+
s_1 = s_1;
67+
s_1 = new C0(0, 0);
68+
s_rt.WriteLine("c_6", var0);
69+
}
70+
71+
var vr5 = new C0(0, 1);
72+
uint vr18 = vr5.F4;
73+
arg0 = (byte)vr18;
74+
var vr7 = new C0(0, 1838341904U);
75+
if ((arg0 >= (byte)M3(vr7, 0)))
76+
{
77+
arg0 <<= 1;
78+
}
79+
80+
s_rt.WriteLine("c_7", arg0);
81+
return 0;
82+
}
83+
84+
public static uint M3(C0 argThis, uint arg0)
85+
{
86+
s_rt.WriteLine("c_0", arg0);
87+
return argThis.F4;
88+
}
89+
}
90+
91+
public interface IRuntime
92+
{
93+
void WriteLine<T>(string site, T value);
94+
}
95+
96+
public class Runtime : IRuntime
97+
{
98+
public void WriteLine<T>(string site, T value)
99+
{
100+
if (typeof(T) == typeof(byte))
101+
Runtime_90219.Result = (byte)(object)value;
102+
}
103+
}
104+
105+
public class CollectibleALC : System.Runtime.Loader.AssemblyLoadContext
106+
{
107+
public CollectibleALC(): base(true)
108+
{
109+
}
110+
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
<PropertyGroup>
3+
<Optimize>True</Optimize>
4+
</PropertyGroup>
5+
<ItemGroup>
6+
<Compile Include="$(MSBuildProjectName).cs" />
7+
</ItemGroup>
8+
</Project>

src/tests/issues.targets

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1063,6 +1063,9 @@
10631063
<ExcludeList Include="$(XunitTestBinBase)/JIT/Regression/JitBlue/Runtime_76273/Runtime_76273/**">
10641064
<Issue>https://github.com/dotnet/runtimelab/issues/155: Assembly.Load</Issue>
10651065
</ExcludeList>
1066+
<ExcludeList Include="$(XunitTestBinBase)/JIT/Regression/JitBlue/Runtime_90219/Runtime_90219/**">
1067+
<Issue>https://github.com/dotnet/runtimelab/issues/155: Assembly.Load</Issue>
1068+
</ExcludeList>
10661069
<ExcludeList Include="$(XunitTestBinBase)/reflection\DefaultInterfaceMethods\Emit\*">
10671070
<Issue>https://github.com/dotnet/runtimelab/issues/155: Reflection.Emit</Issue>
10681071
</ExcludeList>
@@ -1967,6 +1970,9 @@
19671970
<ExcludeList Include="$(XunitTestBinBase)/tracing/eventpipe/simpleruntimeeventvalidation/**">
19681971
<Issue>https://github.com/dotnet/runtime/issues/88499</Issue>
19691972
</ExcludeList>
1973+
<ExcludeList Include="$(XunitTestBinBase)/JIT/Regression/JitBlue/Runtime_90219/Runtime_90219/*">
1974+
<Issue>https://github.com/dotnet/runtime/issues/90374</Issue>
1975+
</ExcludeList>
19701976
</ItemGroup>
19711977

19721978
<!-- Known failures for mono runtime on Windows -->
@@ -2543,7 +2549,7 @@
25432549
<Issue>https://github.com/dotnet/runtime/issues/48914</Issue>
25442550
</ExcludeList>
25452551
<ExcludeList Include="$(XunitTestBinBase)/JIT/Regression/JitBlue/Runtime_76273/Runtime_76273/**">
2546-
<Issue>Fuzzlyn</Issue>
2552+
<Issue>https://github.com/dotnet/runtime/issues/90372</Issue>
25472553
</ExcludeList>
25482554
<ExcludeList Include = "$(XUnitTestBinBase)/JIT/HardwareIntrinsics/X86/X86Base/Pause*/**">
25492555
<Issue>https://github.com/dotnet/runtime/issues/73454;https://github.com/dotnet/runtime/pull/61707#issuecomment-973122341</Issue>
@@ -3215,6 +3221,9 @@
32153221
<ExcludeList Include="$(XunitTestBinBase)/Loader/StartupHooks/**">
32163222
<Issue>Loads an assembly from file</Issue>
32173223
</ExcludeList>
3224+
<ExcludeList Include="$(XunitTestBinBase)/JIT/Regression/JitBlue/Runtime_90219/Runtime_90219/*">
3225+
<Issue>Loads an assembly from file</Issue>
3226+
</ExcludeList>
32183227
</ItemGroup>
32193228

32203229
<ItemGroup Condition="'$(TargetArchitecture)' == 'wasm'">

0 commit comments

Comments
 (0)