Skip to content

Commit 21439d0

Browse files
authored
[ci] Rework test_runs_on plumbing for release workflows. (#1100)
Progress on #589 and #1097. This changes `configure_target_run.py` to look for the target family in either the "inner family" or the "outer key", so it correctly chooses runners for gfx1151 instead of skipping that test configuration since `gfx115X` and `gfx1151` do not match. I also considered explicit data flow of `test_runs_on` through the workflows, but we preferred to keep some automatic detection so developers triggering workflows do not need to manually line up test families with test runners. Recent test runs: * "Build Windows PyTorch Wheels" with default inputs and ROCm version `7.0.0rc20250804`: https://github.com/ROCm/TheRock/actions/runs/16787740857 * "Build Portable Linux PyTorch Wheels" with default inputs and ROCm version `7.0.0rc20250805`: https://github.com/ROCm/TheRock/actions/runs/16787747432 * "Build Portable Linux PyTorch Wheels" with gfx110X-dgpu and ROCm version `7.0.0rc20250805`: https://github.com/ROCm/TheRock/actions/runs/16787789191
1 parent 231dd86 commit 21439d0

10 files changed

+175
-30
lines changed

.github/workflows/build_portable_linux_pytorch_wheels.yml

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,14 @@ on:
3535
workflow_dispatch:
3636
inputs:
3737
amdgpu_family:
38-
required: true
39-
type: string
38+
type: choice
39+
options:
40+
- gfx110X-dgpu
41+
- gfx1151
42+
- gfx120X-all
43+
- gfx94X-dcgpu
44+
- gfx950-dcgpu
45+
default: gfx94X-dcgpu
4046
python_version:
4147
required: true
4248
type: string
@@ -190,9 +196,9 @@ jobs:
190196
run: python ./build_tools/github_actions/configure_target_run.py
191197

192198
test_pytorch_wheels:
199+
name: Test | ${{ inputs.amdgpu_family }} | ${{ needs.generate_target_to_run.outputs.test_runs_on }}
193200
if: ${{ needs.generate_target_to_run.outputs.test_runs_on != '' }}
194201
needs: [build_pytorch_wheels, generate_target_to_run]
195-
196202
uses: ./.github/workflows/test_pytorch_wheels.yml
197203
with:
198204
amdgpu_family: ${{ inputs.amdgpu_family }}

.github/workflows/build_windows_pytorch_wheels.yml

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,18 @@ on:
2727
workflow_dispatch:
2828
inputs:
2929
amdgpu_family:
30-
required: true
31-
type: string
30+
type: choice
31+
options:
32+
- gfx110X-dgpu
33+
- gfx1151
34+
- gfx120X-all
35+
- gfx94X-dcgpu
36+
- gfx950-dcgpu
37+
default: gfx1151
3238
python_version:
3339
required: true
3440
type: string
35-
default:
41+
default: "3.12"
3642
release_type:
3743
description: The type of release to build ("nightly", or "dev")
3844
type: string
@@ -180,6 +186,7 @@ jobs:
180186
run: python ./build_tools/github_actions/configure_target_run.py
181187

182188
test_pytorch_wheels:
189+
name: Test | ${{ inputs.amdgpu_family }} | ${{ needs.generate_target_to_run.outputs.test_runs_on }}
183190
if: ${{ needs.generate_target_to_run.outputs.test_runs_on != '' }}
184191
needs: [build_pytorch_wheels, generate_target_to_run]
185192
uses: ./.github/workflows/test_pytorch_wheels.yml

.github/workflows/release_portable_linux_packages.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,12 @@ jobs:
198198
uses: benc-uk/workflow-dispatch@e2e5e9a103e331dad343f381a29e654aea3cf8fc # v1.2.4
199199
with:
200200
workflow: test_release_packages.yml
201-
inputs: '{ "version": "${{ needs.setup_metadata.outputs.version }}", "tag": "nightly-tarball", "file_name": "${{ env.FILE_NAME }}", "target": "${{ matrix.target_bundle.amdgpu_family }}" }'
201+
inputs: |
202+
{ "version": "${{ needs.setup_metadata.outputs.version }}",
203+
"tag": "nightly-tarball",
204+
"file_name": "${{ env.FILE_NAME }}",
205+
"target": "${{ matrix.target_bundle.amdgpu_family }}"
206+
}
202207
203208
- name: Trigger building PyTorch wheels
204209
if: ${{ github.repository_owner == 'ROCm' }}

.github/workflows/release_portable_linux_pytorch_wheels.yml

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,14 @@ on:
2424
workflow_dispatch:
2525
inputs:
2626
amdgpu_family:
27-
required: true
28-
type: string
27+
type: choice
28+
options:
29+
- gfx110X-dgpu
30+
- gfx1151
31+
- gfx120X-all
32+
- gfx94X-dcgpu
33+
- gfx950-dcgpu
34+
default: gfx94X-dcgpu
2935
release_type:
3036
description: The type of release to build ("nightly", or "dev")
3137
type: string
@@ -48,8 +54,8 @@ permissions:
4854
contents: read
4955

5056
jobs:
51-
build:
52-
name: PyTorch Wheels | ${{ inputs.amdgpu_family }} | Python ${{ matrix.python_version }}
57+
release:
58+
name: Release PyTorch | ${{ inputs.amdgpu_family }} | Python ${{ matrix.python_version }}
5359
strategy:
5460
fail-fast: false
5561
matrix:

.github/workflows/release_windows_pytorch_wheels.yml

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,14 @@ on:
2424
workflow_dispatch:
2525
inputs:
2626
amdgpu_family:
27-
required: true
28-
type: string
27+
type: choice
28+
options:
29+
- gfx110X-dgpu
30+
- gfx1151
31+
- gfx120X-all
32+
- gfx94X-dcgpu
33+
- gfx950-dcgpu
34+
default: gfx1151
2935
release_type:
3036
description: The type of release to build ("nightly", or "dev")
3137
type: string
@@ -47,7 +53,8 @@ permissions:
4753
contents: read
4854

4955
jobs:
50-
build:
56+
release:
57+
name: Release PyTorch | ${{ inputs.amdgpu_family }} | Python ${{ matrix.python_version }}
5158
strategy:
5259
fail-fast: false
5360
matrix:

.github/workflows/test_release_packages.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ permissions:
1717

1818
jobs:
1919
generate_target_to_run:
20+
name: Generate target_to_run
2021
runs-on: ubuntu-24.04
2122
outputs:
2223
test_runs_on: ${{ steps.configure.outputs.test-runs-on }}
@@ -27,10 +28,10 @@ jobs:
2728
- name: Generating target to run
2829
id: configure
2930
env:
30-
TARGET: ${{ inputs.target }}
31+
TARGET: ${{ inputs.amdgpu_family }}
32+
PLATFORM: "linux"
3133
run: python ./build_tools/github_actions/configure_target_run.py
3234

33-
3435
test_release_packages:
3536
# If there is a test machine available
3637
if: ${{ needs.generate_target_to_run.outputs.test_runs_on != '' }}

build_tools/github_actions/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,12 @@ add_test(
44
"${CMAKE_CURRENT_SOURCE_DIR}/tests/configure_ci_test.py"
55
)
66

7+
add_test(
8+
NAME build_tools_github_actions_configure_target_run_test
9+
COMMAND "${Python3_EXECUTABLE}"
10+
"${CMAKE_CURRENT_SOURCE_DIR}/tests/configure_target_run_test.py"
11+
)
12+
713
add_test(
814
NAME build_tools_github_actions_determine_version_test
915
COMMAND "${Python3_EXECUTABLE}"

build_tools/github_actions/configure_target_run.py

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
"""This file looks up the test-runs-on runner label for a given GPU family.
2+
3+
Environment variable inputs:
4+
* 'TARGET': A GPU family like 'gfx94X-dcgpu' or 'gfx1151', corresponding
5+
to a release index.
6+
* 'PLATFORM': "linux" or "windows"
7+
"""
8+
19
import os
210
from amdgpu_family_matrix import (
311
amdgpu_family_info_matrix_presubmit,
@@ -6,32 +14,46 @@
614

715
from github_actions_utils import *
816

9-
# This file helps configure which target to run
10-
11-
# TODO (geomin12): this is very hard-coded to a very specific use-case.
12-
# Once portable_linux_package_matrix.yml matures, this will mature as well
13-
# Some logic is duplicated with fetch_package_targets.py
14-
1517

16-
def main(target: str, platform: str):
18+
def get_runner_label(target: str, platform: str) -> str:
19+
print(f"Searching for a runner for target '{target}' on platform '{platform}'")
1720
amdgpu_family_info_matrix = (
1821
amdgpu_family_info_matrix_presubmit | amdgpu_family_info_matrix_postsubmit
1922
)
2023
for key, info_for_key in amdgpu_family_info_matrix.items():
21-
# Only consider items containing the amdgpu_family (ex: gfx94X in gfx94X-dcgpu)
22-
if key not in target.lower():
23-
continue
24-
24+
print(f"Checking key '{key}' with info:\n {info_for_key}")
2525
platform_for_key = info_for_key.get(platform)
2626

2727
if not platform_for_key:
2828
# Some AMDGPU families are only supported on certain platforms.
29+
print(f" Skipping since this entry has no platform '{platform}'")
30+
continue
31+
32+
# Check against both the inner "family" and the outer "key". If neither
33+
# match then skip. Workflows are expected to use the inner "family"
34+
# but manually triggered runs may use the outer "key" instead, so we'll
35+
# be a bit lenient here.
36+
# This needs a rework, see https://github.com/ROCm/TheRock/issues/1097.
37+
family_for_platform = platform_for_key.get("family")
38+
if target != family_for_platform and key not in target.lower():
39+
print(
40+
f" Skipping since the target '{target}' does not match the family '{family_for_platform}'"
41+
)
2942
continue
3043

3144
# If there is a test machine available for this target, run on it.
3245
test_runs_on_machine = platform_for_key.get("test-runs-on")
3346
if test_runs_on_machine:
34-
gha_set_output({"test-runs-on": test_runs_on_machine})
47+
print(f" Found runner: '{test_runs_on_machine}'")
48+
return test_runs_on_machine
49+
50+
return ""
51+
52+
53+
def main(target: str, platform: str):
54+
runner_label = get_runner_label(target, platform)
55+
if runner_label:
56+
gha_set_output({"test-runs-on": runner_label})
3557

3658

3759
if __name__ == "__main__":

build_tools/github_actions/fetch_package_targets.py

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,50 @@
1+
"""This file helps generate a package target matrix for workflows.
2+
3+
Environment variable inputs:
4+
* 'AMDGPU_FAMILIES': A comma separated list of AMD GPU families, e.g.
5+
`gfx94X,gfx103x`, or empty for the default list
6+
* 'THEROCK_PACKAGE_PLATFORM': "linux" or "windows"
7+
8+
Outputs written to GITHUB_OUTPUT:
9+
* 'package_targets': JSON list of the form
10+
[
11+
{
12+
"amdgpu_family": "gfx94X-dcgpu",
13+
"test_machine": "linux-mi300-1gpu-ossci-rocm"
14+
},
15+
{
16+
"amdgpu_family": "gfx110X-dgpu",
17+
"test_machine": ""
18+
}
19+
]
20+
21+
Example usage:
22+
23+
```yml
24+
jobs:
25+
setup_metadata:
26+
runs-on: ubuntu-24.04
27+
outputs:
28+
package_targets: ${{ steps.configure.outputs.package_targets }}
29+
30+
steps:
31+
- name: Generating package target matrix
32+
id: configure
33+
env:
34+
AMDGPU_FAMILIES: ${{ inputs.families }}
35+
THEROCK_PACKAGE_PLATFORM: "windows"
36+
run: python ./build_tools/github_actions/fetch_package_targets.py
37+
38+
windows_packages:
39+
name: ${{ matrix.target_bundle.amdgpu_family }}::Build Windows
40+
runs-on: 'windows-2022'
41+
needs: [setup_metadata]
42+
strategy:
43+
matrix:
44+
target_bundle: ${{ fromJSON(needs.setup_metadata.outputs.package_targets) }}
45+
```
46+
"""
47+
148
import os
249
import json
350
from amdgpu_family_matrix import (
@@ -8,8 +55,6 @@
855

956
from github_actions_utils import *
1057

11-
# This file helps generate a package target matrix for workflows.
12-
1358

1459
def determine_package_targets(args):
1560
amdgpu_families = args.get("AMDGPU_FAMILIES")
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
from pathlib import Path
2+
import os
3+
import sys
4+
import unittest
5+
6+
sys.path.insert(0, os.fspath(Path(__file__).parent.parent))
7+
import configure_target_run
8+
9+
10+
class ConfigureTargetRunTest(unittest.TestCase):
11+
def test_linux_gfx94X(self):
12+
# gfx94x is the outer key used to construct workflow pipelines, while
13+
# gfx94X-dcgpu is the inner key, which we use for package names. When
14+
# run from a workflow, we expect to only work on the inner keys.
15+
runner_label = configure_target_run.get_runner_label("gfx94x", "linux")
16+
self.assertEqual(runner_label, "linux-mi325-1gpu-ossci-rocm")
17+
18+
def test_linux_gfx94X_dcgpu(self):
19+
# gfx94x is the outer key used to construct workflow pipelines, while
20+
# gfx94X-dcgpu is the inner key, which we use for package names. When
21+
# run from a workflow, we expect to only work on the inner keys.
22+
runner_label = configure_target_run.get_runner_label("gfx94X-dcgpu", "linux")
23+
self.assertEqual(runner_label, "linux-mi325-1gpu-ossci-rocm")
24+
25+
def test_windows_gfx115x(self):
26+
runner_label = configure_target_run.get_runner_label("gfx115x", "windows")
27+
self.assertEqual(runner_label, "windows-strix-halo-gpu-rocm")
28+
29+
def test_windows_gfx1151(self):
30+
runner_label = configure_target_run.get_runner_label("gfx1151", "windows")
31+
self.assertEqual(runner_label, "windows-strix-halo-gpu-rocm")
32+
33+
def test_windows_gfx120X_all(self):
34+
runner_label = configure_target_run.get_runner_label("gfx120X-all", "windows")
35+
# No runner label yet.
36+
self.assertEqual(runner_label, "")
37+
38+
39+
if __name__ == "__main__":
40+
unittest.main()

0 commit comments

Comments
 (0)