Commit b79badc

only run the remainder of the gpu test suite if one case passes first (#2009) [skip ci]
* only run the remainder of the gpu test suite if one case passes first
* also reduce the test matrix
1 parent 6f02c0c commit b79badc
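The gating works through GitHub Actions job dependencies: the workflow diff below renames the original GPU job to docker-e2e-tests-1st, trims it to a single matrix case, and adds a new docker-e2e-tests job that lists the -1st job in its needs, so the rest of the GPU matrix is only scheduled once that one case has passed. A minimal sketch of the pattern, with job names and matrix values taken from the diff; the checkout, setup-python, Modal install, and env-var steps of the real workflow are omitted here for brevity:

jobs:
  docker-e2e-tests-1st:          # canary: one GPU case runs first
    runs-on: [self-hosted, modal]
    strategy:
      matrix:
        include:
          - cuda: 124
            python_version: "3.11"
            pytorch: 2.4.1
    steps:
      - name: Run tests job on Modal
        run: modal run cicd.tests

  docker-e2e-tests:              # full matrix, gated on the canary
    needs: [pre-commit, pytest, docker-e2e-tests-1st]
    runs-on: [self-hosted, modal]
    timeout-minutes: 90
    strategy:
      fail-fast: false
      matrix:
        include:
          - cuda: 121
            python_version: "3.10"
            pytorch: 2.3.1
            axolotl_extras: mamba-ssm
    steps:
      - name: Run tests job on Modal
        run: modal run cicd.tests

Because a job whose needs dependency fails is skipped by default, a failing first case keeps the remaining GPU matrix off the self-hosted Modal runners entirely.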

File tree

.github/workflows/tests-nightly.yml
.github/workflows/tests.yml

2 files changed: +41 -20 lines


.github/workflows/tests-nightly.yml

Lines changed: 0 additions & 7 deletions
@@ -82,13 +82,6 @@ jobs:
             num_gpus: 1
             axolotl_extras: mamba-ssm
             nightly_build: "true"
-          - cuda: 121
-            cuda_version: 12.1.1
-            python_version: "3.11"
-            pytorch: 2.3.1
-            num_gpus: 1
-            axolotl_extras: mamba-ssm
-            nightly_build: "true"
           - cuda: 124
             cuda_version: 12.4.1
             python_version: "3.11"

.github/workflows/tests.yml

Lines changed: 41 additions & 13 deletions
@@ -72,7 +72,7 @@ jobs:
         run: |
           find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;

-  docker-e2e-tests:
+  docker-e2e-tests-1st:
     if: github.repository_owner == 'axolotl-ai-cloud'
     # this job needs to be run on self-hosted GPU runners...
     runs-on: [self-hosted, modal]
@@ -83,24 +83,52 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: 121
-            cuda_version: 12.1.1
-            python_version: "3.10"
-            pytorch: 2.3.1
-            num_gpus: 1
-            axolotl_extras: mamba-ssm
-          - cuda: 121
-            cuda_version: 12.1.1
-            python_version: "3.11"
-            pytorch: 2.3.1
-            num_gpus: 1
-            axolotl_extras: mamba-ssm
           - cuda: 124
             cuda_version: 12.4.1
             python_version: "3.11"
             pytorch: 2.4.1
             num_gpus: 1
             axolotl_extras:
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Install Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+      - name: Install Modal
+        run: |
+          python -m pip install --upgrade pip
+          pip install modal==0.63.64 jinja2
+      - name: Update env vars
+        run: |
+          echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
+          echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
+          echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
+          echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
+          echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV
+          echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV
+      - name: Run tests job on Modal
+        run: |
+          modal run cicd.tests
+
+  docker-e2e-tests:
+    if: github.repository_owner == 'axolotl-ai-cloud'
+    # this job needs to be run on self-hosted GPU runners...
+    runs-on: [self-hosted, modal]
+    timeout-minutes: 90
+    needs: [pre-commit, pytest, docker-e2e-tests-1st]
+
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - cuda: 121
+            cuda_version: 12.1.1
+            python_version: "3.10"
+            pytorch: 2.3.1
+            num_gpus: 1
+            axolotl_extras: mamba-ssm
           - cuda: 124
             cuda_version: 12.4.1
             python_version: "3.11"
