Skip to content

Commit e2e8dbe

Browse files
authored
CI workflow for Flash Attn (#41857)
ci for flash attn

Co-authored-by: ydshieh <[email protected]>
1 parent 7a833d1 commit e2e8dbe

File tree

4 files changed

+73
-2
lines changed

4 files changed

+73
-2
lines changed

.github/workflows/model_jobs.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ on:
2828
report_repo_id:
2929
required: false
3030
type: string
31+
pytest_marker:
32+
required: false
33+
type: string
3134

3235
env:
3336
HF_HOME: /mnt/cache
@@ -137,7 +140,7 @@ jobs:
137140
- name: Run all tests on GPU
138141
working-directory: /transformers
139142
run: |
140-
script -q -c "PATCH_TESTING_METHODS_TO_COLLECT_OUTPUTS=yes _PATCHED_TESTING_METHODS_OUTPUT_DIR=/transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports python3 -m pytest -rsfE -v --make-reports=${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports tests/${{ matrix.folders }}" test_outputs.txt
143+
script -q -c "PATCH_TESTING_METHODS_TO_COLLECT_OUTPUTS=yes _PATCHED_TESTING_METHODS_OUTPUT_DIR=/transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports python3 -m pytest -rsfE -v -m '${{ inputs.pytest_marker }}' --make-reports=${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports tests/${{ matrix.folders }}" test_outputs.txt
141144
ls -la
142145
# Extract the exit code from the output file
143146
EXIT_CODE=$(tail -1 test_outputs.txt | grep -o 'COMMAND_EXIT_CODE="[0-9]*"' | cut -d'"' -f2)

.github/workflows/self-scheduled-flash-attn-caller.yml

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
name: Nvidia CI - Flash Attn
2+
3+
on:
4+
repository_dispatch:
5+
schedule:
6+
- cron: "17 2 * * *"
7+
push:
8+
branches:
9+
- run_nvidia_ci_flash_attn*
10+
workflow_dispatch:
11+
inputs:
12+
prev_workflow_run_id:
13+
description: 'previous workflow run id to compare'
14+
type: string
15+
required: false
16+
default: ""
17+
other_workflow_run_id:
18+
description: 'other workflow run id to compare'
19+
type: string
20+
required: false
21+
default: ""
22+
23+
24+
# Used for `push` to easily modify the target workflow runs to compare against
25+
env:
26+
prev_workflow_run_id: ""
27+
other_workflow_run_id: ""
28+
29+
30+
jobs:
31+
setup:
32+
name: Setup
33+
runs-on: ubuntu-22.04
34+
steps:
35+
- name: Setup
36+
run: |
37+
mkdir "setup_values"
38+
echo "${{ inputs.prev_workflow_run_id || env.prev_workflow_run_id }}" > "setup_values/prev_workflow_run_id.txt"
39+
echo "${{ inputs.other_workflow_run_id || env.other_workflow_run_id }}" > "setup_values/other_workflow_run_id.txt"
40+
41+
- name: Upload artifacts
42+
uses: actions/upload-artifact@v4
43+
with:
44+
name: setup_values
45+
path: setup_values
46+
47+
48+
model-ci:
49+
name: Model CI
50+
uses: ./.github/workflows/self-scheduled.yml
51+
with:
52+
job: run_models_gpu
53+
slack_report_channel: "#transformers-ci-flash-attn"
54+
docker: huggingface/transformers-all-latest-gpu
55+
ci_event: Daily CI
56+
runner_type: "a10"
57+
report_repo_id: hf-internal-testing/transformers_flash_attn_ci
58+
commit_sha: ${{ github.sha }}
59+
pytest_marker: "flash_attn_test or flash_attn_3_test"
60+
secrets: inherit

.github/workflows/self-scheduled.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@ on:
3838
default: ""
3939
required: false
4040
type: string
41+
pytest_marker:
42+
required: false
43+
type: string
44+
4145

4246
env:
4347
HF_HOME: /mnt/cache
@@ -127,6 +131,7 @@ jobs:
127131
commit_sha: ${{ inputs.commit_sha || github.sha }}
128132
runner_type: ${{ inputs.runner_type }}
129133
report_repo_id: ${{ inputs.report_repo_id }}
134+
pytest_marker: ${{ inputs.pytest_marker }}
130135
secrets: inherit
131136

132137
run_trainer_and_fsdp_gpu:

utils/notification_service.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1407,7 +1407,10 @@ def pop_default(l: list[Any], i: int, default: Any) -> Any:
14071407
if not os.path.isdir(os.path.join(os.getcwd(), f"ci_results_{job_name}")):
14081408
os.makedirs(os.path.join(os.getcwd(), f"ci_results_{job_name}"))
14091409

1410-
nvidia_daily_ci_workflow = "huggingface/transformers/.github/workflows/self-scheduled-caller.yml"
1410+
nvidia_daily_ci_workflow = (
1411+
"huggingface/transformers/.github/workflows/self-scheduled-caller.yml",
1412+
"huggingface/transformers/.github/workflows/self-scheduled-flash-attn-caller.yml",
1413+
)
14111414
amd_daily_ci_workflows = (
14121415
"huggingface/transformers/.github/workflows/self-scheduled-amd-mi325-caller.yml",
14131416
"huggingface/transformers/.github/workflows/self-scheduled-amd-mi355-caller.yml",

0 commit comments

Comments
 (0)