Sliding Window block classification logic #747

Workflow file for this run

name: AMD Perf Kernel Tests

on:
  workflow_dispatch:
  pull_request:
    branches: [main_perf]

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  Integration-Tests-AMD:
    runs-on: ${{ matrix.runner }}
    strategy:
      matrix:
        runner: [linux-mi300-gpu-1]
      fail-fast: false # do not fail the entire job when one matrix entry fails
    timeout-minutes: 720 # self-hosted runners can run jobs longer than the default of 360 minutes
    container:
      image: rocm/pytorch:latest
      options: --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --shm-size 16G --group-add video --user root
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Show Device Info
        run: |
          rocminfo | grep gfx
      - name: Uninstall Triton
        run: |
          pip uninstall -y triton
          rm -rf ~/.triton
          rm -rf ./triton/python/build
      - name: Install Triton
        run: |
          pip install triton==3.3.0
      - name: Show Triton version
        run: |
          pip show triton
      - name: Build
        run: |
          FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE" python setup.py install
      - name: Install dependencies for bench and misc
        run: |
          pip install matplotlib pandas tabulate
      - name: AMD Internal Tests
        run: |
          FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE" FLASH_ATTENTION_TRITON_AMD_AUTOTUNE=0 pytest flash_attn/flash_attn_triton_amd/test.py
      - name: Flash Attention Tests
        run: |
          FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE" FLASH_ATTENTION_TRITON_AMD_AUTOTUNE=0 pytest -n 8 tests/test_flash_attn_triton_amd.py
      - name: AMD Bench
        run: |
          python flash_attn/flash_attn_triton_amd/bench.py -benchmark_fn flash_attn_func
          python flash_attn/flash_attn_triton_amd/bench.py -benchmark_fn flash_attn_varlen_func
          python flash_attn/flash_attn_triton_amd/bench.py -benchmark_fn flash_attn_with_kvcache
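
For debugging failures from this run, the same pipeline can be approximated outside CI. The sketch below mirrors the workflow's container options and core steps on a ROCm host with Docker; the bind mount, working directory, and step selection are illustrative assumptions, not part of the workflow itself.

# Minimal local-reproduction sketch, assuming a ROCm host with Docker and the
# repository checked out at $PWD. Mount path and chosen steps are illustrative.
docker run --rm -it --device=/dev/kfd --device=/dev/dri \
  --security-opt seccomp=unconfined --shm-size 16G --group-add video --user root \
  -v "$PWD":/workspace -w /workspace rocm/pytorch:latest bash -c '
    pip uninstall -y triton && rm -rf ~/.triton
    pip install triton==3.3.0
    FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE" python setup.py install
    FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE" FLASH_ATTENTION_TRITON_AMD_AUTOTUNE=0 \
      pytest flash_attn/flash_attn_triton_amd/test.py
  '

Because the workflow also declares a workflow_dispatch trigger, it can be started manually, e.g. with the GitHub CLI: gh workflow run "AMD Perf Kernel Tests".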