Skip to content

[Nightly] Add Inductor UT #7236

[Nightly] Add Inductor UT

[Nightly] Add Inductor UT #7236

Workflow file for this run

name: pull
on:
pull_request:
types:
- opened
- synchronize
- reopened
- converted_to_draft
- ready_for_review
- labeled
branches:
- main
- release/*
permissions: read-all
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
cancel-in-progress: true
jobs:
preci-lint-check:
name: preci-lint-check
if: ${{ github.repository_owner == 'intel' }}
runs-on: ubuntu-22.04
timeout-minutes: 30
steps:
- name: Checkout torch-xpu-ops
uses: actions/checkout@v4
with:
path: torch-xpu-ops
- name: Run lint check
run: |
export ADDITIONAL_LINTRUNNER_ARGS="--skip CLANGTIDY,CLANGFORMAT,MERGE_CONFLICTLESS_CSV --all-files"
cd ./torch-xpu-ops
bash .github/scripts/lintrunner.sh
- name: Run lint check with Clang
run: |
sudo apt update && sudo apt install -y libomp-dev
rm -rf pytorch
git clone https://github.com/pytorch/pytorch pytorch
cd pytorch && cp -r ../torch-xpu-ops third_party/
export ADDITIONAL_LINTRUNNER_ARGS="--take CLANGTIDY,CLANGFORMAT \
build/xpu/**/*.* \
build/xpu/*.* \
third_party/torch-xpu-ops/src/*.* \
third_party/torch-xpu-ops/src/**/*.* \
third_party/torch-xpu-ops/src/**/**/*.* \
third_party/torch-xpu-ops/src/**/**/**/*.*"
export CLANG=1
bash third_party/torch-xpu-ops/.github/scripts/lintrunner.sh
preci-conditions-filter:
name: preci-conditions-filter
if: ${{ github.event.pull_request.draft == false }}
needs: [preci-lint-check]
runs-on: ubuntu-22.04
timeout-minutes: 10
env:
GH_TOKEN: ${{ github.token }}
outputs:
src_changed: ${{ steps.check-files.outputs.src_changed }}
has_label: ${{ steps.check-label.outputs.has_label }}
disabled_tests: ${{ steps.check-pr-desc.outputs.disabled_tests }}
steps:
- uses: dorny/paths-filter@v2
id: check-files
with:
filters: |
src_changed:
- 'cmake/**'
- 'tools/**'
- 'src/**.cmake'
- 'CMakeLists.txt'
- 'test/sycl/CMakeLists.txt'
- 'src/xccl/CMakeLists.txt'
- 'src/ATen/CMakeLists.txt'
- 'src/CMakeLists.txt'
- '.github/workflows/_windows_ut.yml'
- name: Check Label
id: check-label
run: |
echo "has_label=${{ contains(github.event.pull_request.labels.*.name, 'windows_ci') }}" >> $GITHUB_OUTPUT
- name: Check PR infos
id: check-pr-desc
run: |
set -x -e -o pipefail
sudo apt update && sudo apt install -y dos2unix
gh --repo ${GITHUB_REPOSITORY} pr view ${{ github.event.pull_request.number }} 2>&1 |tee pr-info.txt
dos2unix pr-info.txt
disabled_tests="$(awk '/disable_/{printf("%s ", $0)}' pr-info.txt)"
echo "disabled_tests=${disabled_tests}" |tee "${GITHUB_OUTPUT}"
preci-linux-build:
name: preci-linux
if: ${{ !contains(needs.preci-conditions-filter.outputs.disabled_tests, 'disable_all')}}
needs: [preci-conditions-filter]
secrets: inherit
uses: ./.github/workflows/_linux_build.yml
with:
pytorch: main
runner: pvc_e2e
preci-linux-ut:
name: preci-linux
needs: [preci-conditions-filter, preci-linux-build]
uses: ./.github/workflows/_linux_ut.yml
with:
disabled_tests: ${{ needs.preci-conditions-filter.outputs.disabled_tests }}
ut: op_regression,op_regression_dev1,op_transformers,op_extended,op_ut,xpu_distributed
runner: linux.idc.xpu
preci-linux-e2e:
if: ${{ !contains(needs.preci-conditions-filter.outputs.disabled_tests, 'disable_e2e') }}
name: preci-linux / e2e_test
needs: [preci-conditions-filter, preci-linux-build]
runs-on: pvc_e2e
env:
GH_TOKEN: ${{ github.token }}
reference_issue: 1645
timeout-minutes: 300
steps:
- name: Checkout torch-xpu-ops
uses: actions/checkout@v4
- name: Prepare Conda ENV
run: |
which conda && conda clean -ay
conda remove --all -y -n e2e_ci || rm -rf $(dirname ${CONDA_EXE})/../envs/e2e_ci
conda create -n e2e_ci python=3.10 cmake ninja -y
source activate e2e_ci
pip install pandas scipy psutil requests
- name: Download Pytorch wheel
uses: actions/download-artifact@v4
with:
name: Torch-XPU-Wheel-${{ github.event.pull_request.number }}
- name: Install Pytorch XPU
run: |
source activate e2e_ci
pip install --force-reinstall ${{ github.workspace }}/torch*.whl
TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)')
cd ../
rm -rf pytorch || sudo rm -rf pytorch
git clone https://github.com/pytorch/pytorch pytorch
cd pytorch && git checkout ${TORCH_COMMIT_ID}
# apply PRs for stock pytorch
# https://github.com/pytorch/pytorch/pull/152940 internal use only for subset model list
python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py -e https://github.com/pytorch/pytorch/pull/152940
git show -s && git status && git diff
- name: Triton Installation
run: |
source activate e2e_ci
cd ../pytorch
pip install cmake ninja pybind11
rm -rf pytorch_triton_xpu-*.whl
python .github/scripts/build_triton_wheel.py --device xpu
pip install pytorch_triton_xpu-*.whl
- name: Identify pinned versions
run: |
cd ../pytorch
echo "TORCH_BRANCH_ID=$(git rev-parse --abbrev-ref HEAD)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "TORCH_COMMIT_ID=$(git rev-parse HEAD)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "TRITON_COMMIT_ID=$(<.ci/docker/ci_commit_pins/triton-xpu.txt)" >> "${GITHUB_ENV}"
echo "TORCHVISION_COMMIT_ID=$(<.github/ci_commit_pins/vision.txt)" >> "${GITHUB_ENV}"
echo "TORCHBENCH_COMMIT_ID=$(<.github/ci_commit_pins/torchbench.txt)" >> "${GITHUB_ENV}"
echo "TORCHAUDIO_COMMIT_ID=$(<.github/ci_commit_pins/audio.txt)" >> "${GITHUB_ENV}"
echo "TRANSFORMERS_VERSION=$(<.ci/docker/ci_commit_pins/huggingface.txt)" >> "${GITHUB_ENV}"
echo "TIMM_COMMIT_ID=$(<.ci/docker/ci_commit_pins/timm.txt)" >> "${GITHUB_ENV}"
. /etc/os-release
echo "OS_PRETTY_NAME=${PRETTY_NAME}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "GCC_VERSION=$(gcc -dumpversion)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
source ../torch-xpu-ops/.github/scripts/env.sh
echo "DRIVER_VERSION=$(sycl-ls |grep 'opencl:gpu' |awk '{print $NF}' |sort |uniq -c |sed 's/ //g;s/\[/*[/')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "KERNEL_VERSION=$(uname -rv 2>&1)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "BUNDLE_VERSION=$(icpx --version 2>&1 |grep 'DPC++/C++' |sed 's/.*(//;s/).*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
- name: Torch Config
run: |
echo "$GITHUB_ENV"
rm -rf ../pytorch/inductor_log || sudo rm -rf ../pytorch/inductor_log
rm -rf /tmp/torchinductor_* || sudo rm -rf /tmp/torchinductor_*
rm -rf ~/.triton/cache || sudo rm -rf ~/.triton/cache
cd ..
source activate e2e_ci
python -c "import triton; print(triton.__version__)"
python pytorch/torch/utils/collect_env.py
- name: Huggingface BF16 Training Accuracy Test
uses: ./.github/actions/inductor-xpu-e2e-test
with:
suite: huggingface
dt: bfloat16
mode: training
scenario: accuracy,performance
env_prepare: true
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
- name: Huggingface FP16 Training Accuracy Test
uses: ./.github/actions/inductor-xpu-e2e-test
with:
suite: huggingface
dt: float16
mode: training
scenario: accuracy,performance
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
- name: Timm_models BF16 Training Accuracy Test
uses: ./.github/actions/inductor-xpu-e2e-test
with:
suite: timm_models
dt: bfloat16
mode: training
scenario: accuracy,performance
env_prepare: true
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
- name: Torchbench BF16 Training Accuracy Test
uses: ./.github/actions/inductor-xpu-e2e-test
with:
suite: torchbench
dt: bfloat16
mode: training
scenario: accuracy,performance
env_prepare: true
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
- name: Download Reference Artifact
id: reference_id
run: |
set -xe
source activate e2e_ci
conda install gh --channel conda-forge -y
REFERENCE_RUN_ID="$(gh --repo ${GITHUB_REPOSITORY} issue view ${reference_issue} \
--json body -q .body |grep "Inductor-weekly-LTS-XPU-E2E" |sed 's/.*: *//')"
gh --repo ${GITHUB_REPOSITORY} run download ${REFERENCE_RUN_ID} -p "Inductor-*-XPU-E2E-*"
rm -rf reference && mv Inductor-*-XPU-E2E-* reference
- name: Summarize archieve files
if: ${{ ! cancelled() }}
run: |
set -x -e -o pipefail
rm -rf ${{ github.workspace }}/upload_files || sudo rm -rf ${{ github.workspace }}/upload_files
cp -r ${{ github.workspace }}/../pytorch/inductor_log ${{ github.workspace }}/upload_files
# Print summary
source activate e2e_ci
export IS_PR=1
bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh \
${{ github.workspace }}/upload_files \
${{ github.workspace }}/reference \
>> ${GITHUB_STEP_SUMMARY}
exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt)
if [ ${exit_label} -ne 0 ];then
grep -E "(Real failed|to passed|Warning timeout).*: [1-9]|Summary for" /tmp/tmp-*.txt |grep -E "failed|passed|timeout" -B 1
echo "There are ${exit_label} cases that need look into!!! Please check them"
exit ${exit_label}
fi
- name: Upload Inductor XPU E2E Data
if: ${{ ! cancelled() }}
uses: actions/upload-artifact@v4
with:
name: Inductor-CI-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }}
path: ${{ github.workspace }}/upload_files
preci-windows:
name: preci-windows
if: ${{ !(contains(needs.preci-conditions-filter.outputs.disabled_tests, 'disable_all') || contains(needs.preci-conditions-filter.outputs.disabled_tests, 'disable_win')) }}
needs: [preci-conditions-filter]
uses: ./.github/workflows/_windows_ut.yml
with:
ut: op_extended,torch_xpu
runner: Windows_CI
src_changed: ${{ needs.preci-conditions-filter.outputs.src_changed }}
has_label: ${{ needs.preci-conditions-filter.outputs.has_label }}