Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 39 additions & 30 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -290,12 +290,50 @@ if (onnxruntime_USE_ROCM)
message(FATAL_ERROR "ROCM does not support build with CUDA!")
endif()

# Determine the installed ROCm version and publish it as ROCM_VERSION_DEV,
# ROCM_VERSION_DEV_MAJOR/_MINOR/_PATCH and ROCM_VERSION_DEV_INT.
#
# Replicates the strategy used by pytorch to get ROCM_VERSION, with modification:
# https://github.com/pytorch/pytorch/blob/5c5b71b6eebae76d744261715231093e62f0d090/cmake/public/LoadHIP.cmake
#
# The first of these files that exists under onnxruntime_ROCM_HOME is consulted:
#   .info/version, include/rocm_version.h, include/rocm-core/rocm_version.h
#
# Regex notes:
#  * "\\." is a literal dot. A single backslash ("\.") is consumed by CMake's quoted-argument
#    escaping and reaches the regex engine as ".", which matches any character.
#  * The file contents are passed quoted so that an empty file produces a failed match (and the
#    FATAL_ERROR below) instead of an argument-count error from string().
if (EXISTS "${onnxruntime_ROCM_HOME}/.info/version")
  message("\n***** ROCm version from ${onnxruntime_ROCM_HOME}/.info/version ****\n")
  file(READ "${onnxruntime_ROCM_HOME}/.info/version" ROCM_VERSION_DEV_RAW)
  string(REGEX MATCH "^([0-9]+)\\.([0-9]+)\\.([0-9]+)-.*$" ROCM_VERSION_MATCH "${ROCM_VERSION_DEV_RAW}")
elseif (EXISTS "${onnxruntime_ROCM_HOME}/include/rocm_version.h")
  message("\n***** ROCm version from ${onnxruntime_ROCM_HOME}/include/rocm_version.h ****\n")
  file(READ "${onnxruntime_ROCM_HOME}/include/rocm_version.h" ROCM_VERSION_H_RAW)
  string(REGEX MATCH "\"([0-9]+)\\.([0-9]+)\\.([0-9]+).*\"" ROCM_VERSION_MATCH "${ROCM_VERSION_H_RAW}")
elseif (EXISTS "${onnxruntime_ROCM_HOME}/include/rocm-core/rocm_version.h")
  message("\n***** ROCm version from ${onnxruntime_ROCM_HOME}/include/rocm-core/rocm_version.h ****\n")
  file(READ "${onnxruntime_ROCM_HOME}/include/rocm-core/rocm_version.h" ROCM_VERSION_H_RAW)
  string(REGEX MATCH "\"([0-9]+)\\.([0-9]+)\\.([0-9]+).*\"" ROCM_VERSION_MATCH "${ROCM_VERSION_H_RAW}")
endif()

if (ROCM_VERSION_MATCH)
  # CMAKE_MATCH_<n> still hold the capture groups of the string(REGEX MATCH) call above.
  set(ROCM_VERSION_DEV_MAJOR ${CMAKE_MATCH_1})
  set(ROCM_VERSION_DEV_MINOR ${CMAKE_MATCH_2})
  set(ROCM_VERSION_DEV_PATCH ${CMAKE_MATCH_3})
  set(ROCM_VERSION_DEV "${ROCM_VERSION_DEV_MAJOR}.${ROCM_VERSION_DEV_MINOR}.${ROCM_VERSION_DEV_PATCH}")
  # Single integer encoding: major*10000 + minor*100 + patch (e.g. 6.2.3 -> 60203).
  math(EXPR ROCM_VERSION_DEV_INT "(${ROCM_VERSION_DEV_MAJOR}*10000) + (${ROCM_VERSION_DEV_MINOR}*100) + ${ROCM_VERSION_DEV_PATCH}")

  message("ROCM_VERSION_DEV: ${ROCM_VERSION_DEV}")
  message("ROCM_VERSION_DEV_MAJOR: ${ROCM_VERSION_DEV_MAJOR}")
  message("ROCM_VERSION_DEV_MINOR: ${ROCM_VERSION_DEV_MINOR}")
  message("ROCM_VERSION_DEV_PATCH: ${ROCM_VERSION_DEV_PATCH}")
  message("ROCM_VERSION_DEV_INT: ${ROCM_VERSION_DEV_INT}")
else()
  # Reached when none of the files exist or the selected file did not contain a version string.
  message(FATAL_ERROR "Cannot determine ROCm version string")
endif()


# Default the HIP compiler to the clang++ under ${onnxruntime_ROCM_HOME}/llvm/bin
# unless CMAKE_HIP_COMPILER already holds a value.
if (NOT CMAKE_HIP_COMPILER)
set(CMAKE_HIP_COMPILER "${onnxruntime_ROCM_HOME}/llvm/bin/clang++")
endif()

# Choose default HIP offload architectures when the user has not set CMAKE_HIP_ARCHITECTURES.
# The built-in list is only applied for ROCm >= 6.2; with an older ROCm the configure step
# stops and the caller has to pass CMAKE_HIP_ARCHITECTURES explicitly.
# ROCM_VERSION_DEV must already be set (by the ROCm version detection) when this runs.
if (NOT CMAKE_HIP_ARCHITECTURES)
  if (ROCM_VERSION_DEV VERSION_LESS "6.2")
    message(FATAL_ERROR "CMAKE_HIP_ARCHITECTURES is not set when ROCm version < 6.2")
  else()
    set(CMAKE_HIP_ARCHITECTURES "gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx940;gfx941;gfx942;gfx1200;gfx1201")
  endif()
endif()

file(GLOB rocm_cmake_components ${onnxruntime_ROCM_HOME}/lib/cmake/*)
Expand Down Expand Up @@ -327,35 +365,6 @@ if (onnxruntime_USE_ROCM)
set(onnxruntime_HIPIFY_PERL ${HIPIFY_PERL_PATH}/hipify-perl)
endif()

# Determine the installed ROCm version and publish it as ROCM_VERSION_DEV,
# ROCM_VERSION_DEV_MAJOR/_MINOR/_PATCH and ROCM_VERSION_DEV_INT.
#
# Replicates the strategy used by pytorch to get ROCM_VERSION, with modification:
# https://github.com/pytorch/pytorch/blob/5c5b71b6eebae76d744261715231093e62f0d090/cmake/public/LoadHIP.cmake
#
# onnxruntime_ROCM_VERSION_FILE records which file was actually consulted so the banner printed
# below names the real source instead of always claiming .info/version.
# "\\." is a literal dot in the regexes; a single backslash would be stripped by CMake's
# quoted-argument escaping and leave ".", which matches any character. File contents are passed
# quoted so an empty file results in a failed match rather than an argument-count error.
if (EXISTS "${onnxruntime_ROCM_HOME}/.info/version")
  set(onnxruntime_ROCM_VERSION_FILE "${onnxruntime_ROCM_HOME}/.info/version")
  file(READ "${onnxruntime_ROCM_VERSION_FILE}" ROCM_VERSION_DEV_RAW)
  string(REGEX MATCH "^([0-9]+)\\.([0-9]+)\\.([0-9]+)-.*$" ROCM_VERSION_MATCH "${ROCM_VERSION_DEV_RAW}")
elseif (EXISTS "${onnxruntime_ROCM_HOME}/include/rocm_version.h")
  set(onnxruntime_ROCM_VERSION_FILE "${onnxruntime_ROCM_HOME}/include/rocm_version.h")
  file(READ "${onnxruntime_ROCM_VERSION_FILE}" ROCM_VERSION_H_RAW)
  string(REGEX MATCH "\"([0-9]+)\\.([0-9]+)\\.([0-9]+).*\"" ROCM_VERSION_MATCH "${ROCM_VERSION_H_RAW}")
elseif (EXISTS "${onnxruntime_ROCM_HOME}/include/rocm-core/rocm_version.h")
  set(onnxruntime_ROCM_VERSION_FILE "${onnxruntime_ROCM_HOME}/include/rocm-core/rocm_version.h")
  file(READ "${onnxruntime_ROCM_VERSION_FILE}" ROCM_VERSION_H_RAW)
  string(REGEX MATCH "\"([0-9]+)\\.([0-9]+)\\.([0-9]+).*\"" ROCM_VERSION_MATCH "${ROCM_VERSION_H_RAW}")
endif()

if (ROCM_VERSION_MATCH)
  # CMAKE_MATCH_<n> still hold the capture groups of the string(REGEX MATCH) call above.
  set(ROCM_VERSION_DEV_MAJOR ${CMAKE_MATCH_1})
  set(ROCM_VERSION_DEV_MINOR ${CMAKE_MATCH_2})
  set(ROCM_VERSION_DEV_PATCH ${CMAKE_MATCH_3})
  set(ROCM_VERSION_DEV "${ROCM_VERSION_DEV_MAJOR}.${ROCM_VERSION_DEV_MINOR}.${ROCM_VERSION_DEV_PATCH}")
  # Single integer encoding: major*10000 + minor*100 + patch (e.g. 6.2.3 -> 60203).
  math(EXPR ROCM_VERSION_DEV_INT "(${ROCM_VERSION_DEV_MAJOR}*10000) + (${ROCM_VERSION_DEV_MINOR}*100) + ${ROCM_VERSION_DEV_PATCH}")
else()
  # Reached when none of the files exist or the selected file did not contain a version string.
  message(FATAL_ERROR "Cannot determine ROCm version string")
endif()
message("\n***** ROCm version from ${onnxruntime_ROCM_VERSION_FILE} ****\n")
message("ROCM_VERSION_DEV: ${ROCM_VERSION_DEV}")
message("ROCM_VERSION_DEV_MAJOR: ${ROCM_VERSION_DEV_MAJOR}")
message("ROCM_VERSION_DEV_MINOR: ${ROCM_VERSION_DEV_MINOR}")
message("ROCM_VERSION_DEV_PATCH: ${ROCM_VERSION_DEV_PATCH}")
message("ROCM_VERSION_DEV_INT: ${ROCM_VERSION_DEV_INT}")
message("\n***** HIP LANGUAGE CONFIG INFO ****\n")
message("CMAKE_HIP_COMPILER: ${CMAKE_HIP_COMPILER}")
message("CMAKE_HIP_ARCHITECTURES: ${CMAKE_HIP_ARCHITECTURES}")
Expand Down
2 changes: 1 addition & 1 deletion dockerfiles/Dockerfile.migraphx
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# Dockerfile to run ONNXRuntime with MIGraphX integration
#--------------------------------------------------------------------------

FROM rocm/pytorch:rocm6.0_ubuntu20.04_py3.9_pytorch_2.1.1
FROM rocm/pytorch:rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0

ARG ONNXRUNTIME_REPO=https://github.com/Microsoft/onnxruntime
ARG ONNXRUNTIME_BRANCH=main
Expand Down
2 changes: 1 addition & 1 deletion dockerfiles/Dockerfile.rocm
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# Dockerfile to run ONNXRuntime with ROCm integration
#--------------------------------------------------------------------------

FROM rocm/pytorch:rocm6.0_ubuntu20.04_py3.9_pytorch_2.1.1
FROM rocm/pytorch:rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0

ARG ONNXRUNTIME_REPO=https://github.com/Microsoft/onnxruntime
ARG ONNXRUNTIME_BRANCH=main
Expand Down
4 changes: 2 additions & 2 deletions dockerfiles/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ Nothing else from ONNX Runtime source tree will be copied/installed to the image
Note: When running the container you built in Docker, please either use 'nvidia-docker' command instead of 'docker', or use Docker command-line options to make sure NVIDIA runtime will be used and appropriate files mounted from host. Otherwise, CUDA libraries won't be found. You can also [set NVIDIA runtime as default in Docker](https://github.com/dusty-nv/jetson-containers#docker-default-runtime).

## MIGraphX
**Ubuntu 20.04, ROCm6.0, MIGraphX**
**Ubuntu 22.04, ROCm6.2, MIGraphX**

1. Build the docker image from the Dockerfile in this repository.
```
Expand All @@ -306,7 +306,7 @@ Note: When running the container you built in Docker, please either use 'nvidia-
```

## ROCm
**Ubuntu 20.04, ROCm6.0**
**Ubuntu 22.04, ROCm6.2**

1. Build the docker image from the Dockerfile in this repository.
```
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ TEST(InternalTestingEP, PreventSaveOfModelWithCompiledOps) {

// the internal NHWC operators are only included as part of contrib ops currently. as the EP requests the NHWC
// version of the ONNX operator when matching a static kernel, those are required.
#if !defined(DISABLE_CONTRIB_OPS)
#if !defined(DISABLE_CONTRIB_OPS) && !defined(USE_ROCM)
TEST(InternalTestingEP, TestMixOfStaticAndCompiledKernels) {
const ORTCHAR_T* ort_model_path = ORT_MODEL_FOLDER "transform/fusion/conv_relu_opset12.onnx";

Expand Down Expand Up @@ -256,10 +256,6 @@ TEST(InternalTestingEP, TestNhwcConversionOfStaticKernels) {
run_test(ort_model_path);
}

// This test can be deprecated now as the code logic has been changed so the model is not applicable
// TEST(InternalTestingEP, TestRegisterAllocatorHandlesUsageInMultipleSessions) {
//}

// make sure allocators returned by SessionState::GetAllocator are valid when IExecutionProvider::ReplaceAllocator
// is used. if something is off InferenceSession::Initialize will fail.
TEST(InternalTestingEP, TestReplaceAllocatorDoesntBreakDueToLocalAllocatorStorage) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ variables:
- name: render
value: 109
- name: RocmVersion
value: 6.1
value: 6.2
- name: RocmVersionPatchSuffix
value: ".3"

Expand All @@ -64,9 +64,9 @@ jobs:

- template: templates/get-docker-image-steps.yml
parameters:
Dockerfile: tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile
Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.ubuntu_rocm_migraphx
Context: tools/ci_build/github/linux/docker
DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion)$(RocmVersionPatchSuffix)"
DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion)$(RocmVersionPatchSuffix) --build-arg USE_MIGRAPHX=1 --build-arg USE_CUPY=0"
Repository: onnxruntimetrainingmigraphx-cibuild-rocm$(RocmVersion)

- task: Cache@2
Expand Down Expand Up @@ -98,6 +98,7 @@ jobs:
onnxruntimetrainingmigraphx-cibuild-rocm$(RocmVersion) \
/bin/bash -c "
set -ex; \
. /ort/env/bin/activate; \
env; \
ccache -s; \
python tools/ci_build/build.py \
Expand Down Expand Up @@ -163,9 +164,9 @@ jobs:

- template: templates/get-docker-image-steps.yml
parameters:
Dockerfile: tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile
Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.ubuntu_rocm_migraphx
Context: tools/ci_build/github/linux/docker
DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion)$(RocmVersionPatchSuffix)"
DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion)$(RocmVersionPatchSuffix) --build-arg USE_MIGRAPHX=1 --build-arg USE_CUPY=0"
Repository: onnxruntimetrainingmigraphx-cibuild-rocm$(RocmVersion)

- task: CmdLine@2
Expand Down
16 changes: 12 additions & 4 deletions tools/ci_build/github/azure-pipelines/linux-rocm-ci-pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ variables:
- name: render
value: 109
- name: RocmVersion
value: 6.1
value: 6.2
- name: RocmVersionPatchSuffix
value: ".3"

Expand All @@ -64,7 +64,7 @@ jobs:

- template: templates/get-docker-image-steps.yml
parameters:
Dockerfile: tools/ci_build/github/linux/docker/rocm-ci-pipeline-env.Dockerfile
Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.ubuntu_rocm_migraphx
Context: tools/ci_build/github/linux/docker
DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion)$(RocmVersionPatchSuffix)"
Repository: onnxruntimerocm-cibuild-rocm$(RocmVersion)
Expand Down Expand Up @@ -98,6 +98,7 @@ jobs:
onnxruntimerocm-cibuild-rocm$(RocmVersion) \
/bin/bash -c "
set -ex; \
. /ort/env/bin/activate; \
env; \
ccache -s; \
python tools/ci_build/build.py \
Expand Down Expand Up @@ -164,7 +165,7 @@ jobs:

- template: templates/get-docker-image-steps.yml
parameters:
Dockerfile: tools/ci_build/github/linux/docker/rocm-ci-pipeline-env.Dockerfile
Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.ubuntu_rocm_migraphx
Context: tools/ci_build/github/linux/docker
DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion)$(RocmVersionPatchSuffix)"
Repository: onnxruntimerocm-cibuild-rocm$(RocmVersion)
Expand All @@ -188,6 +189,9 @@ jobs:
/bin/bash -c "
set -ex; \
xargs -a /build/Release/perms.txt chmod a+x; \
. /ort/env/bin/activate; \
env; \
ccache -s; \
python /onnxruntime_src/tools/ci_build/build.py \
--config Release \
--cmake_extra_defines \
Expand Down Expand Up @@ -231,7 +235,11 @@ jobs:
-e KERNEL_EXPLORER_TEST_USE_CUPY=1 \
-e CUPY_CACHE_DIR=/build/Release \
onnxruntimerocm-cibuild-rocm$(RocmVersion) \
pytest /onnxruntime_src/onnxruntime/python/tools/kernel_explorer/ -n 4 --reruns 1 --durations=100
/bin/bash -c "set -ex; \
. /ort/env/bin/activate; \
env; \
pip list; \
pytest /onnxruntime_src/onnxruntime/python/tools/kernel_explorer/ -n 4 --reruns 1 --durations=100"
workingDirectory: $(Build.SourcesDirectory)
displayName: 'Run kernel explorer tests'
condition: succeededOrFailed()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ ARG LD_LIBRARY_PATH_ARG=${DEVTOOLSET_ROOTPATH}/usr/lib64:${DEVTOOLSET_ROOTPATH}/
ARG PREPEND_PATH=${DEVTOOLSET_ROOTPATH}/usr/bin:

FROM $BASEIMAGE AS base_image
ARG ROCM_VERSION=5.5
ARG ROCM_VERSION=6.2.3

#Add our own dependencies
ADD scripts /tmp/scripts
Expand Down
94 changes: 94 additions & 0 deletions tools/ci_build/github/linux/docker/Dockerfile.ubuntu_rocm_migraphx
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# CI image for ONNX Runtime ROCm / MIGraphX builds on Ubuntu 22.04.
# Refer to https://github.com/RadeonOpenCompute/ROCm-docker/blob/master/dev/Dockerfile-ubuntu-22.04-complete
FROM ubuntu:22.04

# Build arguments (override with --build-arg):
#   ROCM_VERSION   - path component of the repo.radeon.com ROCm apt repository added below
#   AMDGPU_VERSION - path component of the repo.radeon.com amdgpu apt repository; defaults to ROCM_VERSION
#   AMDGPU_TARGETS - comma-separated GPU targets exported as HCC_AMDGPU_TARGET for the CuPy build
#   USE_MIGRAPHX   - 1 additionally installs the migraphx apt package
#   USE_CUPY       - 1 builds and installs CuPy from source into the virtual environment
ARG ROCM_VERSION=6.2
ARG AMDGPU_VERSION=${ROCM_VERSION}
ARG AMDGPU_TARGETS=gfx906,gfx908,gfx90a
ARG USE_MIGRAPHX=0
ARG USE_CUPY=1

# Record the chosen configuration as image metadata.
LABEL ROCM_VERSION="${ROCM_VERSION}"
LABEL USE_MIGRAPHX="${USE_MIGRAPHX}"
LABEL USE_CUPY="${USE_CUPY}"

# Keep apt non-interactive and use a UTF-8 locale.
ENV DEBIAN_FRONTEND=noninteractive
ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8
# NOTE(review): presumably turns off MIGraphX's fast GELU approximation - confirm against MIGraphX docs.
ENV MIGRAPHX_DISABLE_FAST_GELU=1

# Give packages from repo.radeon.com apt pin priority 600.
# Relies on /bin/sh's echo turning "\n" into newlines (dash, Ubuntu's /bin/sh, does).
RUN echo 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' > /etc/apt/preferences.d/rocm-pin-600

# Install necessary system dependencies.
# The apt package index is deleted in the same layer so it is not baked into the image;
# the later ROCm installation step runs `apt-get update` itself.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        wget \
        libnuma-dev \
        gnupg \
        sudo \
        libelf1 \
        kmod \
        file \
        libstdc++6 \
        python3 \
        python3-pip \
        python3.10-dev \
        python3.10-venv \
        build-essential \
        locales \
        git && \
    rm -rf /var/lib/apt/lists/*

# Generate the en_US.UTF-8 locale and record it as the system default LANG.
RUN locale-gen en_US.UTF-8 && update-locale LANG=en_US.UTF-8

# CMake: download the Linux x86_64 release tarball and unpack it over /usr.
ENV CMAKE_VERSION=3.30.5
RUN cmake_pkg="cmake-${CMAKE_VERSION}-linux-x86_64" && \
    wget -q "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${cmake_pkg}.tar.gz" && \
    tar -zxf "${cmake_pkg}.tar.gz" --strip-components=1 -C /usr && \
    rm "${cmake_pkg}.tar.gz"

# ccache: download the prebuilt Linux x86_64 release and copy its binary into /usr/bin.
ENV CCACHE_VERSION=4.10.2
RUN ccache_pkg="ccache-${CCACHE_VERSION}-linux-x86_64" && \
    wget -q "https://github.com/ccache/ccache/releases/download/v${CCACHE_VERSION}/${ccache_pkg}.tar.xz" && \
    tar -xf "${ccache_pkg}.tar.xz" && \
    cp "${ccache_pkg}/ccache" /usr/bin && \
    rm -rf "${ccache_pkg}"*

# Create the Python virtual environment at /ort/env and install the Python packages
# listed in scripts/requirements.txt plus the extra test/runtime packages below.
# The environment's own interpreter is invoked directly, which targets the same
# site-packages as activating the environment first.
WORKDIR /ort
COPY scripts/requirements.txt /ort/
RUN python3 -m venv /ort/env && \
    /ort/env/bin/python -m pip install --upgrade pip && \
    /ort/env/bin/python -m pip install -r /ort/requirements.txt && \
    /ort/env/bin/python -m pip install psutil ml_dtypes pytest-xdist pytest-rerunfailures scipy

# Add the ROCm and amdgpu apt repositories, then install ROCm and, when USE_MIGRAPHX=1, MIGraphX.
#
# The repository signing key is stored as a dedicated keyring and referenced through
# `signed-by` instead of being added with the deprecated `apt-key`. The key is downloaded
# to a file first with `curl -f`, so an HTTP failure aborts the build rather than being
# piped onward as if it were key material.
RUN mkdir -p /etc/apt/keyrings && \
    curl -fsSL -o /tmp/rocm.gpg.key https://repo.radeon.com/rocm/rocm.gpg.key && \
    gpg --batch --dearmor -o /etc/apt/keyrings/rocm.gpg /tmp/rocm.gpg.key && \
    rm /tmp/rocm.gpg.key && \
    echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$ROCM_VERSION/ jammy main" | tee /etc/apt/sources.list.d/rocm.list && \
    echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/$AMDGPU_VERSION/ubuntu jammy main" | tee /etc/apt/sources.list.d/amdgpu.list && \
    migraphx=$( [ "$USE_MIGRAPHX" -eq 1 ] && echo "migraphx" || echo "" ) && \
    apt-get update && apt-get install -y rocm-dev rocm-libs $migraphx && \
    apt-get clean && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*

# Make the ROCm libraries and tools discoverable.
# Note that PATH is replaced with this fixed list, not extended.
ENV LD_LIBRARY_PATH=/opt/rocm/lib:/usr/lib/x86_64-linux-gnu
ENV PATH=/opt/rocm/bin:/usr/bin:/bin:/usr/sbin:/usr/local/bin

# Clone and install CuPy with ROCm support (only when USE_CUPY=1).
# CuPy is built from a pinned commit of the ROCm fork for the GPU targets in AMDGPU_TARGETS
# and installed (editable) into the /ort/env virtual environment.
# The steps are chained with && so that a failing clone, checkout or pip install fails the
# image build; with ';' separators a failure would be masked by the commands that follow it.
RUN if [ "$USE_CUPY" = "1" ]; then \
        git clone https://github.com/ROCm/cupy.git && \
        cd cupy && \
        git checkout 432a8683351d681e00903640489cb2f4055d2e09 && \
        export CUPY_INSTALL_USE_HIP=1 && \
        export ROCM_HOME=/opt/rocm && \
        export HCC_AMDGPU_TARGET="${AMDGPU_TARGETS}" && \
        git submodule update --init && \
        . /ort/env/bin/activate && \
        pip install -e . --no-cache-dir -vvvv; \
    fi

# Activate the virtual environment in interactive bash sessions. bash reads ~/.bashrc only
# for interactive shells, so non-interactive commands (e.g. `bash -c "..."`) must source
# /ort/env/bin/activate themselves.
RUN echo ". /ort/env/bin/activate" >> ~/.bashrc

# Set the default command to start an interactive bash shell
CMD ["/bin/bash"]
Loading
Loading