Skip to content

Commit ccf6a28

Browse files
prathikr, liqunfu, fajin-corp, tianleiwu, adrianlizarraga
authored
ORT 1.19.0 Release: Cherry-Pick Round 1 (#21619)
### Description
<!-- Describe your changes. -->
PRs marked for cherry-pick.

### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->
ORT 1.19.0 Release Preparation

---------

Signed-off-by: Liqun Fu <[email protected]>
Signed-off-by: liqunfu <[email protected]>
Signed-off-by: Liqun Fu <[email protected]>
Co-authored-by: liqun Fu <[email protected]>
Co-authored-by: Jing Fang <[email protected]>
Co-authored-by: Tianlei Wu <[email protected]>
Co-authored-by: Adrian Lizarraga <[email protected]>
Co-authored-by: Changming Sun <[email protected]>
Co-authored-by: Sumit Agarwal <[email protected]>
Co-authored-by: vraspar <[email protected]>
Co-authored-by: Scott McKay <[email protected]>
Co-authored-by: Edward Chen <[email protected]>
Co-authored-by: Yi Zhang <[email protected]>
Co-authored-by: jingyanwangms <[email protected]>
Co-authored-by: Yi Zhang <[email protected]>
Co-authored-by: Chi Lo <[email protected]>
Co-authored-by: saurabh <[email protected]>
Co-authored-by: sfatimar <[email protected]>
1 parent ee2fe87 commit ccf6a28

File tree

125 files changed

+11352
-1689
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

125 files changed

+11352
-1689
lines changed
Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
<?xml version="1.0" encoding="utf-8"?>
22
<packages>
33
<package id="python" version="3.9.7" targetFramework="native" />
4-
<package id="Microsoft.AI.DirectML" version="1.15.0" targetFramework="native" />
4+
<package id="Microsoft.AI.DirectML" version="1.15.1" targetFramework="native" />
55
<package id="Microsoft.Windows.CppWinRT" version="2.0.201201.7" targetFramework="native" />
66
</packages>
Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
<?xml version="1.0" encoding="utf-8"?>
22
<packages>
33
<package id="pythonx86" version="3.9.7" targetFramework="native" />
4-
<package id="Microsoft.AI.DirectML" version="1.15.0" targetFramework="native" />
4+
<package id="Microsoft.AI.DirectML" version="1.15.1" targetFramework="native" />
55
<package id="Microsoft.Windows.CppWinRT" version="2.0.201201.7" targetFramework="native" />
66
</packages>

cmake/external/dml.cmake

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ if (NOT onnxruntime_USE_CUSTOM_DIRECTML)
4141
set(NUGET_CONFIG ${PROJECT_SOURCE_DIR}/../NuGet.config)
4242
set(PACKAGES_CONFIG ${PROJECT_SOURCE_DIR}/../packages.config)
4343
get_filename_component(PACKAGES_DIR ${CMAKE_CURRENT_BINARY_DIR}/../packages ABSOLUTE)
44-
set(DML_PACKAGE_DIR ${PACKAGES_DIR}/Microsoft.AI.DirectML.1.15.0)
44+
set(DML_PACKAGE_DIR ${PACKAGES_DIR}/Microsoft.AI.DirectML.1.15.1)
4545

4646
# Restore nuget packages, which will pull down the DirectML redist package.
4747
add_custom_command(

cmake/onnxruntime.cmake

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -38,10 +38,14 @@ function(get_c_cxx_api_headers HEADERS_VAR)
3838

3939
# need to add header files for enabled EPs
4040
foreach(f ${ONNXRUNTIME_PROVIDER_NAMES})
41-
file(GLOB _provider_headers CONFIGURE_DEPENDS
42-
"${REPO_ROOT}/include/onnxruntime/core/providers/${f}/*.h"
43-
)
44-
list(APPEND _headers ${_provider_headers})
41+
# The header files in include/onnxruntime/core/providers/cuda directory cannot be flattened to the same directory
42+
# with onnxruntime_c_api.h . Most other EPs probably also do not work in this way.
43+
if((NOT f STREQUAL cuda) AND (NOT f STREQUAL rocm))
44+
file(GLOB _provider_headers CONFIGURE_DEPENDS
45+
"${REPO_ROOT}/include/onnxruntime/core/providers/${f}/*.h"
46+
)
47+
list(APPEND _headers ${_provider_headers})
48+
endif()
4549
endforeach()
4650

4751
set(${HEADERS_VAR} ${_headers} PARENT_SCOPE)

cmake/onnxruntime_mlas.cmake

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -555,8 +555,17 @@ else()
555555
${MLAS_SRC_DIR}/intrinsics/avx2/qdwconv_avx2.cpp
556556
${MLAS_SRC_DIR}/sqnbitgemm_kernel_avx2.cpp
557557
)
558-
set_source_files_properties(${mlas_platform_srcs_avx2} PROPERTIES COMPILE_FLAGS "-mavx2 -mfma")
559558

559+
message(STATUS "CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}")
560+
message(STATUS "CMAKE_CXX_COMPILER_VERSION: ${CMAKE_CXX_COMPILER_VERSION}")
561+
562+
if(NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "10")
563+
message(STATUS "Using -mavx2 -mfma -mavxvnni flags")
564+
set_source_files_properties(${mlas_platform_srcs_avx2} PROPERTIES COMPILE_FLAGS "-mavx2 -mfma -mavxvnni")
565+
else()
566+
message(STATUS "Using -mavx2 -mfma flags")
567+
set_source_files_properties(${mlas_platform_srcs_avx2} PROPERTIES COMPILE_FLAGS "-mavx2 -mfma")
568+
endif()
560569
set(mlas_platform_srcs_avx512f
561570
${MLAS_SRC_DIR}/x86_64/DgemmKernelAvx512F.S
562571
${MLAS_SRC_DIR}/x86_64/SgemmKernelAvx512F.S
@@ -575,7 +584,7 @@ else()
575584
${MLAS_SRC_DIR}/x86_64/ConvSymKernelAvx512Core.S
576585
${MLAS_SRC_DIR}/sqnbitgemm_kernel_avx512.cpp
577586
)
578-
set_source_files_properties(${mlas_platform_srcs_avx512core} PROPERTIES COMPILE_FLAGS "-mavx512bw -mavx512dq -mavx512vl")
587+
set_source_files_properties(${mlas_platform_srcs_avx512core} PROPERTIES COMPILE_FLAGS "-mfma -mavx512vnni -mavx512bw -mavx512dq -mavx512vl")
579588

580589
set(mlas_platform_srcs_avx512vnni
581590
${MLAS_SRC_DIR}/sqnbitgemm_kernel_avx512vnni.cpp

cmake/onnxruntime_providers_cpu.cmake

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -219,6 +219,7 @@ if (onnxruntime_ENABLE_TRAINING)
219219
endif()
220220

221221
install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/cpu/cpu_provider_factory.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/)
222+
install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/resource.h ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/custom_op_context.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core/providers)
222223
set_target_properties(onnxruntime_providers PROPERTIES LINKER_LANGUAGE CXX)
223224
set_target_properties(onnxruntime_providers PROPERTIES FOLDER "ONNXRuntime")
224225

cmake/onnxruntime_providers_cuda.cmake

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -289,8 +289,15 @@
289289
config_cuda_provider_shared_module(onnxruntime_providers_cuda_obj)
290290
endif()
291291
config_cuda_provider_shared_module(onnxruntime_providers_cuda)
292-
292+
# Cannot use glob because the file cuda_provider_options.h should not be exposed out.
293+
set(ONNXRUNTIME_CUDA_PROVIDER_PUBLIC_HEADERS
294+
"${REPO_ROOT}/include/onnxruntime/core/providers/cuda/cuda_context.h"
295+
"${REPO_ROOT}/include/onnxruntime/core/providers/cuda/cuda_resource.h"
296+
)
297+
set_target_properties(onnxruntime_providers_cuda PROPERTIES
298+
PUBLIC_HEADER "${ONNXRUNTIME_CUDA_PROVIDER_PUBLIC_HEADERS}")
293299
install(TARGETS onnxruntime_providers_cuda
300+
PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core/providers/cuda
294301
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
295302
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
296303
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})

cmake/onnxruntime_providers_rocm.cmake

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -223,8 +223,13 @@
223223
if (onnxruntime_ENABLE_ATEN)
224224
target_compile_definitions(onnxruntime_providers_rocm PRIVATE ENABLE_ATEN)
225225
endif()
226-
226+
file(GLOB ONNXRUNTIME_ROCM_PROVIDER_PUBLIC_HEADERS CONFIGURE_DEPENDS
227+
"${REPO_ROOT}/include/onnxruntime/core/providers/rocm/*.h"
228+
)
229+
set_target_properties(onnxruntime_providers_rocm PROPERTIES
230+
PUBLIC_HEADER "${ONNXRUNTIME_ROCM_PROVIDER_PUBLIC_HEADERS}")
227231
install(TARGETS onnxruntime_providers_rocm
232+
PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core/providers/rocm
228233
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
229234
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
230235
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
<?xml version="1.0" encoding="utf-8"?>
22
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
33
<ItemGroup Condition="('$(OutputType)'!='Library' OR '$(IsAppExtension)'=='True')">
4-
<NativeReference Include="$(MSBuildThisFileDirectory)..\..\runtimes\ios\native\onnxruntime.xcframework">
4+
<NativeReference Include="$(MSBuildThisFileDirectory)..\..\runtimes\ios\native\onnxruntime.xcframework.zip">
55
<Kind>Static</Kind>
66
<IsCxx>True</IsCxx>
77
<SmartLink>True</SmartLink>
@@ -10,4 +10,4 @@
1010
<WeakFrameworks>CoreML</WeakFrameworks>
1111
</NativeReference>
1212
</ItemGroup>
13-
</Project>
13+
</Project>

include/onnxruntime/core/optimizer/graph_transformer_utils.h

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,12 +3,15 @@
33

44
#pragma once
55

6+
#include <string>
67
#include <memory>
8+
#include <unordered_map>
79
#include <unordered_set>
810
#include <vector>
911

1012
#include "core/common/inlined_containers.h"
1113
#include "core/framework/session_options.h"
14+
#include "core/framework/tensor.h"
1215
#include "core/optimizer/graph_transformer.h"
1316
#include "core/platform/threadpool.h"
1417

@@ -51,7 +54,8 @@ InlinedVector<std::unique_ptr<GraphTransformer>> GenerateTransformers(
5154
const SessionOptions& session_options,
5255
const IExecutionProvider& execution_provider /*required by constant folding*/,
5356
const InlinedHashSet<std::string>& rules_and_transformers_to_disable = {},
54-
concurrency::ThreadPool* intra_op_thread_pool = nullptr);
57+
concurrency::ThreadPool* intra_op_thread_pool = nullptr,
58+
std::unordered_map<std::string, std::unique_ptr<Tensor>>* p_buffered_tensors = nullptr);
5559

5660
#endif // !defined(ORT_MINIMAL_BUILD)
5761

@@ -81,7 +85,8 @@ InlinedVector<std::unique_ptr<GraphTransformer>> GenerateTransformersForMinimalB
8185
const SatApplyContextVariant& apply_context,
8286
const IExecutionProvider& cpu_execution_provider,
8387
const InlinedHashSet<std::string>& rules_and_transformers_to_disable = {},
84-
concurrency::ThreadPool* intra_op_thread_pool = nullptr);
88+
concurrency::ThreadPool* intra_op_thread_pool = nullptr,
89+
std::unordered_map<std::string, std::unique_ptr<Tensor>>* p_buffered_tensors = nullptr);
8590

8691
#endif // !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)
8792

0 commit comments

Comments
 (0)