Skip to content

Commit d877ce7

Browse files
authored
Merge branch 'main' into anuragsingh/minimal-onnx-dialect
2 parents fd42af9 + 5fd0e7b commit d877ce7

File tree

91 files changed

+5071
-968
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

91 files changed

+5071
-968
lines changed

docs/BuildOnLinuxOSX.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ Firstly, install MLIR (as a part of LLVM-Project):
1515
``` bash
1616
git clone -n https://github.com/llvm/llvm-project.git
1717
# Check out a specific commit that is known to work with ONNX-MLIR.
18-
cd llvm-project && git checkout 7a33569510535f0b917a2e50f644bf57490aee24 && cd ..
18+
cd llvm-project && git checkout f8cb7987c64dcffb72414a40560055cb717dbf74 && cd ..
1919
```
2020

2121
[same-as-file]: <> (utils/build-mlir.sh)

docs/BuildOnWindows.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ Install MLIR (as a part of LLVM-Project):
5252
```shell
5353
git clone -n https://github.com/llvm/llvm-project.git
5454
# Check out a specific commit that is known to work with ONNX-MLIR.
55-
cd llvm-project && git checkout 7a33569510535f0b917a2e50f644bf57490aee24 && cd ..
55+
cd llvm-project && git checkout f8cb7987c64dcffb72414a40560055cb717dbf74 && cd ..
5656
```
5757

5858
[same-as-file]: <> (utils/build-mlir.cmd)

src/Accelerators/NNPA/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ include(zdnn.cmake)
3636
setup_zdnn(v1.1.2)
3737

3838
# Note: consider using a compile flag instead.
39-
option(ZDNNX_DEBUG "Enable ZDNNX debug information" ON)
39+
option(ZDNNX_DEBUG "Enable ZDNNX debug information" OFF)
4040
if (ZDNNX_DEBUG)
4141
add_compile_definitions(ZDNNX_DEBUG)
4242
endif()

src/Accelerators/NNPA/Conversion/ONNXToZHigh/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,11 @@ add_onnx_mlir_library(OMRewriteONNXForZHigh
3333
libzdnn
3434

3535
LINK_LIBS PUBLIC
36+
OMLayoutHelper
3637
OMNNPACompilerOptions
3738
OMONNXOps
3839
OMONNXToKrnl
3940
OMZHighOps
40-
OMLayoutHelper
4141

4242

4343
ACCEL_INCLUDE_DIRS PRIVATE
@@ -71,6 +71,7 @@ add_onnx_mlir_library(OMDevicePlacement
7171
OMONNXOps
7272
OMONNXToZHigh
7373
OMRewriteONNXForZHigh
74+
OMLayoutHelper
7475

7576
ACCEL_INCLUDE_DIRS PRIVATE
7677
${NNPA_INCLUDE_PATH}

src/Accelerators/NNPA/Conversion/ONNXToZHigh/PerfModel.cpp

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "llvm/Support/Debug.h"
1717

1818
#include "src/Accelerators/NNPA/Conversion/ONNXToZHigh/PerfModel.hpp"
19+
#include "src/Accelerators/NNPA/Support/NNPALimit.hpp"
1920
#include "src/Dialect/ONNX/ONNXOps.hpp"
2021
#include "src/Dialect/ONNX/ONNXOps/OpHelper.hpp"
2122

@@ -121,6 +122,7 @@ void estimateTimeForMatMulOp(Operation *op, Value a, Value b, bool aTransposed,
121122
assert(aType && aType.hasRank() && "expected shaped type with A rank");
122123
int64_t aRank = aType.getRank();
123124
llvm::ArrayRef<int64_t> aShape = aType.getShape();
125+
// a => matrix A; B => the Batch dims (aka all but the last 2 dims).
124126
bool aBDynamic;
125127
int64_t aB = summarizeHigherDims(aShape, aRank - 2, aBDynamic);
126128
int64_t aNIndex = aTransposed ? aRank - 1 : aRank - 2;
@@ -132,6 +134,7 @@ void estimateTimeForMatMulOp(Operation *op, Value a, Value b, bool aTransposed,
132134
assert(bType && bType.hasRank() && "expected shaped type with B rank");
133135
int64_t bRank = bType.getRank();
134136
llvm::ArrayRef<int64_t> bShape = bType.getShape();
137+
// b => matrix B; B => the Batch dims (aka all but the last 2 dims).
135138
bool bBDynamic;
136139
int64_t bB = summarizeHigherDims(bShape, bRank - 2, bBDynamic);
137140
int64_t bMIndex = bTransposed ? bRank - 1 : bRank - 2;
@@ -312,6 +315,15 @@ void estimateTimeForOp<ONNXExpOp>(ONNXExpOp op, const DimAnalysis *dimAnalysis,
312315
cpuEstimatedTime, nnpaEstimatedTime);
313316
}
314317

318+
template <>
319+
void estimateTimeForOp<ONNXGeluOp>(ONNXGeluOp op,
320+
const DimAnalysis *dimAnalysis, double &cpuEstimatedTime,
321+
double &nnpaEstimatedTime) {
322+
estimateTimeForElementwiseOp(op.getOperation(), op.getOperand(), dimAnalysis,
323+
estimatedTimeForCPU_Gelu_3ds, estimatedTimeForNNPA_Gelu_3ds,
324+
cpuEstimatedTime, nnpaEstimatedTime);
325+
}
326+
315327
template <>
316328
void estimateTimeForOp<ONNXLogOp>(ONNXLogOp op, const DimAnalysis *dimAnalysis,
317329
double &cpuEstimatedTime, double &nnpaEstimatedTime) {
@@ -401,15 +413,33 @@ double estimateTimeForStickOp(Value oper) {
401413
int64_t e4, e3, e2, e1;
402414
std::string msg;
403415
processDim(oper, e4, e3, e2, e1, msg);
404-
return estimatedTimeForNNPA_Stick_3ds(e4 * e3, e2, e1);
416+
// NNPALevel::M14 (arch14) or lower: no NNPA support here; use the CPU estimate.
417+
if (isLessEqualNNPALevel(NNPALevel::M14))
418+
return arch14_estimatedTimeForCPU_Stick_3ds(e4 * e3, e2, e1);
419+
// Otherwise (up to NNPALevel::M15), return the smaller of the CPU and NNPA estimates.
420+
if (isLessEqualNNPALevel(NNPALevel::M15)) {
421+
double cpuTime = arch15_estimatedTimeForCPU_Stick_3ds(e4 * e3, e2, e1);
422+
double nnpaTime = arch15_estimatedTimeForNNPA_Stick_3ds(e4 * e3, e2, e1);
423+
return cpuTime < nnpaTime ? cpuTime : nnpaTime;
424+
}
425+
llvm_unreachable("add new NNPA architecture model here");
405426
}
406427

407428
double estimateTimeForUnstickOp(Value oper) {
408429
// Process dim (collapse and handle dynamic sizes).
409430
int64_t e4, e3, e2, e1;
410431
std::string msg;
411432
processDim(oper, e4, e3, e2, e1, msg);
412-
return estimatedTimeForNNPA_Unstick_3ds(e4 * e3, e2, e1);
433+
// NNPALevel::M14 (arch14) or lower: no NNPA support here; use the CPU estimate.
434+
if (isLessEqualNNPALevel(NNPALevel::M14))
435+
return arch14_estimatedTimeForCPU_Unstick_3ds(e4 * e3, e2, e1);
436+
// Otherwise (up to NNPALevel::M15), return the smaller of the CPU and NNPA estimates.
437+
if (isLessEqualNNPALevel(NNPALevel::M15)) {
438+
double cpuTime = arch15_estimatedTimeForCPU_Unstick_3ds(e4 * e3, e2, e1);
439+
double nnpaTime = arch15_estimatedTimeForNNPA_Unstick_3ds(e4 * e3, e2, e1);
440+
return cpuTime < nnpaTime ? cpuTime : nnpaTime;
441+
}
442+
llvm_unreachable("add new NNPA architecture model here");
413443
}
414444

415445
bool estimateTimeForOpWithModel(Operation *op, const DimAnalysis *dimAnalysis,
@@ -432,6 +462,8 @@ bool estimateTimeForOpWithModel(Operation *op, const DimAnalysis *dimAnalysis,
432462
// Unary elementwise NNPA candidate ops.
433463
else if (auto expOp = mlir::dyn_cast<ONNXExpOp>(op))
434464
estimateTimeForOp(expOp, dimAnalysis, cpuEstimatedTime, nnpaEstimatedTime);
465+
else if (auto geluOp = mlir::dyn_cast<ONNXGeluOp>(op))
466+
estimateTimeForOp(geluOp, dimAnalysis, cpuEstimatedTime, nnpaEstimatedTime);
435467
else if (auto logOp = mlir::dyn_cast<ONNXLogOp>(op))
436468
estimateTimeForOp(logOp, dimAnalysis, cpuEstimatedTime, nnpaEstimatedTime);
437469
else if (auto reluOp = mlir::dyn_cast<ONNXReluOp>(op))

0 commit comments

Comments
 (0)