2 changes: 2 additions & 0 deletions paddle/fluid/framework/ir/xpu/delete_isolated_node_pass.cc
@@ -99,6 +99,7 @@ void DeleteIsolatedNodePass::CollectReservedPersistableNodeNames(
Graph* graph,
std::unordered_set<std::string>* reserved_persistable_node_names) const {
for (auto* node : graph->Nodes()) {
if (!node || node->Name() == "fetch" || node->Name() == "feed") continue;
if (!node->IsVar() || !node->Var()->Persistable()) continue;
for (auto* out_node : node->outputs) {
auto op_type = out_node->Op()->Type();
@@ -131,6 +132,7 @@ int DeleteIsolatedNodePass::RemoveIsolatedNodes(
std::unordered_set<const Node*> delete_nodes;
const std::unordered_set<ir::Node*> nodes = graph->Nodes();
for (auto* node : nodes) {
if (!node || node->Name() == "fetch" || node->Name() == "feed") continue;
if (!node->IsVar() || !node->Var()->Persistable()) continue;
auto name = node->Var()->Name();
if (reserved_persistable_node_names.count(name) > 0) continue;
3 changes: 2 additions & 1 deletion paddle/fluid/inference/analysis/argument.h
@@ -146,6 +146,7 @@ struct Argument {
DECL_ARGUMENT_FIELD(model_program_path, ModelProgramPath, std::string);
DECL_ARGUMENT_FIELD(model_params_path, ModelParamsPath, std::string);
DECL_ARGUMENT_FIELD(model_from_memory, ModelFromMemory, bool);
DECL_ARGUMENT_FIELD(save_optimized_model, SaveOptimizedModel, bool);
DECL_ARGUMENT_FIELD(optim_cache_dir, OptimCacheDir, std::string);
DECL_ARGUMENT_FIELD(enable_ir_optim, EnableIrOptim, bool);

@@ -297,7 +298,7 @@ struct Argument {
XpuQuantPostDynamicWeightBits,
int);
DECL_ARGUMENT_FIELD(xpu_quant_post_dynamic_op_types,
XpuQuantPostDynamicOpTypss,
XpuQuantPostDynamicOpTypes,
std::vector<std::string>);

DECL_ARGUMENT_FIELD(use_opencl, UseOpenCL, bool);
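The Typss → Types fix above reaches further than the field itself: DECL_ARGUMENT_FIELD stamps out accessor names from its second argument, which is why the matching SetXpuQuantPostDynamicOpTypss call in analysis_predictor.cc is renamed later in this diff. A minimal sketch of such a macro, illustrative only and not Paddle's exact expansion:

// Illustrative sketch, NOT Paddle's actual DECL_ARGUMENT_FIELD expansion:
// the macro generates a stored member plus accessors derived from its
// second argument, so renaming the "XpuQuantPostDynamicOpTypss" token also
// renames the generated setter entry point.
#define DECL_ARGUMENT_FIELD(field__, Field__, type__)    \
 public:                                                 \
  type__& field__() { return field__##_; }               \
  void Set##Field__(const type__& x) { field__##_ = x; } \
                                                         \
 private:                                                \
  type__ field__##_;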
2 changes: 1 addition & 1 deletion paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -312,7 +312,7 @@ void IRPassManager::CreatePasses(Argument *argument,
}
bool use_fc_padding = !fc_mkldnn_pass && argument->use_fc_padding();
pass->Set("use_fc_padding", new bool(use_fc_padding));
} else if (pass_name == "fused_multi_transformer_xpu_quant_pass") {
} else if (pass_name == "fused_multi_transformer_xpu_pass") {
auto op_types = argument->xpu_quant_post_dynamic_op_types();
if (std::count(op_types.begin(),
op_types.end(),
5 changes: 5 additions & 0 deletions paddle/fluid/inference/analysis/passes/CMakeLists.txt
@@ -31,12 +31,17 @@ cc_library(
inference_op_replace_pass
SRCS inference_op_replace_pass.cc
DEPS analysis_pass graph_to_program_pass)
cc_library(
save_optimized_model_pass
SRCS save_optimized_model_pass.cc
DEPS analysis_pass argument ir_pass_manager graph_to_program_pass)

cc_library(
analysis_passes
SRCS passes.cc
DEPS ir_graph_build_pass
ir_analysis_pass
save_optimized_model_pass
ir_params_sync_among_devices_pass
adjust_cudnn_workspace_size_pass
memory_optim_pass
3 changes: 3 additions & 0 deletions paddle/fluid/inference/analysis/passes/passes.cc
@@ -21,6 +21,7 @@
#include "paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h"
#include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h"
#include "paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h"

namespace paddle {
namespace inference {
@@ -33,6 +34,8 @@ PassRegistry::PassRegistry() {
std::unique_ptr<AnalysisPass>(new IrAnalysisPass));
passes_.emplace("ir_graph_build_pass",
std::unique_ptr<AnalysisPass>(new IrGraphBuildPass));
passes_.emplace("save_optimized_model_pass",
std::unique_ptr<AnalysisPass>(new SaveOptimizedModelPass));
passes_.emplace("memory_optimize_pass",
std::unique_ptr<AnalysisPass>(new MemoryOptimizePass));
passes_.emplace(
144 changes: 144 additions & 0 deletions paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc
@@ -0,0 +1,144 @@
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h"

#include <unordered_set>
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/scope.h"

namespace paddle {
namespace inference {
namespace analysis {

void SaveOptimizedModelPass::SaveOptimizedModel(Argument* argument) {
if (!argument->save_optimized_model()) {
LOG(WARNING) << "save_optim_cache_model is turned off, skip "
"save_optimized_model_pass";
return;
}
if (!argument->enable_ir_optim()) {
LOG(WARNING) << "ir_optim is turned off, skip save_optimized_model_pass";
return;
}

std::string model_opt_cache_dir = argument->optim_cache_dir();
if (!model_opt_cache_dir.empty()) {
if (!PathExists(model_opt_cache_dir)) {
PADDLE_ENFORCE_NE(
MKDIR(model_opt_cache_dir.c_str()),
-1,
platform::errors::PreconditionNotMet(
"Can not create optimize cache directory: %s, Make sure you "
"have permission to write",
model_opt_cache_dir));
}
} else {
model_opt_cache_dir = argument->Has("model_dir")
? argument->model_dir()
: GetDirRoot(argument->model_program_path());
}

auto& scope = argument->scope();
auto* graph = argument->main_graph_ptr();

framework::ProgramDesc optimized_program_desc;
framework::ir::GraphToProgram(*graph, &optimized_program_desc);

auto IsPersistable = [](const framework::VarDesc* var) {
if (var->Persistable() &&
var->GetType() != framework::proto::VarType::FEED_MINIBATCH &&
var->GetType() != framework::proto::VarType::FETCH_LIST &&
var->GetType() != framework::proto::VarType::RAW) {
return true;
}
return false;
};

auto SerializeParams = [&](const std::string& path) {
framework::ProgramDesc save_program;
auto* save_block = save_program.MutableBlock(0);
std::unordered_set<std::string> save_var_set;
for (size_t i = 0; i < optimized_program_desc.Size(); ++i) {
const auto& global_block = optimized_program_desc.Block(i);
for (framework::VarDesc* var : global_block.AllVars()) {
if (IsPersistable(var)) {
framework::VarDesc* new_var = save_block->Var(var->Name());
new_var->SetShape(var->GetShape());
new_var->SetDataType(var->GetDataType());
new_var->SetType(var->GetType());
new_var->SetLoDLevel(var->GetLoDLevel());
new_var->SetPersistable(true);
save_var_set.insert(new_var->Name());
}
}
}

std::string save_params_path = path + "/" + "_optimized.pdiparams";
std::vector<std::string> save_var_list(save_var_set.begin(),
save_var_set.end());
std::sort(save_var_list.begin(), save_var_list.end());
auto* op = save_block->AppendOp();
op->SetType("save_combine");
op->SetInput("X", save_var_list);
op->SetAttr("file_path", save_params_path);
op->CheckAttrs();

framework::Executor exe(platform::CPUPlace{});
exe.Run(save_program, &scope, 0, true, true);
};
// TODO(shentanyue01): Set hardware and version identification for
// optimized models.
auto SerializeProg = [&](const std::string& path) {
// All persistable var need to be moved to global block
auto* global_block = optimized_program_desc.MutableBlock(0);
for (size_t i = 1; i < optimized_program_desc.Size(); ++i) {
const auto& sub_block = optimized_program_desc.Block(i);
for (framework::VarDesc* var : sub_block.AllVars()) {
if (IsPersistable(var) && !global_block->HasVar(var->Name())) {
framework::VarDesc* new_var = global_block->Var(var->Name());
new_var->SetShape(var->GetShape());
new_var->SetDataType(var->GetDataType());
new_var->SetType(var->GetType());
new_var->SetLoDLevel(var->GetLoDLevel());
new_var->SetPersistable(true);
}
}
}
std::string save_model_path = path + "/" + "_optimized.pdmodel";
auto str = optimized_program_desc.Proto()->SerializeAsString();
std::ofstream file(save_model_path.c_str(), std::ios::binary);
file.write(str.c_str(), str.size());
file.close();
};

SerializeProg(model_opt_cache_dir);
SerializeParams(model_opt_cache_dir);
LOG(INFO) << "Optimized model saved to " << model_opt_cache_dir;
}

void SaveOptimizedModelPass::RunImpl(Argument* argument) {
if (argument->use_xpu_valid()) {
SaveOptimizedModel(argument);
}
}

std::string SaveOptimizedModelPass::repr() const {
return "save_optimized_model_pass";
}

} // namespace analysis
} // namespace inference
} // namespace paddle
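The pass leaves two artifacts in model_opt_cache_dir: _optimized.pdmodel, written by SerializeProg, and _optimized.pdiparams, produced by running a save_combine op over the sorted persistable variables on a CPU executor. A minimal sketch of reloading them through the existing two-argument AnalysisConfig::SetModel overload; the ./opt_cache path is an assumed example:

// Reload sketch under assumptions: "./opt_cache" is an example directory;
// the file names match what SerializeProg/SerializeParams write above.
#include <memory>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

paddle::AnalysisConfig MakeReloadConfig() {
  paddle::AnalysisConfig config;
  config.SetModel("./opt_cache/_optimized.pdmodel",
                  "./opt_cache/_optimized.pdiparams");
  config.SwitchIrOptim(false);  // the saved graph is already optimized
  return config;
}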
39 changes: 39 additions & 0 deletions paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h
@@ -0,0 +1,39 @@
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <string>

#include "paddle/fluid/inference/analysis/analysis_pass.h"

namespace paddle {
namespace inference {
namespace analysis {

/*
* Save the model optimized by IR passes
*/
class SaveOptimizedModelPass : public AnalysisPass {
public:
void RunImpl(Argument *argument) override;
std::string repr() const override;

private:
void SaveOptimizedModel(Argument *argument);
};

} // namespace analysis
} // namespace inference
} // namespace paddle
5 changes: 4 additions & 1 deletion paddle/fluid/inference/api/analysis_config.cc
@@ -409,7 +409,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER(model_dir_);
CP_MEMBER(model_from_memory_); // the memory model reuses prog_file_ and
// params_file_ fields.

CP_MEMBER(save_optimized_model_);
CP_MEMBER(opt_cache_dir_);
CP_MEMBER(prog_file_);
CP_MEMBER(params_file_);
@@ -1030,6 +1030,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss << model_dir_;
ss << prog_file_;
ss << params_file_;
ss << save_optimized_model_;

ss << use_gpu_;
ss << enable_gpu_mixed_;
@@ -1352,6 +1353,8 @@ std::string AnalysisConfig::Summary() {
os.InsertRow({"use_cinn_compiler", use_cinn_compiler_ ? "true" : "false"});

// ir info
os.InsertRow(
{"save_optimized_model", save_optimized_model_ ? "true" : "false"});
os.InsertRow({"ir_optim", enable_ir_optim_ ? "true" : "false"});
os.InsertRow({"ir_debug", ir_debug_ ? "true" : "false"});
os.InsertRow({"memory_optim", enable_memory_optim_ ? "true" : "false"});
3 changes: 2 additions & 1 deletion paddle/fluid/inference/api/analysis_predictor.cc
@@ -1355,6 +1355,7 @@ void AnalysisPredictor::PrepareArgument() {
// Analyze inference_program
argument_->SetPredictorID(predictor_id_);
argument_->SetRootPredictorID(root_predictor_id_);
argument_->SetSaveOptimizedModel(config_.save_optimized_model_);
argument_->SetOptimCacheDir(config_.opt_cache_dir_);
if (!config_.model_dir().empty()) {
argument_->SetModelDir(config_.model_dir());
@@ -1522,7 +1523,7 @@ void AnalysisPredictor::PrepareArgument() {
argument_->SetXpuEnableMultiStream(config_.xpu_enable_multi_stream_);
argument_->SetXpuQuantPostDynamicWeightBits(
config_.xpu_quant_post_dynamic_weight_bits_);
argument_->SetXpuQuantPostDynamicOpTypss(
argument_->SetXpuQuantPostDynamicOpTypes(
config_.xpu_quant_post_dynamic_op_types_);
#endif

9 changes: 9 additions & 0 deletions paddle/fluid/inference/api/paddle_analysis_config.h
@@ -199,6 +199,14 @@ struct PD_INFER_DECL AnalysisConfig {
///
void SetParamsFile(const std::string& x) { params_file_ = x; }

///
/// \brief Save optimized model.
///
/// \param save_optimized_model whether to enable saving the optimized model.
///
void EnableSaveOptimModel(bool save_optimized_model) {
save_optimized_model_ = save_optimized_model;
}
///
/// \brief Set the path of optimization cache directory.
///
@@ -1243,6 +1251,7 @@ struct PD_INFER_DECL AnalysisConfig {
// Variables held by config can take up a lot of memory in some cases.
// So we release the memory when the predictor is set up.
mutable bool is_valid_{true};
bool save_optimized_model_{false};
std::string opt_cache_dir_;
friend class paddle_infer::experimental::InternalUtils;

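End to end, turning the feature on from C++ could look like the sketch below; the model and cache paths are assumed examples, and saving is currently gated on a valid XPU configuration by RunImpl:

// Usage sketch; "./my_model_dir" and "./opt_cache" are assumed paths.
#include <memory>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

std::unique_ptr<paddle::PaddlePredictor> BuildPredictor() {
  paddle::AnalysisConfig config;
  config.SetModel("./my_model_dir");       // directory form of SetModel
  config.SwitchIrOptim(true);              // the pass exits early without IR optim
  config.EnableSaveOptimModel(true);       // the switch added in this diff
  config.SetOptimCacheDir("./opt_cache");  // destination for _optimized.* files
  config.EnableXpu();                      // RunImpl saves only when XPU is valid
  return paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(config);
}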
1 change: 1 addition & 0 deletions paddle/fluid/inference/api/paddle_pass_builder.h
@@ -116,6 +116,7 @@ class PD_INFER_DECL PaddlePassBuilder {
std::vector<std::string> analysis_passes_{
{"ir_graph_build_pass",
"ir_analysis_pass",
"save_optimized_model_pass",
"ir_params_sync_among_devices_pass",
"adjust_cudnn_workspace_size_pass",
"inference_op_replace_pass"}};
3 changes: 3 additions & 0 deletions paddle/fluid/pybind/inference_api.cc
@@ -820,6 +820,9 @@ void BindAnalysisConfig(py::module *m) {
.def("enable_profile", &AnalysisConfig::EnableProfile)
.def("disable_glog_info", &AnalysisConfig::DisableGlogInfo)
.def("glog_info_disabled", &AnalysisConfig::glog_info_disabled)
.def("enable_save_optim_model",
&AnalysisConfig::EnableSaveOptimModel,
py::arg("save_optimized_model") = false)
.def("set_optim_cache_dir", &AnalysisConfig::SetOptimCacheDir)
.def("switch_use_feed_fetch_ops",
&AnalysisConfig::SwitchUseFeedFetchOps,
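On the Python side, the binding above surfaces this as config.enable_save_optim_model(True); the py::arg default of false mirrors the save_optimized_model_{false} member default, so saving stays off unless requested explicitly.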
13 changes: 13 additions & 0 deletions test/cpp/inference/api/analysis_predictor_tester.cc
@@ -132,6 +132,19 @@ TEST(AnalysisPredictor, analysis_on) {
inference::CompareTensor(outputs.front(), naive_outputs.front());
}

#ifdef PADDLE_WITH_XPU
TEST(AnalysisPredictor, save_optimized_model_on) {
AnalysisConfig config;
config.SetModel(FLAGS_dirname);
config.SwitchIrOptim(true);
config.EnableSaveOptimModel(true);
config.EnableXpu();
config.SetXpuDeviceId(0);
LOG(INFO) << config.Summary();
CreatePaddlePredictor<AnalysisConfig>(config);
}
#endif

TEST(AnalysisPredictor, ZeroCopy) {
AnalysisConfig config;
config.SetModel(FLAGS_dirname);