From 3449c5bd398e790c82d08a2c6eadba75154a87c0 Mon Sep 17 00:00:00 2001 From: Travis-Lee Date: Thu, 9 Nov 2023 10:56:29 +0800 Subject: [PATCH] [XPU] Support setting autotune config for fc and conv2d (#58801) --- .../fluid/inference/api/analysis_predictor.cc | 10 +++ paddle/fluid/inference/api/infer_context.cc | 88 +++++++++++++++++++ paddle/fluid/inference/api/infer_context.h | 10 +++ .../inference/api/paddle_analysis_config.h | 6 -- 4 files changed, 108 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 99b50c9b8ab28c..a7c70d6e69cfdd 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -531,6 +531,16 @@ void AnalysisPredictor::InitDeviceContexts() { auto &instance = memory::allocation::AllocatorFacade::Instance(); auto *xpu_context = new InferXPUContext(place_, config_.xpu_config().context_gm_size); + xpu_context->SetConvAutotuneInfo( + config_.xpu_config_.conv_autotune_file, + config_.xpu_config_.conv_autotune_level, + config_.xpu_config_.conv_autotune_file_writeback, + place_); + xpu_context->SetFcAutotuneInfo( + config_.xpu_config_.fc_autotune_file, + config_.xpu_config_.fc_autotune_level, + config_.xpu_config_.fc_autotune_file_writeback, + place_); xpu_context->SetAllocator(instance.GetAllocator(place_).get()); xpu_context->SetGenerator( phi::DefaultXPUGenerator(place_.GetDeviceId()).get()); diff --git a/paddle/fluid/inference/api/infer_context.cc b/paddle/fluid/inference/api/infer_context.cc index 533363f1b25dae..7879adb57d86ef 100644 --- a/paddle/fluid/inference/api/infer_context.cc +++ b/paddle/fluid/inference/api/infer_context.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/inference/api/infer_context.h" +#include "paddle/fluid/platform/enforce.h" #include "paddle/phi/core/dense_tensor.h" #ifdef PADDLE_WITH_XPU #include "xpu/runtime.h" @@ -136,6 +137,93 @@ void InferXPUContext::SetL3Info(size_t l3_size, } } +void InferXPUContext::SetConvAutotuneInfo(std::string conv_autotune_file, + int conv_autotune_level, + bool conv_autotune_file_writeback, + const phi::Place& place) { + phi::backends::xpu::XPUDeviceGuard guard(place.GetDeviceId()); + + VLOG(5) << "XPU conv autotune level:" << conv_autotune_level; + VLOG(5) << "XPU conv autotune file:" << conv_autotune_file; + VLOG(5) << "XPU conv autotune file writeback:" + << conv_autotune_file_writeback; + + if (!conv_autotune_file.empty()) { + int ret; + ret = x_context()->set_option("XPU_CONV_AUTOTUNE_FILE", + conv_autotune_file.c_str()); + PADDLE_ENFORCE_EQ( + ret, + 0, + platform::errors::Unavailable( + "Failed to set XPU conv autotune file %s.", conv_autotune_file)); + } + if (conv_autotune_level > 0) { + int ret; + ret = x_context()->set_option( + "XPU_CONV_AUTOTUNE", (std::to_string(conv_autotune_level)).c_str()); + PADDLE_ENFORCE_EQ( + ret, + 0, + platform::errors::Unavailable("Failed to set XPU conv autotune %d.", + conv_autotune_level)); + } + if (conv_autotune_file_writeback) { + int ret; + ret = x_context()->set_option( + "XPU_AUTOTUNE_WRITEBACK", + (std::to_string(conv_autotune_file_writeback)).c_str()); + PADDLE_ENFORCE_EQ(ret, + 0, + platform::errors::Unavailable( + "Failed to set XPU conv autotune writeback %d.", + conv_autotune_file_writeback)); + } +} + +void InferXPUContext::SetFcAutotuneInfo(std::string fc_autotune_file, + int fc_autotune_level, + bool fc_autotune_file_writeback, + const phi::Place& place) { + phi::backends::xpu::XPUDeviceGuard guard(place.GetDeviceId()); + + VLOG(5) << "XPU fc autotune level:" << fc_autotune_level; + VLOG(5) << "XPU fc autotune file:" << fc_autotune_file; + VLOG(5) << "XPU fc autotune file writeback:" << fc_autotune_file_writeback; + + if (!fc_autotune_file.empty()) { + int ret; + ret = x_context()->set_option("XPU_FC_AUTOTUNE_FILE", + fc_autotune_file.c_str()); + PADDLE_ENFORCE_EQ( + ret, + 0, + platform::errors::Unavailable("Failed to set XPU fc autotune file %s.", + fc_autotune_file)); + } + if (fc_autotune_level > 0) { + int ret; + ret = x_context()->set_option("XPU_FC_AUTOTUNE", + (std::to_string(fc_autotune_level)).c_str()); + PADDLE_ENFORCE_EQ( + ret, + 0, + platform::errors::Unavailable("Failed to set XPU fc autotune %d.", + fc_autotune_level)); + } + if (fc_autotune_file_writeback) { + int ret; + ret = x_context()->set_option( + "XPU_FC_AUTOTUNE_WRITEBACK", + (std::to_string(fc_autotune_file_writeback)).c_str()); + PADDLE_ENFORCE_EQ(ret, + 0, + platform::errors::Unavailable( + "Failed to set XPU fc autotune writeback %d.", + fc_autotune_file_writeback)); + } +} + void InferXPUContext::L3CacheAutotune() { if (l3_autotune_size_ == 0) return; if (holder_map_.empty()) { diff --git a/paddle/fluid/inference/api/infer_context.h b/paddle/fluid/inference/api/infer_context.h index 2b5c4e974eb081..216c7747f07065 100644 --- a/paddle/fluid/inference/api/infer_context.h +++ b/paddle/fluid/inference/api/infer_context.h @@ -69,6 +69,16 @@ class InferXPUContext : public phi::XPUContext { void L3CacheAutotune(); + void SetConvAutotuneInfo(std::string conv_autotune_file, + int conv_autotune_level, + bool conv_autotune_file_writeback, + const phi::Place& place); + + void SetFcAutotuneInfo(std::string fc_autotune_file, + int fc_autotune_level, + bool fc_autotune_file_writeback, + const phi::Place& place); + private: size_t l3_size_{0}; void* l3_ptr_{nullptr}; diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h index 94215dddc6ccea..0b6ea2e8144992 100644 --- a/paddle/fluid/inference/api/paddle_analysis_config.h +++ b/paddle/fluid/inference/api/paddle_analysis_config.h @@ -105,23 +105,17 @@ struct PD_INFER_DECL XpuConfig { void* stream{nullptr}; // Conv autotune level. Default 0 means no autotune. - // Note: Paddle-Lite only. int conv_autotune_level{0}; // Base conv autotune info is read from conv_autotune_file. - // Note: Paddle-Lite only. std::string conv_autotune_file; // Whether write new conv autotune info to conv_autotune_file. - // Note: Paddle-Lite only. bool conv_autotune_file_writeback{false}; // Fc autotune level. The Optional values are 0-9. Default 0 means no - // autotune. Note: Paddle-Lite only. int fc_autotune_level{0}; // Base fc autotune info is read from fc_autotune_file. - // Note: Paddle-Lite only. std::string fc_autotune_file; // Whether write new fc autotune info to fc_autotune_file. - // Note: Paddle-Lite only. bool fc_autotune_file_writeback{false}; // Gemm compute precision. Optional values are 0(int8),1(int16),2(int31).