Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Cherry-pick] Cherry-pick 27167 27827 27493 27932 #28454

Merged
merged 1 commit into from
Nov 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 71 additions & 25 deletions cmake/external/lite.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

if(NOT LINUX OR NOT WITH_MKL)
message("Paddle-lite will not build because the required Linux and MKL do not exist.")
if(NOT LINUX)
message("Paddle-lite will not build because the required Linux do not exist.")
set(WITH_LITE OFF)
return()
endif()
Expand All @@ -22,9 +22,11 @@ if(XPU_SDK_ROOT)
set(LITE_WITH_XPU ON)
include_directories("${XPU_SDK_ROOT}/XTDK/include")
include_directories("${XPU_SDK_ROOT}/XTCL/include")
add_definitions(-DPADDLE_WITH_XPU)
add_definitions(-DLITE_SUBGRAPH_WITH_XPU)
LINK_DIRECTORIES("${XPU_SDK_ROOT}/XTDK/shlib/")
LINK_DIRECTORIES("${XPU_SDK_ROOT}/XTDK/runtime/shlib/")
set(XPURT_LIB ${XPU_SDK_ROOT}/XTDK/runtime/shlib/libxpurt.so)
set(XPUAPI_LIB ${XPU_SDK_ROOT}/XTDK/shlib/libxpuapi.so)
endif()

if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
Expand All @@ -42,30 +44,30 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
endif()

# No quotes, so cmake can resolve it as a command with arguments.
set(LITE_BUILD_COMMAND $(MAKE) publish_inference -j)
set(LITE_OPTIONAL_ARGS -DWITH_MKL=ON
-DLITE_WITH_CUDA=${WITH_GPU}
-DWITH_MKLDNN=OFF
-DLITE_WITH_X86=ON
-DLITE_WITH_PROFILE=OFF
-DWITH_LITE=OFF
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF
-DWITH_PYTHON=OFF
-DWITH_TESTING=OFF
-DLITE_BUILD_EXTRA=ON
-DCUDNN_ROOT=${CUDNN_ROOT}
-DLITE_WITH_STATIC_CUDA=OFF
-DCUDA_ARCH_NAME=${CUDA_ARCH_NAME}
-DLITE_WITH_XPU=${LITE_WITH_XPU}
-DXPU_SDK_ROOT=${XPU_SDK_ROOT}
-DLITE_WITH_ARM=OFF)

ExternalProject_Add(
if(WITH_ARM)
set(LITE_BUILD_COMMAND $(MAKE) publish_inference -j)
message(WARNING "BUILD_COMMAND: ${LITE_BUILD_COMMAND}")
set(LITE_OPTIONAL_ARGS -DWITH_MKL=OFF
-DLITE_WITH_CUDA=OFF
-DWITH_MKLDNN=OFF
-DLITE_WITH_X86=OFF
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON
-DLITE_WITH_PROFILE=OFF
-DARM_TARGET_OS=armlinux
-DWITH_LITE=ON
-DWITH_PYTHON=OFF
-DWITH_TESTING=OFF
-DLITE_BUILD_EXTRA=ON
-DLITE_WITH_XPU=${LITE_WITH_XPU}
-DXPU_SDK_ROOT=${XPU_SDK_ROOT}
-DLITE_WITH_ARM=ON)
ExternalProject_Add(
${LITE_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/PaddlePaddle/Paddle-Lite.git"
GIT_TAG ${LITE_GIT_TAG}
PREFIX ${LITE_SOURCES_DIR}
PATCH_COMMAND mkdir -p ${LITE_SOURCES_DIR}/src/extern_lite-build/lite/gen_code && touch ${LITE_SOURCES_DIR}/src/extern_lite-build/lite/gen_code/__generated_code__.cc
UPDATE_COMMAND ""
BUILD_COMMAND ${LITE_BUILD_COMMAND}
INSTALL_COMMAND ""
Expand All @@ -81,7 +83,51 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS}
${LITE_OPTIONAL_ARGS}
)
)
set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8)
else()
set(LITE_BUILD_COMMAND $(MAKE) publish_inference -j)
set(LITE_OUTPUT_BIN_DIR inference_lite_lib)
set(LITE_OPTIONAL_ARGS -DWITH_MKL=ON
-DLITE_WITH_CUDA=${WITH_GPU}
-DWITH_MKLDNN=OFF
-DLITE_WITH_X86=ON
-DLITE_WITH_PROFILE=OFF
-DWITH_LITE=OFF
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF
-DWITH_PYTHON=OFF
-DWITH_TESTING=OFF
-DLITE_BUILD_EXTRA=ON
-DCUDNN_ROOT=${CUDNN_ROOT}
-DLITE_WITH_STATIC_CUDA=OFF
-DCUDA_ARCH_NAME=${CUDA_ARCH_NAME}
-DLITE_WITH_XPU=${LITE_WITH_XPU}
-DXPU_SDK_ROOT=${XPU_SDK_ROOT}
-DLITE_WITH_ARM=OFF)

ExternalProject_Add(
${LITE_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/PaddlePaddle/Paddle-Lite.git"
GIT_TAG ${LITE_GIT_TAG}
PREFIX ${LITE_SOURCES_DIR}
UPDATE_COMMAND ""
BUILD_COMMAND ${LITE_BUILD_COMMAND}
INSTALL_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_FLAGS=${LITE_CMAKE_CXX_FLAGS}
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS}
${LITE_OPTIONAL_ARGS}
)
endif()
ExternalProject_Get_property(${LITE_PROJECT} BINARY_DIR)
ExternalProject_Get_property(${LITE_PROJECT} SOURCE_DIR)
set(LITE_BINARY_DIR ${BINARY_DIR})
Expand All @@ -103,8 +149,8 @@ function(external_lite_libs alias path)
endif()
endfunction()

external_lite_libs(lite_full_static ${LITE_BINARY_DIR}/inference_lite_lib/cxx/lib/libpaddle_full_api_shared.so)
set(LITE_SHARED_LIB ${LITE_BINARY_DIR}/inference_lite_lib/cxx/lib/libpaddle_full_api_shared.so)
external_lite_libs(lite_full_static ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so)
set(LITE_SHARED_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so)

add_definitions(-DPADDLE_WITH_LITE)
add_definitions(-DLITE_WITH_LOG)
2 changes: 1 addition & 1 deletion cmake/inference_lib.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ function(copy_part_of_thrid_party TARGET DST)
if (LITE_BINARY_DIR)
set(dst_dir "${DST}/third_party/install/lite")
copy(${TARGET}
SRCS ${LITE_BINARY_DIR}/inference_lite_lib/*
SRCS ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/*
DSTS ${dst_dir})
endif()
endfunction()
Expand Down
4 changes: 4 additions & 0 deletions paddle/fluid/inference/analysis/argument.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,10 @@ struct Argument {

DECL_ARGUMENT_FIELD(fusion_statis, FusionStatis, fusion_statis_t);

// Only used in paddle-lite subgraph.
DECL_ARGUMENT_FIELD(cpu_math_library_num_threads, CpuMathLibraryNumThreads,
int);

private:
std::unordered_set<std::string> valid_fields_;
};
Expand Down
2 changes: 2 additions & 0 deletions paddle/fluid/inference/analysis/ir_pass_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ void IRPassManager::CreatePasses(Argument *argument,
pass->Set("use_xpu", new bool(argument->use_xpu()));
pass->Set("xpu_l3_workspace_size",
new int(argument->xpu_l3_workspace_size()));
pass->Set("cpu_math_library_num_threads",
new int(argument->cpu_math_library_num_threads()));
}
disable_logs_ = argument->disable_logs();
if (pass_name == "fc_fuse_pass") {
Expand Down
14 changes: 10 additions & 4 deletions paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -244,14 +244,19 @@ void LiteSubgraphPass::SetUpEngine(
bool enable_int8 = Get<bool>("enable_int8");
bool use_xpu = Get<bool>("use_xpu");
int xpu_l3_workspace_size = Get<int>("xpu_l3_workspace_size");
int cpu_math_library_num_threads = Get<int>("cpu_math_library_num_threads");

lite_api::TargetType target_type;
if (use_gpu) {
target_type = TARGET(kCUDA);
} else if (use_xpu) {
target_type = TARGET(kXPU);
} else {
#ifdef PADDLE_WITH_ARM
target_type = TARGET(kARM);
#else
target_type = TARGET(kX86);
#endif
}

paddle::lite_api::PrecisionType precision_type =
Expand All @@ -263,11 +268,12 @@ void LiteSubgraphPass::SetUpEngine(
// Notice: The ordering here determines the device where the
// input tensor of the Lite engine is located, and then affects
// whether tensor sharing is feasible.
paddle::lite::Place({target_type, precision_type}),
paddle::lite::Place({target_type, PRECISION(kInt64)}),
paddle::lite::Place({target_type, PRECISION(kFloat)}),
paddle::lite::Place({TARGET(kHost), PRECISION(kFloat)}),
paddle::lite_api::Place({target_type, precision_type}),
paddle::lite_api::Place({target_type, PRECISION(kInt64)}),
paddle::lite_api::Place({target_type, PRECISION(kFloat)}),
paddle::lite_api::Place({TARGET(kHost), PRECISION(kFloat)}),
};
config.cpu_math_library_num_threads = cpu_math_library_num_threads;
config.xpu_l3_workspace_size = xpu_l3_workspace_size;
if (dump_model) {
lite::StrToBinaryFile("./model.bin", config.model);
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/inference/api/analysis_config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ void AnalysisConfig::Update() {
}

if (use_xpu_) {
#ifndef PADDLE_WITH_XPU
#ifndef LITE_SUBGRAPH_WITH_XPU
PADDLE_THROW(platform::errors::Unavailable(
"You tried to use an XPU device, but Paddle was not compiled "
"with XPU-runtime."));
Expand Down
36 changes: 31 additions & 5 deletions paddle/fluid/inference/api/analysis_predictor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -232,8 +232,17 @@ bool AnalysisPredictor::PrepareExecutor() {

void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) {
#ifdef PADDLE_WITH_MKLDNN
VLOG(2) << "AnalysisPredictor::Run get_cur_mkldnn_session_id="
<< platform::get_cur_mkldnn_session_id();
std::vector<std::vector<int>> inputs_shape;
for (size_t i = 0; i < inputs.size(); ++i) {
inputs_shape.emplace_back(inputs[i].shape);
}
MkldnnPreSet(inputs_shape);
#endif
}

void AnalysisPredictor::MkldnnPreSet(
const std::vector<std::vector<int>> &inputs_shape) {
#ifdef PADDLE_WITH_MKLDNN
// In cache clearing mode.
if (config_.mkldnn_cache_capacity_ > 0) {
VLOG(2) << "In mkldnn cache clear mode.";
Expand All @@ -243,9 +252,9 @@ void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) {
config_.mkldnn_cache_capacity_);
// Set current_input_shape for caching dynamic shape.
std::stringstream ss;
for (size_t i = 0; i < inputs.size(); ++i) {
for (size_t j = 0; j < inputs[i].shape.size(); ++j) {
ss << inputs[i].shape[j] << "-";
for (size_t i = 0; i < inputs_shape.size(); ++i) {
for (size_t j = 0; j < inputs_shape[i].size(); ++j) {
ss << inputs_shape[i][j] << "-";
}
}
VLOG(2) << "Set input shape=" << ss.str();
Expand Down Expand Up @@ -444,6 +453,8 @@ void AnalysisPredictor::PrepareArgument() {
}

if (config_.lite_engine_enabled()) {
argument_.SetCpuMathLibraryNumThreads(
config_.cpu_math_library_num_threads());
argument_.SetLitePrecisionMode(config_.lite_precision_mode_);
argument_.SetLitePassesFilter(config_.lite_passes_filter_);
argument_.SetLiteOpsFilter(config_.lite_ops_filter_);
Expand Down Expand Up @@ -655,6 +666,18 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(

bool AnalysisPredictor::ZeroCopyRun() {
paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
#ifdef PADDLE_WITH_MKLDNN
if (config_.use_mkldnn_) {
std::vector<std::vector<int>> shape_vector;
auto names = GetInputNames();
for (size_t i = 0; i < names.size(); ++i) {
auto in_tensor = GetInputTensor(names[i]);
shape_vector.emplace_back(in_tensor->shape());
}
MkldnnPreSet(shape_vector);
}
#endif

executor_->Run();
// Fix TensorArray reuse not cleaned bug.
tensor_array_batch_cleaner_.CollectTensorArrays(sub_scope_);
Expand All @@ -663,6 +686,9 @@ bool AnalysisPredictor::ZeroCopyRun() {
// recover the cpu_math_library_num_threads to 1, in order to avoid thread
// conflict when integrating it into deployment service.
paddle::platform::SetNumThreads(1);
#ifdef PADDLE_WITH_MKLDNN
if (config_.use_mkldnn_) MkldnnPostReset();
#endif
#if defined(PADDLE_WITH_MKLML) && defined(_LINUX)
// Frees unused memory allocated by the Intel® MKL Memory Allocator to
// avoid memory leak. See:
Expand Down
11 changes: 11 additions & 0 deletions paddle/fluid/inference/api/analysis_predictor.h
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,17 @@ class AnalysisPredictor : public PaddlePredictor {
/// \param[in] inputs tensors
///
void MkldnnPreSet(const std::vector<PaddleTensor> &inputs);

///
/// \brief PreSet for Mkldnn multi-thread and dynamic shape input.
///
/// Used in AnalysisPredictor::Run(), do not support
/// AnalysisPredictor::ZeroCopyRun() now.
///
/// \param[in] inputs tensor shape
///
void MkldnnPreSet(const std::vector<std::vector<int>> &inputs_shape);

///
/// \brief PostReset for Mkldnn multi-thread and dynamic shape input.
///
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/inference/lite/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ endif()

cc_library(lite_op_teller SRCS op_teller.cc DEPS lite_full_static framework_proto device_context boost xxhash)
cc_library(lite_engine SRCS engine.cc DEPS lite_full_static framework_proto ${XPU_DEPS})
cc_library(lite_tensor_utils SRCS tensor_utils.cc DEPS memcpy lite_full_static framework_proto boost device_context)
cc_library(lite_tensor_utils SRCS tensor_utils.cc DEPS memcpy lite_full_static framework_proto boost device_context ${XPU_DEPS})
cc_test(test_lite_engine SRCS test_engine.cc DEPS lite_engine protobuf framework_proto glog gtest analysis)
cc_test(test_lite_tensor_utils SRCS test_tensor_utils.cc DEPS lite_engine lite_tensor_utils)
50 changes: 31 additions & 19 deletions paddle/fluid/inference/lite/engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,16 @@
#define LITE_WITH_CUDA 1
#endif

#ifdef PADDLE_WITH_XPU
#ifdef LITE_SUBGRAPH_WITH_XPU
#define LITE_WITH_XPU 1
#endif

#ifndef PADDLE_WITH_ARM
#define LITE_WITH_X86 1
#endif

#include "paddle/fluid/inference/lite/engine.h"
#include "lite/api/paddle_use_passes.h"
#include <utility>

namespace paddle {
namespace inference {
Expand All @@ -36,32 +40,40 @@ bool EngineManager::Has(const std::string& name) const {
return engines_.at(name).get() != nullptr;
}

paddle::lite::Predictor* EngineManager::Get(const std::string& name) const {
paddle::lite_api::PaddlePredictor* EngineManager::Get(
const std::string& name) const {
return engines_.at(name).get();
}

paddle::lite::Predictor* EngineManager::Create(const std::string& name,
const EngineConfig& cfg) {
if (cfg.valid_places.front().target == TARGET(kCUDA)) {
#ifdef PADDLE_WITH_CUDA
paddle::lite::Env<TARGET(kCUDA)>::Init();
paddle::lite_api::PaddlePredictor* EngineManager::Create(
const std::string& name, const EngineConfig& cfg) {
// config info for predictor.
paddle::lite_api::CxxConfig lite_cxx_config;
lite_cxx_config.set_model_buffer(cfg.model.c_str(), cfg.model.size(),
cfg.param.c_str(), cfg.param.size());
lite_cxx_config.set_valid_places(cfg.valid_places);
#ifdef PADDLE_WITH_ARM
set_threads.set_threads(cfg.cpu_math_library_num_threads);
#else
lite_cxx_config.set_x86_math_library_num_threads(
cfg.cpu_math_library_num_threads);
#endif
} else if (cfg.valid_places.front().target == TARGET(kXPU)) {
#ifdef PADDLE_WITH_XPU
paddle::lite::TargetWrapper<TARGET(kXPU)>::workspace_l3_size_per_thread =
cfg.xpu_l3_workspace_size;

#ifdef LITE_SUBGRAPH_WITH_XPU
lite_cxx_config.set_xpu_workspace_l3_size_per_thread(
cfg.xpu_l3_workspace_size);
#endif
}
auto* p = new paddle::lite::Predictor();
p->Build("", cfg.model, cfg.param, cfg.valid_places, cfg.neglected_passes,
cfg.model_type, cfg.model_from_memory);
engines_[name].reset(p);
return p;

// create predictor
std::shared_ptr<paddle::lite_api::PaddlePredictor> p =
paddle::lite_api::CreatePaddlePredictor(lite_cxx_config);
engines_[name] = std::move(p);
return engines_[name].get();
}

void EngineManager::DeleteAll() {
for (auto& item : engines_) {
item.second.reset(nullptr);
item.second.reset();
}
}

Expand Down
Loading