diff --git a/paddle/.set_port.sh b/paddle/.set_port.sh
index 617ac79a24889e..e71f494aadf2c5 100755
--- a/paddle/.set_port.sh
+++ b/paddle/.set_port.sh
@@ -13,6 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-DIRNAME=`dirname $0`
-source $DIRNAME/.common_test_util.sh
-set_port $@
+DIRNAME="$(dirname "$0")"
+. "$DIRNAME"/.common_test_util.sh
+set_port "$@"
diff --git a/paddle/.set_python_path.sh b/paddle/.set_python_path.sh
index 8fd58925ee4820..8da4565be617bd 100755
--- a/paddle/.set_python_path.sh
+++ b/paddle/.set_python_path.sh
@@ -24,12 +24,14 @@ PYPATH=""
 set -x
 while getopts "d:" opt; do
-  case $opt in
+  case "$opt" in
     d)
       PYPATH=$OPTARG
       ;;
+    *)
+      ;;
   esac
 done
-shift $(($OPTIND - 1))
+shift $((OPTIND - 1))
 export PYTHONPATH=$PYPATH:$PYTHONPATH
-$@
+"$@"
diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc
index 5ffaf28fe92f1f..20da74eca4ef87 100644
--- a/paddle/fluid/framework/ir/graph_pattern_detector.cc
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc
@@ -2101,7 +2101,8 @@ PDNode *patterns::QuantizePlacement::operator()(
 PDNode *patterns::Bfloat16Placement::operator()(
     const std::unordered_set<std::string> &bfloat16_enabled_op_types) {
   std::unordered_set<std::string> supported_op_types =
-      std::unordered_set<std::string>({"conv2d", "fusion_gru"});
+      std::unordered_set<std::string>(
+          {"concat", "conv2d", "fusion_gru", "reshape2", "transpose2"});
   if (!bfloat16_enabled_op_types.empty()) {
     supported_op_types = bfloat16_enabled_op_types;
   }
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass_tester.cc
index b9797a4bfcc004..146e29249b7c61 100644
--- a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass_tester.cc
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass_tester.cc
@@ -40,6 +40,10 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
     op->SetInput("X", {inputs[0], inputs[1]});
   } else if (type == "pool2d") {
     op->SetInput("X", {inputs[0]});
+  } else if (type == "transpose2") {
+    op->SetInput("X", {inputs[0]});
+  } else if (type == "reshape2") {
+    op->SetInput("X", {inputs[0]});
   } else {
     FAIL() << "Unexpected operator type.";
   }
@@ -57,8 +61,8 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
 
 ProgramDesc BuildProgramDesc() {
   ProgramDesc prog;
-  for (auto& v :
-       std::vector<std::string>({"a", "b", "c", "f", "g", "h", "k", "l"})) {
+  for (auto& v : std::vector<std::string>(
+           {"a", "b", "c", "f", "g", "h", "k", "l", "m", "n", "o", "p"})) {
     prog.MutableBlock(0)->Var(v);
   }
 
@@ -68,6 +72,9 @@ ProgramDesc BuildProgramDesc() {
   SetOp(&prog, "pool2d", "pool1", {"g"}, {"h"});
   SetOp(&prog, "conv2d", "conv2", {"h"}, {"k"});
   SetOp(&prog, "pool2d", "pool2", {"k"}, {"l"});
+  SetOp(&prog, "concat", "concat2", {"l", "m"}, {"n"});
+  SetOp(&prog, "transpose2", "transpose", {"n"}, {"o"});
+  SetOp(&prog, "reshape2", "reshape", {"o"}, {"p"});
 
   return prog;
 }
@@ -115,7 +122,7 @@ void DefaultAttrTest(unsigned expected_bfloat16_data_type_count) {
 }
 
 TEST(Bfloat16PlacementPass, enable_all) {
-  MainTest({"conv2d", "pool2d", "relu", "concat"}, 6);
+  MainTest({"conv2d", "pool2d", "relu", "concat"}, 7);
 }
 
 TEST(Bfloat16PlacementPass, enabled_conv_and_pool) {
@@ -123,7 +130,7 @@ TEST(Bfloat16PlacementPass, enabled_conv_and_pool) {
   MainTest({"conv2d", "pool2d"}, 3);
 }
 
-TEST(Bfloat16PlacementPass, default_attr_value) { DefaultAttrTest(0); }
+TEST(Bfloat16PlacementPass, default_attr_value) { DefaultAttrTest(5); }
 
 }  // namespace ir
 }  // namespace framework
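Reviewer note on the two hunks above: the new expected counts (7 for enable_all, 5 for the default attribute test) follow from the override semantics of `Bfloat16Placement` — a non-empty user-supplied set *replaces* the default supported set rather than extending it. A minimal standalone sketch of that selection logic; `SelectBfloat16Ops` is an illustrative name, not Paddle's API:

```cpp
#include <string>
#include <unordered_set>

// Sketch of the override semantics in Bfloat16Placement::operator():
// a non-empty user-provided set replaces the default set wholesale,
// it does not merge with it.
std::unordered_set<std::string> SelectBfloat16Ops(
    const std::unordered_set<std::string>& enabled_by_user) {
  std::unordered_set<std::string> supported = {
      "concat", "conv2d", "fusion_gru", "reshape2", "transpose2"};
  if (!enabled_by_user.empty()) {
    supported = enabled_by_user;  // replacement, not union
  }
  return supported;
}
```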
diff --git a/paddle/fluid/inference/api/demo_ci/clean.sh b/paddle/fluid/inference/api/demo_ci/clean.sh
index 0d9f3d2aa237ac..5f603465776f1e 100755
--- a/paddle/fluid/inference/api/demo_ci/clean.sh
+++ b/paddle/fluid/inference/api/demo_ci/clean.sh
@@ -1,4 +1,5 @@
+#!/bin/bash
 set -x
-cd `dirname $0`
+cd "$(dirname "$0")" || exit
 rm -rf build/ data/
 set +x
diff --git a/paddle/fluid/inference/api/demo_ci/run.sh b/paddle/fluid/inference/api/demo_ci/run.sh
index 6d283ca56cb652..aee013e8f36528 100755
--- a/paddle/fluid/inference/api/demo_ci/run.sh
+++ b/paddle/fluid/inference/api/demo_ci/run.sh
@@ -1,29 +1,29 @@
 #!/bin/bash
 set -x
-PADDLE_ROOT=$1
-TURN_ON_MKL=$2 # use MKL or Openblas
-TEST_GPU_CPU=$3 # test both GPU/CPU mode or only CPU mode
-DATA_DIR=$4 # dataset
-TENSORRT_INCLUDE_DIR=$5 # TensorRT header file dir, default to /usr/local/TensorRT/include
-TENSORRT_LIB_DIR=$6 # TensorRT lib file dir, default to /usr/local/TensorRT/lib
-MSVC_STATIC_CRT=$7
-inference_install_dir=${PADDLE_ROOT}/build/paddle_inference_install_dir
+PADDLE_ROOT="$1"
+TURN_ON_MKL="$2" # use MKL or Openblas
+TEST_GPU_CPU="$3" # test both GPU/CPU mode or only CPU mode
+DATA_DIR="$4" # dataset
+TENSORRT_INCLUDE_DIR="$5" # TensorRT header file dir, default to /usr/local/TensorRT/include
+TENSORRT_LIB_DIR="$6" # TensorRT lib file dir, default to /usr/local/TensorRT/lib
+MSVC_STATIC_CRT="$7"
+inference_install_dir="${PADDLE_ROOT}"/build/paddle_inference_install_dir
 
-cd `dirname $0`
-current_dir=`pwd`
-if [ $2 == ON ]; then
+cd "$(dirname "$0")" || exit
+current_dir=$(pwd)
+if [ "$2" == ON ]; then
   # You can export yourself if move the install path
-  MKL_LIB=${inference_install_dir}/third_party/install/mklml/lib
-  export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${MKL_LIB}
+  MKL_LIB="${inference_install_dir}"/third_party/install/mklml/lib
+  export LD_LIBRARY_PATH="$LD_LIBRARY_PATH":"${MKL_LIB}"
 fi
-if [ $3 == ON ]; then
+if [ "$3" == ON ]; then
   use_gpu_list='true false'
 else
   use_gpu_list='false'
 fi
 USE_TENSORRT=OFF
-if [ -d "$TENSORRT_INCLUDE_DIR" -a -d "$TENSORRT_LIB_DIR" ]; then
+if [ -d "$TENSORRT_INCLUDE_DIR" ] && [ -d "$TENSORRT_LIB_DIR" ]; then
   USE_TENSORRT=ON
 fi
@@ -32,77 +32,79 @@ URL_ROOT=http://paddlemodels.bj.bcebos.com/${PREFIX}
 
 # download vis_demo data
 function download() {
-  dir_name=$1
-  mkdir -p $dir_name
-  cd $dir_name
+  dir_name="$1"
+  mkdir -p "$dir_name"
+  cd "$dir_name" || exit
   if [[ -e "${PREFIX}${dir_name}.tar.gz" ]]; then
     echo "${PREFIX}${dir_name}.tar.gz has been downloaded."
   else
-    wget -q ${URL_ROOT}$dir_name.tar.gz
-    tar xzf *.tar.gz
+    wget -q "${URL_ROOT}""$dir_name".tar.gz
+    tar xzf ./*.tar.gz
   fi
-  cd ..
+  cd .. || exit
 }
-mkdir -p $DATA_DIR
-cd $DATA_DIR
+mkdir -p "$DATA_DIR"
+cd "$DATA_DIR" || exit
 vis_demo_list='se_resnext50 ocr mobilenet'
 for vis_demo_name in $vis_demo_list; do
-  download $vis_demo_name
+  download "$vis_demo_name"
 done
 
 # download word2vec data
 mkdir -p word2vec
-cd word2vec
+cd word2vec || exit
 if [[ -e "word2vec.inference.model.tar.gz" ]]; then
   echo "word2vec.inference.model.tar.gz has been downloaded."
 else
   wget -q http://paddle-inference-dist.bj.bcebos.com/word2vec.inference.model.tar.gz
-  tar xzf *.tar.gz
+  tar xzf ./*.tar.gz
 fi
 
 # compile and test the demo
-cd $current_dir
+cd "$current_dir" || exit
 mkdir -p build
-cd build
-rm -rf *
+cd build || exit
+rm -rf ./*
 for WITH_STATIC_LIB in ON OFF; do
-  if [ $(echo `uname` | grep "Win") != "" ]; then
+  if [ "$(uname | grep Win)" != "" ]; then
     # -----simple_on_word2vec on windows-----
-    cmake .. -G "Visual Studio 14 2015" -A x64 -DPADDLE_LIB=${inference_install_dir} \
-      -DWITH_MKL=$TURN_ON_MKL \
+    cmake .. -G "Visual Studio 14 2015" -A x64 -DPADDLE_LIB="${inference_install_dir}" \
+      -DWITH_MKL="$TURN_ON_MKL" \
       -DDEMO_NAME=simple_on_word2vec \
-      -DWITH_GPU=$TEST_GPU_CPU \
-      -DWITH_STATIC_LIB=$WITH_STATIC_LIB \
-      -DMSVC_STATIC_CRT=$MSVC_STATIC_CRT
+      -DWITH_GPU="$TEST_GPU_CPU" \
+      -DWITH_STATIC_LIB="$WITH_STATIC_LIB" \
+      -DMSVC_STATIC_CRT="$MSVC_STATIC_CRT"
     msbuild /maxcpucount /property:Configuration=Release cpp_inference_demo.sln
     for use_gpu in $use_gpu_list; do
       Release/simple_on_word2vec.exe \
-        --dirname=$DATA_DIR/word2vec/word2vec.inference.model \
-        --use_gpu=$use_gpu
-      if [ $? -ne 0 ]; then
+        --dirname="$DATA_DIR"/word2vec/word2vec.inference.model \
+        --use_gpu="$use_gpu"
+      EXCODE="$?"
+      if [ "$EXCODE" -ne 0 ]; then
        echo "simple_on_word2vec demo runs fail."
        exit 1
      fi
    done
    # -----vis_demo on windows-----
-    rm -rf *
-    cmake .. -G "Visual Studio 14 2015" -A x64 -DPADDLE_LIB=${inference_install_dir} \
-      -DWITH_MKL=$TURN_ON_MKL \
+    rm -rf ./*
+    cmake .. -G "Visual Studio 14 2015" -A x64 -DPADDLE_LIB="${inference_install_dir}" \
+      -DWITH_MKL="$TURN_ON_MKL" \
       -DDEMO_NAME=vis_demo \
-      -DWITH_GPU=$TEST_GPU_CPU \
-      -DWITH_STATIC_LIB=$WITH_STATIC_LIB \
-      -DMSVC_STATIC_CRT=$MSVC_STATIC_CRT
+      -DWITH_GPU="$TEST_GPU_CPU" \
+      -DWITH_STATIC_LIB="$WITH_STATIC_LIB" \
+      -DMSVC_STATIC_CRT="$MSVC_STATIC_CRT"
    msbuild /maxcpucount /property:Configuration=Release cpp_inference_demo.sln
    for use_gpu in $use_gpu_list; do
      for vis_demo_name in $vis_demo_list; do
       Release/vis_demo.exe \
-        --modeldir=$DATA_DIR/$vis_demo_name/model \
-        --data=$DATA_DIR/$vis_demo_name/data.txt \
-        --refer=$DATA_DIR/$vis_demo_name/result.txt \
-        --use_gpu=$use_gpu
-       if [ $? -ne 0 ]; then
+        --modeldir="$DATA_DIR"/"$vis_demo_name"/model \
+        --data="$DATA_DIR"/"$vis_demo_name"/data.txt \
+        --refer="$DATA_DIR"/"$vis_demo_name"/result.txt \
+        --use_gpu="$use_gpu"
+       EXCODE="$?"
+       if [ "$EXCODE" -ne 0 ]; then
        echo "vis demo $vis_demo_name runs fail."
        exit 1
       fi
@@ -110,63 +112,66 @@ for WITH_STATIC_LIB in ON OFF; do
     done
   else
    # -----simple_on_word2vec on linux/mac-----
-    rm -rf *
-    cmake .. -DPADDLE_LIB=${inference_install_dir} \
-      -DWITH_MKL=$TURN_ON_MKL \
+    rm -rf ./*
+    cmake .. -DPADDLE_LIB="${inference_install_dir}" \
+      -DWITH_MKL="$TURN_ON_MKL" \
       -DDEMO_NAME=simple_on_word2vec \
-      -DWITH_GPU=$TEST_GPU_CPU \
-      -DWITH_STATIC_LIB=$WITH_STATIC_LIB
-    make -j$(nproc)
-    word2vec_model=$DATA_DIR'/word2vec/word2vec.inference.model'
-    if [ -d $word2vec_model ]; then
+      -DWITH_GPU="$TEST_GPU_CPU" \
+      -DWITH_STATIC_LIB="$WITH_STATIC_LIB"
+    make -j"$(nproc)"
+    word2vec_model="$DATA_DIR"'/word2vec/word2vec.inference.model'
+    if [ -d "$word2vec_model" ]; then
      for use_gpu in $use_gpu_list; do
        ./simple_on_word2vec \
-         --dirname=$DATA_DIR/word2vec/word2vec.inference.model \
-         --use_gpu=$use_gpu
-        if [ $? -ne 0 ]; then
+         --dirname="$DATA_DIR"/word2vec/word2vec.inference.model \
+         --use_gpu="$use_gpu"
+        EXCODE="$?"
+        if [ "$EXCODE" -ne 0 ]; then
         echo "simple_on_word2vec demo runs fail."
         exit 1
        fi
      done
    fi
    # ---------vis_demo on linux/mac---------
-    rm -rf *
-    cmake .. -DPADDLE_LIB=${inference_install_dir} \
-      -DWITH_MKL=$TURN_ON_MKL \
+    rm -rf ./*
+    cmake .. -DPADDLE_LIB="${inference_install_dir}" \
+      -DWITH_MKL="$TURN_ON_MKL" \
       -DDEMO_NAME=vis_demo \
-      -DWITH_GPU=$TEST_GPU_CPU \
-      -DWITH_STATIC_LIB=$WITH_STATIC_LIB
-    make -j$(nproc)
+      -DWITH_GPU="$TEST_GPU_CPU" \
+      -DWITH_STATIC_LIB="$WITH_STATIC_LIB"
+    make -j"$(nproc)"
    for use_gpu in $use_gpu_list; do
      for vis_demo_name in $vis_demo_list; do
       ./vis_demo \
-        --modeldir=$DATA_DIR/$vis_demo_name/model \
-        --data=$DATA_DIR/$vis_demo_name/data.txt \
-        --refer=$DATA_DIR/$vis_demo_name/result.txt \
-        --use_gpu=$use_gpu
-       if [ $? -ne 0 ]; then
+        --modeldir="$DATA_DIR"/"$vis_demo_name"/model \
+        --data="$DATA_DIR"/"$vis_demo_name"/data.txt \
+        --refer="$DATA_DIR"/"$vis_demo_name"/result.txt \
+        --use_gpu="$use_gpu"
+       EXCODE="$?"
+       if [ "$EXCODE" -ne 0 ]; then
        echo "vis demo $vis_demo_name runs fail."
        exit 1
       fi
     done
    done
    # --------tensorrt mobilenet on linux/mac------
-    if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then
-      rm -rf *
-      cmake .. -DPADDLE_LIB=${inference_install_dir} \
-        -DWITH_MKL=$TURN_ON_MKL \
+    if [ "$USE_TENSORRT" == ON ] && [ "$TEST_GPU_CPU" == ON ]; then
+      rm -rf ./*
+      cmake .. -DPADDLE_LIB="${inference_install_dir}" \
+        -DWITH_MKL="$TURN_ON_MKL" \
         -DDEMO_NAME=trt_mobilenet_demo \
-        -DWITH_GPU=$TEST_GPU_CPU \
-        -DWITH_STATIC_LIB=$WITH_STATIC_LIB \
-        -DUSE_TENSORRT=$USE_TENSORRT \
-        -DTENSORRT_INCLUDE_DIR=$TENSORRT_INCLUDE_DIR \
-        -DTENSORRT_LIB_DIR=$TENSORRT_LIB_DIR
-      make -j$(nproc)
+        -DWITH_GPU="$TEST_GPU_CPU" \
+        -DWITH_STATIC_LIB="$WITH_STATIC_LIB" \
+        -DUSE_TENSORRT="$USE_TENSORRT" \
+        -DTENSORRT_INCLUDE_DIR="$TENSORRT_INCLUDE_DIR" \
+        -DTENSORRT_LIB_DIR="$TENSORRT_LIB_DIR"
+      make -j"$(nproc)"
      ./trt_mobilenet_demo \
-       --modeldir=$DATA_DIR/mobilenet/model \
-       --data=$DATA_DIR/mobilenet/data.txt \
-       --refer=$DATA_DIR/mobilenet/result.txt
-      if [ $? -ne 0 ]; then
+       --modeldir="$DATA_DIR"/mobilenet/model \
+       --data="$DATA_DIR"/mobilenet/data.txt \
+       --refer="$DATA_DIR"/mobilenet/result.txt
+      EXCODE="$?"
+      if [ "$EXCODE" -ne 0 ]; then
       echo "trt demo trt_mobilenet_demo runs fail."
       exit 1
      fi
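Reviewer note on the run.sh changes: two shellcheck-driven patterns recur throughout. `$?` is captured into `EXCODE` immediately after the command, before any other command can clobber it, and the deprecated `[ a -a b ]` conjunction is split into two tests joined by `&&`. A minimal sketch of both, assuming a stand-in `./some_demo` binary:

```bash
#!/bin/bash
# Capture the exit status right away; a later echo or test would overwrite $?.
./some_demo --use_gpu=false
EXCODE="$?"
if [ "$EXCODE" -ne 0 ]; then
  echo "some_demo runs fail."
  exit 1
fi

# Prefer two [ ] tests joined by && over the deprecated, non-portable -a operator.
if [ -d "$TENSORRT_INCLUDE_DIR" ] && [ -d "$TENSORRT_LIB_DIR" ]; then
  USE_TENSORRT=ON
fi
```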
diff --git a/paddle/fluid/inference/check_symbol.sh b/paddle/fluid/inference/check_symbol.sh
index a0f64796576c85..0c66946c4b8a1e 100755
--- a/paddle/fluid/inference/check_symbol.sh
+++ b/paddle/fluid/inference/check_symbol.sh
@@ -1,12 +1,12 @@
 #!/bin/sh
 
-lib=$1
-if [ $# -ne 1 ]; then echo "No input library"; exit -1 ; fi
+lib="$1"
+if [ "$#" -ne 1 ]; then echo "No input library"; exit 1 ; fi
 
-num_paddle_syms=$(nm -D ${lib} | grep paddle | wc -l)
-num_google_syms=$(nm -D ${lib} | grep google | grep -v paddle | grep "T " | wc -l)
+num_paddle_syms=$(nm -D "${lib}" | grep -c paddle )
+num_google_syms=$(nm -D "${lib}" | grep google | grep -v paddle | grep -c "T " )
 
-if [ $num_paddle_syms -le 0 ]; then echo "Have no paddle symbols"; exit -1 ; fi
-if [ $num_google_syms -ge 1 ]; then echo "Have some google symbols"; exit -1 ; fi
+if [ "$num_paddle_syms" -le 0 ]; then echo "Have no paddle symbols"; exit 1 ; fi
+if [ "$num_google_syms" -ge 1 ]; then echo "Have some google symbols"; exit 1 ; fi
 
 exit 0
diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc
index fd20581123c10f..0b2be0076fdb12 100644
--- a/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc
@@ -90,8 +90,6 @@ TEST(PD_AnalysisConfig, profile_mkldnn) {
   bool quantizer_enable = PD_MkldnnQuantizerEnabled(config);
   EXPECT_TRUE(quantizer_enable);
   PD_EnableMkldnnBfloat16(config);
-  bool bfloat16_enable = PD_MkldnnBfloat16Enabled(config);
-  EXPECT_TRUE(bfloat16_enable);
   PD_SetMkldnnCacheCapacity(config, 0);
   PD_SetModel(config, prog_file.c_str(), params_file.c_str());
   PD_DeleteAnalysisConfig(config);
diff --git a/paddle/fluid/inference/tests/api/analyzer_lexical_analysis_gru_tester.cc b/paddle/fluid/inference/tests/api/analyzer_lexical_analysis_gru_tester.cc
index e4035c80341379..7c5757ce9d4c63 100644
--- a/paddle/fluid/inference/tests/api/analyzer_lexical_analysis_gru_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_lexical_analysis_gru_tester.cc
@@ -190,7 +190,7 @@ std::vector<double> Lexical_Test(
     // return acc_res;
   } else {
     EXPECT_GT(outputs->size(), 0UL);
-    EXPECT_EQ(outputs[0].size(), 1UL);
+    EXPECT_GT(outputs[0].size(), 0UL);
     LOG(INFO) << "No accuracy result. To get accuracy result provide a model "
                  "with accuracy layers in it and use --with_accuracy_layer "
                  "option.";
diff --git a/paddle/fluid/operators/conv_cudnn_op.cu b/paddle/fluid/operators/conv_cudnn_op.cu
index f8b76f387cc195..3f03df04ea3765 100644
--- a/paddle/fluid/operators/conv_cudnn_op.cu
+++ b/paddle/fluid/operators/conv_cudnn_op.cu
@@ -293,8 +293,12 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
 
     // ------------------- cudnn conv forward ---------------------
     ScalingParamType<T> alpha = 1.0f;
-    ScalingParamType<T> beta = ctx.Attr<bool>("use_addto") ? 1.0f : 0.0f;
-    VLOG(4) << "Conv: use_addto = " << ctx.Attr<bool>("use_addto");
+    ScalingParamType<T> beta = 0.0f;
+
+    // NOTE(zhiqiu): inplace addto is not supported in double grad yet.
+    // ScalingParamType<T> beta = ctx.Attr<bool>("use_addto") ? 1.0f : 0.0f;
+    // VLOG(4) << "Conv: use_addto = " << ctx.Attr<bool>("use_addto");
+
     for (int i = 0; i < groups; i++) {
       workspace_handle.RunFunc(
          [&](void* workspace_ptr) {
@@ -387,6 +391,12 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
       if (input_grad) {
         ResizeToChannelFirst<platform::CUDADeviceContext, T>(
             ctx, input_grad, &transformed_input_grad_channel);
+        // NOTE(zhiqiu): If inplace_addto strategy is enabled, we need to copy
+        // the data of input_grad to transformed_input_grad_channel.
+        if (ctx.Attr<bool>("use_addto")) {
+          TransToChannelFirst<platform::CUDADeviceContext, T>(
+              ctx, input_grad, &transformed_input_grad_channel);
+        }
       }
     } else {
       transformed_input_channel.ShareDataWith(*input);
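For context on the `beta` change above: cuDNN's convolution routines blend the result into the output buffer as `out = alpha * conv(in, filter) + beta * out`, so `beta = 1` implements the inplace "addto" (accumulate) strategy and `beta = 0` a plain overwrite. A scalar sketch of that blending contract — not the cuDNN call itself:

```cpp
#include <cassert>

// Illustrates cuDNN's alpha/beta scaling contract on a single element:
// with beta == 0 the previous output is discarded, with beta == 1 the new
// result is accumulated into it (the "addto" behavior disabled above).
float BlendOutput(float conv_result, float prev_out, float alpha, float beta) {
  return alpha * conv_result + beta * prev_out;
}

int main() {
  assert(BlendOutput(2.0f, 5.0f, 1.0f, 0.0f) == 2.0f);  // overwrite
  assert(BlendOutput(2.0f, 5.0f, 1.0f, 1.0f) == 7.0f);  // accumulate (addto)
  return 0;
}
```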
diff --git a/paddle/fluid/operators/detection/multiclass_nms_op.cc b/paddle/fluid/operators/detection/multiclass_nms_op.cc
index 0e835a62839b4b..7927410ef37862 100644
--- a/paddle/fluid/operators/detection/multiclass_nms_op.cc
+++ b/paddle/fluid/operators/detection/multiclass_nms_op.cc
@@ -21,6 +21,16 @@ namespace operators {
 using Tensor = framework::Tensor;
 using LoDTensor = framework::LoDTensor;
 
+inline std::vector<size_t> GetNmsLodFromRoisNum(const Tensor* rois_num) {
+  std::vector<size_t> rois_lod;
+  auto* rois_num_data = rois_num->data<int>();
+  rois_lod.push_back(static_cast<size_t>(0));
+  for (int i = 0; i < rois_num->numel(); ++i) {
+    rois_lod.push_back(rois_lod.back() + static_cast<size_t>(rois_num_data[i]));
+  }
+  return rois_lod;
+}
+
 class MultiClassNMSOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
@@ -321,6 +331,8 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
     auto* outs = ctx.Output<LoDTensor>("Out");
     bool return_index = ctx.HasOutput("Index") ? true : false;
     auto index = ctx.Output<LoDTensor>("Index");
+    bool has_roisnum = ctx.HasInput("RoisNum") ? true : false;
+    auto rois_num = ctx.Input<Tensor>("RoisNum");
     auto score_dims = scores->dims();
     auto score_size = score_dims.size();
     auto& dev_ctx = ctx.template device_context<platform::CPUDeviceContext>();
@@ -332,7 +344,12 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
     int64_t out_dim = box_dim + 2;
     int num_nmsed_out = 0;
     Tensor boxes_slice, scores_slice;
-    int n = score_size == 3 ? batch_size : boxes->lod().back().size() - 1;
+    int n = 0;
+    if (has_roisnum) {
+      n = score_size == 3 ? batch_size : rois_num->numel();
+    } else {
+      n = score_size == 3 ? batch_size : boxes->lod().back().size() - 1;
+    }
     for (int i = 0; i < n; ++i) {
       std::map<int, std::vector<int>> indices;
       if (score_size == 3) {
@@ -341,7 +358,12 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
         boxes_slice = boxes->Slice(i, i + 1);
         boxes_slice.Resize({score_dims[2], box_dim});
       } else {
-        auto boxes_lod = boxes->lod().back();
+        std::vector<size_t> boxes_lod;
+        if (has_roisnum) {
+          boxes_lod = GetNmsLodFromRoisNum(rois_num);
+        } else {
+          boxes_lod = boxes->lod().back();
+        }
         if (boxes_lod[i] == boxes_lod[i + 1]) {
           all_indices.push_back(indices);
           batch_starts.push_back(batch_starts.back());
@@ -380,7 +402,12 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
           offset = i * score_dims[2];
         }
       } else {
-        auto boxes_lod = boxes->lod().back();
+        std::vector<size_t> boxes_lod;
+        if (has_roisnum) {
+          boxes_lod = GetNmsLodFromRoisNum(rois_num);
+        } else {
+          boxes_lod = boxes->lod().back();
+        }
         if (boxes_lod[i] == boxes_lod[i + 1]) continue;
         scores_slice = scores->Slice(boxes_lod[i], boxes_lod[i + 1]);
         boxes_slice = boxes->Slice(boxes_lod[i], boxes_lod[i + 1]);
@@ -403,6 +430,15 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
         }
       }
     }
+    if (ctx.HasOutput("NmsRoisNum")) {
+      auto* nms_rois_num = ctx.Output<Tensor>("NmsRoisNum");
+      nms_rois_num->mutable_data<int>({n}, ctx.GetPlace());
+      int* num_data = nms_rois_num->data<int>();
+      for (int i = 1; i <= n; i++) {
+        num_data[i - 1] = batch_starts[i] - batch_starts[i - 1];
+      }
+      nms_rois_num->Resize({n});
+    }
 
     framework::LoD lod;
     lod.emplace_back(batch_starts);
@@ -535,6 +571,34 @@ class MultiClassNMS2OpMaker : public MultiClassNMSOpMaker {
   }
 };
 
+class MultiClassNMS3Op : public MultiClassNMS2Op {
+ public:
+  MultiClassNMS3Op(const std::string& type,
+                   const framework::VariableNameMap& inputs,
+                   const framework::VariableNameMap& outputs,
+                   const framework::AttributeMap& attrs)
+      : MultiClassNMS2Op(type, inputs, outputs, attrs) {}
+
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    MultiClassNMS2Op::InferShape(ctx);
+
+    ctx->SetOutputDim("NmsRoisNum", {-1});
+  }
+};
+
+class MultiClassNMS3OpMaker : public MultiClassNMS2OpMaker {
+ public:
+  void Make() override {
+    MultiClassNMS2OpMaker::Make();
+    AddInput("RoisNum",
+             "(Tensor) The number of RoIs in shape (B),"
+             "B is the number of images")
+        .AsDispensable();
+    AddOutput("NmsRoisNum", "(Tensor), The number of NMS RoIs in each image")
+        .AsDispensable();
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
 
@@ -551,3 +615,10 @@ REGISTER_OPERATOR(
     paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
 REGISTER_OP_CPU_KERNEL(multiclass_nms2, ops::MultiClassNMSKernel<float>,
                        ops::MultiClassNMSKernel<double>);
+
+REGISTER_OPERATOR(
+    multiclass_nms3, ops::MultiClassNMS3Op, ops::MultiClassNMS3OpMaker,
+    paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
+    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
+REGISTER_OP_CPU_KERNEL(multiclass_nms3, ops::MultiClassNMSKernel<float>,
+                       ops::MultiClassNMSKernel<double>);
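Reviewer note: `GetNmsLodFromRoisNum` above is a plain exclusive prefix sum — per-image RoI counts become LoD offsets, so image `i` owns rows `[lod[i], lod[i+1])`. A self-contained sketch of the same computation over a `std::vector<int>` instead of a `Tensor`:

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

// Same computation as GetNmsLodFromRoisNum, but on a plain vector:
// counts {3, 0, 2} -> offsets {0, 3, 3, 5}. An empty image yields an
// empty slice, which is why the kernel skips lod[i] == lod[i + 1] above.
std::vector<size_t> LodFromCounts(const std::vector<int>& counts) {
  std::vector<size_t> lod;
  lod.push_back(0);
  for (int c : counts) {
    lod.push_back(lod.back() + static_cast<size_t>(c));
  }
  return lod;
}

int main() {
  for (size_t offset : LodFromCounts({3, 0, 2})) {
    std::cout << offset << " ";  // prints: 0 3 3 5
  }
  std::cout << "\n";
  return 0;
}
```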
diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc
index b2815cbdc65b53..bb475b4e543660 100644
--- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc
@@ -221,5 +221,6 @@ namespace ops = paddle::operators;
 
 REGISTER_OP_KERNEL(concat, MKLDNN, ::paddle::platform::CPUPlace,
                    ops::ConcatMKLDNNOpKernel<float>,
+                   ops::ConcatMKLDNNOpKernel<paddle::platform::bfloat16>,
                    ops::ConcatMKLDNNOpKernel<int8_t>,
                    ops::ConcatMKLDNNOpKernel<uint8_t>);
diff --git a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc
index 398bdb01b5c240..28cdd8413ab134 100644
--- a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc
@@ -142,6 +142,11 @@ REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(transpose2, MKLDNN,
                                     ops::kTransposeMKLDNNINT8,
                                     ops::TransposeMKLDNNOpKernel<int8_t>);
 
+REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(
+    transpose2, MKLDNN, ::paddle::platform::CPUPlace, BF16,
+    ops::kTransposeMKLDNNFP32,
+    ops::TransposeMKLDNNOpKernel<paddle::platform::bfloat16>);
+
 REGISTER_OP_KERNEL(transpose, MKLDNN, ::paddle::platform::CPUPlace,
                    ops::TransposeMKLDNNOpKernel<float>);
diff --git a/paddle/fluid/operators/reshape_op.cc b/paddle/fluid/operators/reshape_op.cc
index aa8e39037062e5..7cf85420c579b6 100644
--- a/paddle/fluid/operators/reshape_op.cc
+++ b/paddle/fluid/operators/reshape_op.cc
@@ -622,7 +622,9 @@ REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape2, float, ops::ReshapeKernel, double,
                                ops::ReshapeKernel, int8_t, ops::ReshapeKernel,
                                uint8_t, ops::ReshapeKernel, int,
                                ops::ReshapeKernel, int64_t, ops::ReshapeKernel,
-                               bool, ops::ReshapeKernel);
+                               bool, ops::ReshapeKernel,
+                               paddle::platform::bfloat16, ops::ReshapeKernel);
+
 REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape2_grad, float, ops::ReshapeGradKernel,
                                double, ops::ReshapeGradKernel, int,
                                ops::ReshapeGradKernel, int64_t,
diff --git a/paddle/fluid/pybind/op_function_generator.cc b/paddle/fluid/pybind/op_function_generator.cc
index 7f2736a9b1d414..92006bff2cc16c 100644
--- a/paddle/fluid/pybind/op_function_generator.cc
+++ b/paddle/fluid/pybind/op_function_generator.cc
@@ -52,6 +52,8 @@ std::map<std::string, std::vector<std::string>> op_ins_map = {
     {"hierarchical_sigmoid",
      {"X", "W", "Label", "PathTable", "PathCode", "Bias"}},
     {"moving_average_abs_max_scale", {"X", "InAccum", "InState"}},
+    {"multiclass_nms3", {"BBoxes", "Scores", "RoisNum"}},
+    {"box_coder", {"PriorBox", "PriorBoxVar", "TargetBox"}},
 };
 
 // NOTE(zhiqiu): Like op_ins_map.
@@ -78,6 +80,7 @@ std::map<std::string, std::vector<std::string>> op_outs_map = {
     {"distribute_fpn_proposals",
      {"MultiFpnRois", "RestoreIndex", "MultiLevelRoIsNum"}},
     {"moving_average_abs_max_scale", {"OutScale", "OutAccum", "OutState"}},
+    {"multiclass_nms3", {"Out", "NmsRoisNum"}},
 };
 
 // NOTE(zhiqiu): Commonly, the outputs in auto-generated OP function are
diff --git a/paddle/fluid/train/demo/clean.sh b/paddle/fluid/train/demo/clean.sh
index a2064492c08b84..192bdf8752c159 100755
--- a/paddle/fluid/train/demo/clean.sh
+++ b/paddle/fluid/train/demo/clean.sh
@@ -15,6 +15,6 @@
 # limitations under the License.
 
 set -x
-cd "$(dirname "$0")"
+cd "$(dirname "$0")" || exit
 rm -rf build/
 set +x
diff --git a/paddle/fluid/train/demo/run.sh b/paddle/fluid/train/demo/run.sh
index 2955e7574daa2d..a9c0ed4ac68a2a 100755
--- a/paddle/fluid/train/demo/run.sh
+++ b/paddle/fluid/train/demo/run.sh
@@ -14,14 +14,14 @@ function download() {
 download
 
 # build demo trainer
-paddle_install_dir=${PADDLE_ROOT}/build/paddle_install_dir
+paddle_install_dir="${PADDLE_ROOT}"/build/paddle_install_dir
 
 mkdir -p build
-cd build
-rm -rf *
-cmake .. -DPADDLE_LIB=$paddle_install_dir \
-         -DWITH_MKLDNN=$TURN_ON_MKL \
-         -DWITH_MKL=$TURN_ON_MKL
+cd build || exit
+rm -rf ./*
+cmake .. -DPADDLE_LIB="$paddle_install_dir" \
+         -DWITH_MKLDNN="$TURN_ON_MKL" \
+         -DWITH_MKL="$TURN_ON_MKL"
 make
 
 cd ..
diff --git a/paddle/fluid/train/imdb_demo/run.sh b/paddle/fluid/train/imdb_demo/run.sh
index f71b4bac602a9e..8a585c614e53fe 100644
--- a/paddle/fluid/train/imdb_demo/run.sh
+++ b/paddle/fluid/train/imdb_demo/run.sh
@@ -1,3 +1,3 @@
-
+#!/bin/bash
 set -exu
 build/demo_trainer --flagfile="train.cfg"
diff --git a/paddle/scripts/paddle_docker_build.sh b/paddle/scripts/paddle_docker_build.sh
index d6b639d0da2a54..fdd0d490a6fdb7 100755
--- a/paddle/scripts/paddle_docker_build.sh
+++ b/paddle/scripts/paddle_docker_build.sh
@@ -15,14 +15,14 @@
 # limitations under the License.
 
 function start_build_docker() {
-  docker pull $IMG
+  docker pull "$IMG"
 
   apt_mirror='s#http://archive.ubuntu.com/ubuntu#mirror://mirrors.ubuntu.com/mirrors.txt#g'
   DOCKER_ENV=$(cat <<EOL
diff --git a/tools/dockerfile/Dockerfile.cinn b/tools/dockerfile/Dockerfile.cinn
new file mode 100644
--- /dev/null
+++ b/tools/dockerfile/Dockerfile.cinn
@@ -0,0 +1,152 @@
+# A image for building paddle binaries
+# Use cuda devel base image for both cpu and gpu environment
+# When you modify it, please be aware of cudnn-runtime version
+FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
+MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
+
+# ENV variables
+ARG WITH_GPU
+ARG WITH_AVX
+
+ENV WITH_GPU=${WITH_GPU:-ON}
+ENV WITH_AVX=${WITH_AVX:-ON}
+ENV DEBIAN_FRONTEND=noninteractive
+
+ENV HOME /root
+# Add bash enhancements
+COPY paddle/scripts/docker/root/ /root/
+
+RUN apt-get update && \
+    apt-get install -y software-properties-common && add-apt-repository ppa:deadsnakes/ppa && \
+    apt-get update && \
+    apt-get install -y curl wget vim git unzip unrar tar xz-utils bzip2 gzip \
+    coreutils ntp language-pack-zh-hans python-qt4 libsm6 libxext6 libxrender-dev
+
+
+# Downgrade gcc&&g++
+WORKDIR /usr/bin
+  RUN apt-get update --fix-missing
+  COPY tools/dockerfile/build_scripts /build_scripts
+  RUN bash /build_scripts/install_gcc.sh gcc82 && rm -rf /build_scripts
+  RUN cp gcc gcc.bak && cp g++ g++.bak && rm gcc && rm g++
+  RUN ln -s /usr/local/gcc-8.2/bin/gcc /usr/local/bin/gcc
+  RUN ln -s /usr/local/gcc-8.2/bin/g++ /usr/local/bin/g++
+  RUN ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/gcc
+  RUN ln -s /usr/local/gcc-8.2/bin/g++ /usr/bin/g++
+  ENV PATH=/usr/local/gcc-8.2/bin:$PATH
+
+RUN apt-get update && \
+    apt-get install -y python2.7 python2.7-dev \
+    python3.5 python3.5-dev \
+    python3.6 python3.6-dev \
+    python3.7 python3.7-dev \
+    python3.8 python3.8-dev && \
+    curl https://bootstrap.pypa.io/ez_setup.py -o - | python2.7 && easy_install pip && \
+    curl https://bootstrap.pypa.io/ez_setup.py -o - | python3.5 && easy_install pip && \
+    curl https://bootstrap.pypa.io/ez_setup.py -o - | python3.6 && easy_install pip && \
+    curl https://bootstrap.pypa.io/ez_setup.py -o - | python3.7 && easy_install pip && \
+    curl https://bootstrap.pypa.io/ez_setup.py -o - | python3.8 && easy_install pip && \
+    rm /usr/bin/python && ln -s /usr/bin/python2.7 /usr/bin/python && \
+    rm /usr/bin/python3 && ln -s /usr/bin/python3.5 /usr/bin/python3 && \
+    rm /usr/local/bin/pip && ln -s /usr/local/bin/pip2.7 /usr/local/bin/pip && \
+    rm /usr/local/bin/pip3 && ln -s /usr/local/bin/pip3.5 /usr/local/bin/pip3
+
+
+# install cmake
+WORKDIR /home
+RUN wget -q https://cmake.org/files/v3.16/cmake-3.16.0-Linux-x86_64.tar.gz && tar -zxvf cmake-3.16.0-Linux-x86_64.tar.gz && rm cmake-3.16.0-Linux-x86_64.tar.gz
+ENV PATH=/home/cmake-3.16.0-Linux-x86_64/bin:$PATH
+
+
+# remove them when apt-get support 2.27 and higher version
+RUN wget -q https://ftp.gnu.org/gnu/binutils/binutils-2.33.1.tar.gz && \
+    tar -xzf binutils-2.33.1.tar.gz && \
+    cd binutils-2.33.1 && \
+    ./configure && make -j && make install && cd .. && rm -rf binutils-2.33.1 binutils-2.33.1.tar.gz
+
+
+# Install Go and glide
+RUN wget -qO- https://paddle-ci.cdn.bcebos.com/go1.8.1.linux-amd64.tar.gz | \
+    tar -xz -C /usr/local && \
+    mkdir /root/gopath && \
+    mkdir /root/gopath/bin && \
+    mkdir /root/gopath/src
+ENV GOROOT=/usr/local/go GOPATH=/root/gopath
+# should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT.
+ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin
+# install glide
+RUN curl -s -q https://glide.sh/get | sh
+
+# git credential to skip password typing
+RUN git config --global credential.helper store
+
+# Fix locales to en_US.UTF-8
+RUN localedef -i en_US -f UTF-8 en_US.UTF-8
+
+RUN pip3 --no-cache-dir install pre-commit==1.10.4 ipython==5.3.0 && \
+    pip3 --no-cache-dir install ipykernel==4.6.0 wheel && \
+    pip3.6 --no-cache-dir install pre-commit==1.10.4 ipython==5.3.0 && \
+    pip3.6 --no-cache-dir install ipykernel==4.6.0 wheel && \
+    pip3.7 --no-cache-dir install pre-commit==1.10.4 ipython==5.3.0 && \
+    pip3.7 --no-cache-dir install ipykernel==4.6.0 wheel && \
+    pip3.8 --no-cache-dir install pre-commit==1.10.4 ipython==5.3.0 && \
+    pip3.8 --no-cache-dir install ipykernel==4.6.0 wheel && \
+    pip --no-cache-dir install pre-commit==1.10.4 ipython==5.3.0 && \
+    pip --no-cache-dir install ipykernel==4.6.0 wheel
+
+#For docstring checker
+RUN pip3 --no-cache-dir install pylint pytest astroid isort && \
+    pip3.6 --no-cache-dir install pylint pytest astroid isort && \
+    pip3.7 --no-cache-dir install pylint pytest astroid isort && \
+    pip3.8 --no-cache-dir install pylint pytest astroid isort && \
+    pip --no-cache-dir install pylint pytest astroid isort
+
+COPY ./python/requirements.txt /root/
+RUN pip3 --no-cache-dir install -r /root/requirements.txt && \
+    pip3.6 --no-cache-dir install -r /root/requirements.txt && \
+    pip3.7 --no-cache-dir install -r /root/requirements.txt && \
+    pip3.8 --no-cache-dir install -r /root/requirements.txt && \
+    pip --no-cache-dir install -r /root/requirements.txt
+
+
+# Older versions of patchelf limited the size of the files being processed and were fixed in this pr.
+# https://github.com/NixOS/patchelf/commit/ba2695a8110abbc8cc6baf0eea819922ee5007fa
+# So install a newer version here.
+RUN wget -q http://mirrors.kernel.org/ubuntu/pool/universe/p/patchelf/patchelf_0.10-2_amd64.deb && \
+    dpkg -i patchelf_0.10-2_amd64.deb
+
+# Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service
+#RUN mkdir /var/run/sshd && echo 'root:root' | chpasswd && sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config && sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config
+#CMD source ~/.bashrc
+
+# ccache 3.7.9
+RUN wget https://paddle-ci.gz.bcebos.com/ccache-3.7.9.tar.gz && \
+    tar xf ccache-3.7.9.tar.gz && mkdir /usr/local/ccache-3.7.9 && cd ccache-3.7.9 && \
+    ./configure -prefix=/usr/local/ccache-3.7.9 && \
+    make -j8 && make install && \
+    ln -s /usr/local/ccache-3.7.9/bin/ccache /usr/local/bin/ccache
+
+# For CINN environment
+RUN apt update --fix-missing
+RUN apt-get install -y autoconf autogen
+RUN apt-get install -y libtool
+RUN apt-get install -y zlib1g-dev
+RUN apt install libginac-dev -y
+RUN apt install clang cmake -y
+RUN python3 -m pip install numpy
+RUN python3 -m pip install pybind11
+
+
+# Install LLVM
+RUN echo "deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic main" >> /etc/apt/sources.list
+RUN echo "deb-src http://apt.llvm.org/bionic/ llvm-toolchain-bionic main" >> /etc/apt/sources.list
+RUN echo "deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-10 main" >> /etc/apt/sources.list
+RUN echo "deb-src http://apt.llvm.org/bionic/ llvm-toolchain-bionic-10 main" >> /etc/apt/sources.list
+RUN ln -s /usr/bin/llvm-config-6.0 /usr/bin/llvm-config
+RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key|apt-key add -
+
+RUN apt update
+RUN apt install libclang-dev llvm-10 llvm-10-dev libclang-10-dev -y
+
+
+EXPOSE 22
diff --git a/tools/dockerfile/build_scripts/install_nccl2.sh b/tools/dockerfile/build_scripts/install_nccl2.sh
index 0c9bf1409d90d8..2708f4f976d232 100644
--- a/tools/dockerfile/build_scripts/install_nccl2.sh
+++ b/tools/dockerfile/build_scripts/install_nccl2.sh
@@ -24,8 +24,8 @@ wget -q -O $DIR/$DEB $URL
 cd $DIR && ar x $DEB && tar xf data.tar.xz
 DEBS=$(find ./var/ -name "*.deb")
 for sub_deb in $DEBS; do
-  echo $sub_deb
-  ar x $sub_deb && tar xf data.tar.xz
+  echo "$sub_deb"
+  ar x "$sub_deb" && tar xf data.tar.xz
 done
 mv -f usr/include/nccl.h /usr/local/include/
 mv -f usr/lib/x86_64-linux-gnu/libnccl* /usr/local/lib/
diff --git a/tools/gen_alias_mapping.sh b/tools/gen_alias_mapping.sh
index 3ab1e68b375574..d199c535f96737 100755
--- a/tools/gen_alias_mapping.sh
+++ b/tools/gen_alias_mapping.sh
@@ -31,9 +31,9 @@
 #         <real_api>\t<alias_1>,<alias_2>,<alias_3>,...
 
 
-PADDLE_ROOT="$(dirname $(readlink -f ${BASH_SOURCE[0]}))/.."
+PADDLE_ROOT="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")/.."
 
-find ${PADDLE_ROOT}/python/ -name '*.py' \
+find "${PADDLE_ROOT}"/python/ -name '*.py' \
     | xargs grep -v '^#' \
     | grep 'DEFINE_ALIAS' \
     | perl -ne '
diff --git a/tools/manylinux1/build_scripts/install_nccl2.sh b/tools/manylinux1/build_scripts/install_nccl2.sh
index 0c9bf1409d90d8..c2adf6a79de4bb 100644
--- a/tools/manylinux1/build_scripts/install_nccl2.sh
+++ b/tools/manylinux1/build_scripts/install_nccl2.sh
@@ -1,4 +1,19 @@
 #!/bin/bash
+
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 VERSION=$(nvcc --version | grep release | grep -oEi "release ([0-9]+)\.([0-9])"| sed "s/release //")
 if [ "$VERSION" == "10.0" ]; then
   DEB="nccl-repo-ubuntu1604-2.4.7-ga-cuda10.0_1-1_amd64.deb"
@@ -24,10 +39,10 @@ wget -q -O $DIR/$DEB $URL
 cd $DIR && ar x $DEB && tar xf data.tar.xz
 DEBS=$(find ./var/ -name "*.deb")
 for sub_deb in $DEBS; do
-  echo $sub_deb
-  ar x $sub_deb && tar xf data.tar.xz
+  echo "$sub_deb"
+  ar x "$sub_deb" && tar xf data.tar.xz
 done
 mv -f usr/include/nccl.h /usr/local/include/
 mv -f usr/lib/x86_64-linux-gnu/libnccl* /usr/local/lib/
 rm /usr/include/nccl.h
-rm -rf $DIR
+rm -rf "$DIR"
diff --git a/tools/static_mode_white_list.pyc b/tools/static_mode_white_list.pyc
new file mode 100644
index 00000000000000..7d2a45c248ce27
Binary files /dev/null and b/tools/static_mode_white_list.pyc differ