From 89530384008b023dc1e8c51e5a8e7e710718efff Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Wed, 28 Oct 2020 08:50:39 -0500 Subject: [PATCH 01/10] Fix transpose in conv cudnn kernel when addto enabled (#28295) --- paddle/fluid/operators/conv_cudnn_op.cu | 14 ++++++- .../unittests/test_inplace_addto_strategy.py | 39 +++++++++++-------- 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/paddle/fluid/operators/conv_cudnn_op.cu b/paddle/fluid/operators/conv_cudnn_op.cu index f8b76f387cc195..3f03df04ea3765 100644 --- a/paddle/fluid/operators/conv_cudnn_op.cu +++ b/paddle/fluid/operators/conv_cudnn_op.cu @@ -293,8 +293,12 @@ class CUDNNConvOpKernel : public framework::OpKernel { // ------------------- cudnn conv forward --------------------- ScalingParamType alpha = 1.0f; - ScalingParamType beta = ctx.Attr("use_addto") ? 1.0f : 0.0f; - VLOG(4) << "Conv: use_addto = " << ctx.Attr("use_addto"); + ScalingParamType beta = 0.0f; + + // NOTE(zhiqiu): inplace addto is not supportted in double grad yet. + // ScalingParamType beta = ctx.Attr("use_addto") ? 1.0f : 0.0f; + // VLOG(4) << "Conv: use_addto = " << ctx.Attr("use_addto"); + for (int i = 0; i < groups; i++) { workspace_handle.RunFunc( [&](void* workspace_ptr) { @@ -387,6 +391,12 @@ class CUDNNConvGradOpKernel : public framework::OpKernel { if (input_grad) { ResizeToChannelFirst( ctx, input_grad, &transformed_input_grad_channel); + // NOTE(zhiqiu): If inplace_addto strategy is enabled, we need to copy + // the data of input_grad to transformed_input_grad_channel. 
+ if (ctx.Attr("use_addto")) { + TransToChannelFirst( + ctx, input_grad, &transformed_input_grad_channel); + } } } else { transformed_input_channel.ShareDataWith(*input); diff --git a/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py b/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py index 0c43d5693456c4..b9089448d53f1c 100644 --- a/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py +++ b/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py @@ -30,22 +30,21 @@ def __init__(self, filter_size, stride=1, groups=1, - act=None, - use_cudnn=False): + data_format="NCHW"): super(ConvBNLayer, self).__init__() - self._conv = fluid.dygraph.Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, + self._conv = paddle.nn.Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, stride=stride, padding=(filter_size - 1) // 2, groups=groups, - act=None, bias_attr=False, - use_cudnn=use_cudnn) + data_format=data_format) - self._batch_norm = fluid.dygraph.BatchNorm(num_filters, act=act) + self._batch_norm = paddle.nn.BatchNorm( + num_filters, data_layout=data_format) def forward(self, inputs): y = self._conv(inputs) @@ -53,19 +52,20 @@ def forward(self, inputs): return y -def create_program(): +def create_program(data_format="NCHW"): main = fluid.Program() startup = fluid.Program() with fluid.program_guard(main, startup): x = fluid.data(name='img', shape=[-1, 3, 224, 224]) x.stop_gradient = False + if data_format == "NHWC": + x = paddle.transpose(x, [0, 2, 3, 1]) x = fluid.layers.prelu(x, mode="channel") conv = ConvBNLayer( num_channels=3, num_filters=3, filter_size=1, - act='relu', - use_cudnn=True) + data_format=data_format) y = conv(x) + x loss = fluid.layers.reduce_sum(y) @@ -77,7 +77,7 @@ def create_program(): class TestInplaceAddto(unittest.TestCase): - def test_result(self): + def check_result(self, data_format="NCHW"): def 
run_program(enable_addto): np.random.seed(10) paddle.seed(10) @@ -85,7 +85,7 @@ def run_program(enable_addto): if fluid.core.is_compiled_with_cuda(): fluid.set_flags({"FLAGS_cudnn_deterministic": True}) fluid.set_flags({"FLAGS_max_inplace_grad_add": 2}) - loss, main, startup, w = create_program() + loss, main, startup, w = create_program(data_format=data_format) place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( ) else fluid.CPUPlace() exe = fluid.Executor(place) @@ -98,7 +98,7 @@ def run_program(enable_addto): exe.run(startup) img = np.random.uniform(-128, 128, [8, 3, 224, 224]).astype(np.float32) - for i in range(2): + for i in range(10): res = exe.run(compiled, feed={'img': img}, fetch_list=[loss.name, w.name]) @@ -106,9 +106,16 @@ def run_program(enable_addto): res1, w1 = run_program(True) res2, w2 = run_program(False) - print(res1, res2) + self.assertTrue(np.array_equal(res1, res2)) + def test_nchw(self): + self.check_result() + + def test_nhwc(self): + self.check_result("NHWC") + if __name__ == "__main__": + paddle.enable_static() unittest.main() From 03511689670ee3d4652fd36d8e610703b3c6fb7b Mon Sep 17 00:00:00 2001 From: LielinJiang <50691816+LielinJiang@users.noreply.github.com> Date: Thu, 29 Oct 2020 09:36:34 +0800 Subject: [PATCH 02/10] Fix lr scheduler step error in hapi when use static mode (#28297) * fix lr scheduler * fix code style --- python/paddle/hapi/model.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/paddle/hapi/model.py b/python/paddle/hapi/model.py index ff962fb1c1d5cb..aa99d698bc7b2b 100644 --- a/python/paddle/hapi/model.py +++ b/python/paddle/hapi/model.py @@ -454,7 +454,8 @@ def _run(self, inputs, labels=None): rets.insert(i, feed[name]) # step learning rate scheduler on each batch end - if self.model._optimizer and \ + if self.model._optimizer and self.mode == 'train' and \ + hasattr(self.model._optimizer, '_learning_rate') and \ isinstance(self.model._optimizer._learning_rate, 
paddle.optimizer.lr.LRScheduler): self.model._optimizer._learning_rate.step() From 842a4e5abd2c766e10368fac1251a16e2c389a00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=9F=B3=E6=99=93=E4=BC=9F?= <39303645+Shixiaowei02@users.noreply.github.com> Date: Thu, 29 Oct 2020 10:01:56 +0800 Subject: [PATCH 03/10] fix analyzer_capi_tester, test=develop (#28289) --- paddle/fluid/inference/tests/api/analyzer_capi_tester.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc index fd20581123c10f..0b2be0076fdb12 100644 --- a/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc @@ -90,8 +90,6 @@ TEST(PD_AnalysisConfig, profile_mkldnn) { bool quantizer_enable = PD_MkldnnQuantizerEnabled(config); EXPECT_TRUE(quantizer_enable); PD_EnableMkldnnBfloat16(config); - bool bfloat16_enable = PD_MkldnnBfloat16Enabled(config); - EXPECT_TRUE(bfloat16_enable); PD_SetMkldnnCacheCapacity(config, 0); PD_SetModel(config, prog_file.c_str(), params_file.c_str()); PD_DeleteAnalysisConfig(config); From e8f2614da5e57b114efeaceaff8f8488575bd8c4 Mon Sep 17 00:00:00 2001 From: Guanghua Yu <742925032@qq.com> Date: Thu, 29 Oct 2020 10:54:43 +0800 Subject: [PATCH 04/10] Enhance multiclass_nms op to support LoD for dygraph mode (#28276) * Enhance multiclass_nms to support LoD for dygraph mode * fix some error in multiclass_nms * update GetLodFromRoisNum to GetNmsLodFromRoisNum --- .../operators/detection/multiclass_nms_op.cc | 77 ++++++++++- paddle/fluid/pybind/op_function_generator.cc | 2 + .../tests/unittests/test_multiclass_nms_op.py | 122 ++++++++++++++++++ tools/static_mode_white_list.pyc | Bin 0 -> 21082 bytes 4 files changed, 198 insertions(+), 3 deletions(-) create mode 100644 tools/static_mode_white_list.pyc diff --git a/paddle/fluid/operators/detection/multiclass_nms_op.cc 
b/paddle/fluid/operators/detection/multiclass_nms_op.cc index 0e835a62839b4b..7927410ef37862 100644 --- a/paddle/fluid/operators/detection/multiclass_nms_op.cc +++ b/paddle/fluid/operators/detection/multiclass_nms_op.cc @@ -21,6 +21,16 @@ namespace operators { using Tensor = framework::Tensor; using LoDTensor = framework::LoDTensor; +inline std::vector GetNmsLodFromRoisNum(const Tensor* rois_num) { + std::vector rois_lod; + auto* rois_num_data = rois_num->data(); + rois_lod.push_back(static_cast(0)); + for (int i = 0; i < rois_num->numel(); ++i) { + rois_lod.push_back(rois_lod.back() + static_cast(rois_num_data[i])); + } + return rois_lod; +} + class MultiClassNMSOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -321,6 +331,8 @@ class MultiClassNMSKernel : public framework::OpKernel { auto* outs = ctx.Output("Out"); bool return_index = ctx.HasOutput("Index") ? true : false; auto index = ctx.Output("Index"); + bool has_roisnum = ctx.HasInput("RoisNum") ? true : false; + auto rois_num = ctx.Input("RoisNum"); auto score_dims = scores->dims(); auto score_size = score_dims.size(); auto& dev_ctx = ctx.template device_context(); @@ -332,7 +344,12 @@ class MultiClassNMSKernel : public framework::OpKernel { int64_t out_dim = box_dim + 2; int num_nmsed_out = 0; Tensor boxes_slice, scores_slice; - int n = score_size == 3 ? batch_size : boxes->lod().back().size() - 1; + int n = 0; + if (has_roisnum) { + n = score_size == 3 ? batch_size : rois_num->numel(); + } else { + n = score_size == 3 ? 
batch_size : boxes->lod().back().size() - 1; + } for (int i = 0; i < n; ++i) { std::map> indices; if (score_size == 3) { @@ -341,7 +358,12 @@ class MultiClassNMSKernel : public framework::OpKernel { boxes_slice = boxes->Slice(i, i + 1); boxes_slice.Resize({score_dims[2], box_dim}); } else { - auto boxes_lod = boxes->lod().back(); + std::vector boxes_lod; + if (has_roisnum) { + boxes_lod = GetNmsLodFromRoisNum(rois_num); + } else { + boxes_lod = boxes->lod().back(); + } if (boxes_lod[i] == boxes_lod[i + 1]) { all_indices.push_back(indices); batch_starts.push_back(batch_starts.back()); @@ -380,7 +402,12 @@ class MultiClassNMSKernel : public framework::OpKernel { offset = i * score_dims[2]; } } else { - auto boxes_lod = boxes->lod().back(); + std::vector boxes_lod; + if (has_roisnum) { + boxes_lod = GetNmsLodFromRoisNum(rois_num); + } else { + boxes_lod = boxes->lod().back(); + } if (boxes_lod[i] == boxes_lod[i + 1]) continue; scores_slice = scores->Slice(boxes_lod[i], boxes_lod[i + 1]); boxes_slice = boxes->Slice(boxes_lod[i], boxes_lod[i + 1]); @@ -403,6 +430,15 @@ class MultiClassNMSKernel : public framework::OpKernel { } } } + if (ctx.HasOutput("NmsRoisNum")) { + auto* nms_rois_num = ctx.Output("NmsRoisNum"); + nms_rois_num->mutable_data({n}, ctx.GetPlace()); + int* num_data = nms_rois_num->data(); + for (int i = 1; i <= n; i++) { + num_data[i - 1] = batch_starts[i] - batch_starts[i - 1]; + } + nms_rois_num->Resize({n}); + } framework::LoD lod; lod.emplace_back(batch_starts); @@ -535,6 +571,34 @@ class MultiClassNMS2OpMaker : public MultiClassNMSOpMaker { } }; +class MultiClassNMS3Op : public MultiClassNMS2Op { + public: + MultiClassNMS3Op(const std::string& type, + const framework::VariableNameMap& inputs, + const framework::VariableNameMap& outputs, + const framework::AttributeMap& attrs) + : MultiClassNMS2Op(type, inputs, outputs, attrs) {} + + void InferShape(framework::InferShapeContext* ctx) const override { + MultiClassNMS2Op::InferShape(ctx); + + 
ctx->SetOutputDim("NmsRoisNum", {-1}); + } +}; + +class MultiClassNMS3OpMaker : public MultiClassNMS2OpMaker { + public: + void Make() override { + MultiClassNMS2OpMaker::Make(); + AddInput("RoisNum", + "(Tensor) The number of RoIs in shape (B)," + "B is the number of images") + .AsDispensable(); + AddOutput("NmsRoisNum", "(Tensor), The number of NMS RoIs in each image") + .AsDispensable(); + } +}; + } // namespace operators } // namespace paddle @@ -551,3 +615,10 @@ REGISTER_OPERATOR( paddle::framework::EmptyGradOpMaker); REGISTER_OP_CPU_KERNEL(multiclass_nms2, ops::MultiClassNMSKernel, ops::MultiClassNMSKernel); + +REGISTER_OPERATOR( + multiclass_nms3, ops::MultiClassNMS3Op, ops::MultiClassNMS3OpMaker, + paddle::framework::EmptyGradOpMaker, + paddle::framework::EmptyGradOpMaker); +REGISTER_OP_CPU_KERNEL(multiclass_nms3, ops::MultiClassNMSKernel, + ops::MultiClassNMSKernel); diff --git a/paddle/fluid/pybind/op_function_generator.cc b/paddle/fluid/pybind/op_function_generator.cc index 7f2736a9b1d414..cac44173c17727 100644 --- a/paddle/fluid/pybind/op_function_generator.cc +++ b/paddle/fluid/pybind/op_function_generator.cc @@ -52,6 +52,7 @@ std::map> op_ins_map = { {"hierarchical_sigmoid", {"X", "W", "Label", "PathTable", "PathCode", "Bias"}}, {"moving_average_abs_max_scale", {"X", "InAccum", "InState"}}, + {"multiclass_nms3", {"BBoxes", "Scores", "RoisNum"}}, }; // NOTE(zhiqiu): Like op_ins_map. 
@@ -78,6 +79,7 @@ std::map> op_outs_map = { {"distribute_fpn_proposals", {"MultiFpnRois", "RestoreIndex", "MultiLevelRoIsNum"}}, {"moving_average_abs_max_scale", {"OutScale", "OutAccum", "OutState"}}, + {"multiclass_nms3", {"Out", "NmsRoisNum"}}, }; // NOTE(zhiqiu): Commonly, the outputs in auto-generated OP function are diff --git a/python/paddle/fluid/tests/unittests/test_multiclass_nms_op.py b/python/paddle/fluid/tests/unittests/test_multiclass_nms_op.py index 34c19b88bcdbac..3158d78db63dcd 100644 --- a/python/paddle/fluid/tests/unittests/test_multiclass_nms_op.py +++ b/python/paddle/fluid/tests/unittests/test_multiclass_nms_op.py @@ -571,6 +571,128 @@ def test_scores_Variable(): self.assertRaises(TypeError, test_scores_Variable) +class TestMulticlassNMS3Op(TestMulticlassNMS2Op): + def setUp(self): + self.set_argument() + N = 7 + M = 1200 + C = 21 + BOX_SIZE = 4 + background = 0 + nms_threshold = 0.3 + nms_top_k = 400 + keep_top_k = 200 + score_threshold = self.score_threshold + + scores = np.random.random((N * M, C)).astype('float32') + + scores = np.apply_along_axis(softmax, 1, scores) + scores = np.reshape(scores, (N, M, C)) + scores = np.transpose(scores, (0, 2, 1)) + + boxes = np.random.random((N, M, BOX_SIZE)).astype('float32') + boxes[:, :, 0:2] = boxes[:, :, 0:2] * 0.5 + boxes[:, :, 2:4] = boxes[:, :, 2:4] * 0.5 + 0.5 + + det_outs, lod = batched_multiclass_nms(boxes, scores, background, + score_threshold, nms_threshold, + nms_top_k, keep_top_k) + det_outs = np.array(det_outs) + + nmsed_outs = det_outs[:, :-1].astype('float32') if len( + det_outs) else det_outs + index_outs = det_outs[:, -1:].astype('int') if len( + det_outs) else det_outs + self.op_type = 'multiclass_nms3' + self.inputs = {'BBoxes': boxes, 'Scores': scores} + self.outputs = { + 'Out': (nmsed_outs, [lod]), + 'Index': (index_outs, [lod]), + 'NmsRoisNum': np.array(lod).astype('int32') + } + self.attrs = { + 'background_label': 0, + 'nms_threshold': nms_threshold, + 'nms_top_k': nms_top_k, + 
'keep_top_k': keep_top_k, + 'score_threshold': score_threshold, + 'nms_eta': 1.0, + 'normalized': True, + } + + def test_check_output(self): + self.check_output() + + +class TestMulticlassNMS3OpNoOutput(TestMulticlassNMS3Op): + def set_argument(self): + # Here set 2.0 to test the case there is no outputs. + # In practical use, 0.0 < score_threshold < 1.0 + self.score_threshold = 2.0 + + +class TestMulticlassNMS3LoDInput(TestMulticlassNMS2LoDInput): + def setUp(self): + self.set_argument() + M = 1200 + C = 21 + BOX_SIZE = 4 + box_lod = [[1200]] + background = 0 + nms_threshold = 0.3 + nms_top_k = 400 + keep_top_k = 200 + score_threshold = self.score_threshold + normalized = False + + scores = np.random.random((M, C)).astype('float32') + + scores = np.apply_along_axis(softmax, 1, scores) + + boxes = np.random.random((M, C, BOX_SIZE)).astype('float32') + boxes[:, :, 0] = boxes[:, :, 0] * 10 + boxes[:, :, 1] = boxes[:, :, 1] * 10 + boxes[:, :, 2] = boxes[:, :, 2] * 10 + 10 + boxes[:, :, 3] = boxes[:, :, 3] * 10 + 10 + + det_outs, lod = lod_multiclass_nms( + boxes, scores, background, score_threshold, nms_threshold, + nms_top_k, keep_top_k, box_lod, normalized) + + det_outs = np.array(det_outs) + nmsed_outs = det_outs[:, :-1].astype('float32') if len( + det_outs) else det_outs + self.op_type = 'multiclass_nms3' + self.inputs = { + 'BBoxes': (boxes, box_lod), + 'Scores': (scores, box_lod), + 'RoisNum': np.array(box_lod).astype('int32') + } + self.outputs = { + 'Out': (nmsed_outs, [lod]), + 'NmsRoisNum': np.array(lod).astype('int32') + } + self.attrs = { + 'background_label': 0, + 'nms_threshold': nms_threshold, + 'nms_top_k': nms_top_k, + 'keep_top_k': keep_top_k, + 'score_threshold': score_threshold, + 'nms_eta': 1.0, + 'normalized': normalized, + } + + def test_check_output(self): + self.check_output() + + +class TestMulticlassNMS3LoDNoOutput(TestMulticlassNMS3LoDInput): + def set_argument(self): + # Here set 2.0 to test the case there is no outputs. 
+ # In practical use, 0.0 < score_threshold < 1.0 + self.score_threshold = 2.0 + + if __name__ == '__main__': paddle.enable_static() unittest.main() diff --git a/tools/static_mode_white_list.pyc b/tools/static_mode_white_list.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7d2a45c248ce271c1c4fff310505a172339e5eee GIT binary patch literal 21082 zcmeHPb-XNhbw9sffMCJhH3W&g_d;+8kN`n~B)AN7_dC0HxVt->nce%|4eqX`c#Bq` z)Q}dJ3ee(GfhyEps257V=QlIw%nR})-aa1FsV z1=kW>TW}r0bp_WGTwibl!3_mB65LpD6TwXdHxt}ka0|gL1-BC1T5z)9HiAJ55YYJ_Y#~fxVPXw zg8K^25u7WypWyz22MEp+JW%i;!Gi@45u7i0sNi9OhYKDdxIl2B;E{qy2_7wYjNq|? z#|a)Uc!J=Gf+qTPzs(R*bz(xGr?R?2^NBj1ht?MECnmU zuHdNPm|#zEvEZqKrwN`ec!uDaf@cYyEqIRLxq{~jo-cTT;Dv%030^FCiQuJzmkC}j zc!l7Vf>#M%EqIOKM+C1G{HWk{g4YY)Ab6wTO@cQI-Xi!h!H)~xDtMdV?Sgj*-YIyO z;3ot>DflVDPYd2Hc#q&`1n(96tl;MaKQH(N!7mElCwRZ$1A-3a_`Ki?f-efbB>1x6D}t{I zz9#s(;2VP968yH{cLcvH_@>}ng5MMTzTgi8e<=7P!5<60E%=V$PXvD|_%p$u3;sgz zmx8|%{I%e31b-{|JHg)z{z34Mf`1bHv*5dee-Zqv;NJxQF8B|@e+vFf@ZW;(3BE7* zAHfeO{{bN&2AmLZnSjd%TrS|mfXfG5A>fJuR|>dtz)1mD3Ak#&)dH>_aE*X#23#xP z+5y)IxNgAp0a6!O@0gnuLRKTMH z9ux4`fX4+qKHv!fPYifcKo;=ifIMI;U=)CWB48Xa3D^!O1D+DF6EF>!1;@bSI2N!MaB;v>1D+P}^nhmsJTu@~0nZM2PQY^mo)_@^fENV3FyKW2 zFAjJ~z)J&O7Vz?bR|LE=;8g*y4tPz#j|99n;70>q7x4OkHw3&f;7tK<4tPtzj|KdA zz*_^}7V!3fcLcmM;9UVf5%7}%KNaxP0q+iYPr%Ovyf@%y1AZ>x=L3Er;1>hl7x4ao z4+MNL;Fkh^Ip9M99}f6Pz()f<7Vz>Q{B6MB1^j)$KLq?^z&{22bHH~4 z{w3gF1O6@G-vj<5;6DTYE8xEaz8CQQfd2{jL4ZVrh!}A~#APBb8*#bVnf((ZE+5tG zpG03kV8oT8x_~S5--wf9=LoJsU{t?wH9{h;5pm6kYeif;;yMx6jksRK^&@T&al?ok zMcg>zCJ{G{xLL%_BW@9K%ZOV=+&bdqh}%S*5^>vz+eO?y;tmm~Mw}LLdc+wKhawI~ z9EmtH;*Jq#McgUk&JlNsxNF4SBJLh>kBECl+$-YjhLA|4y@xQNF`JR#zV5l@Q9 zBAy(PM{GrmA`nqTj3Xuy+Yx2NQzCXErV+D<^HIK3&~DGsk8r#wJj>k-O%Hmb*pym~)#S|O{8rby?wmP4zZ)goKw zThl^^pVT83IS!{|UA2cayu-4nb(BqSii=joe8g*~D=x2Lg9nJ+1Hg-_nmRRXnc~!2OxML)k2Ir4R8}>+$($w5s#bo&rrE^3k%~&6j00&*t-NQs)>8Tm{BH{Jx4vywuSjTc7_^N+L6Tw7+AAItJZ>GH~@6~={3929g^Ax26< z7CbV=cXyI0I}hOiq^{E;2ZsV-O_?rB>8Vi8C%X0x{Mah1ymYPPE#}~;z2P-6nyse8 
z(_xk_eob4U)y7F(CJH%xIvq_bD%~{SE9&JfZ2W9Ny(vaJ8MU}v@>Uc>3Q4OKe(CvW zyQ;HEF)!+T$-nW`SF$zgd_F0bzE?fTX4J%vmd?3)(o}WQLtnVA^SyNE>}aj~b78X| zdDHN$o1k(sSCHuCG zZbf3!WKo=zMrA%NFHREcjM&OYJ4f>xPK2$zDMwjNnV#pf!Y#p8z8r05^Qukqq$yPyWZ)owj5DJP>y0(#jh~_b}~dP6Ss9nO&zwjFNRK zDN@TT^vjKMIuMnO zD_3(2mzX0NG#-3wT8(xFC#`t-V@vXHo=>R9#u@s|-9vN}$r73O=%7m)lZI#QE!EmA zdggYPnFyWL`s=lY2)e|DjvTlUp(M}RLIiCtWU{1tui;f*Dp^~)ReIyyj8$9#aD8pYOXq7rHrk7T8AG9L3-!#tLfe6>ib z(K;?pyru26ADQX?t&bfXxfs&}q~KGfDBC@x3^Ub*qZdtGl_``Uo84kFGGHLFNA)ae zK-Bg9G%mN&$!^^ z9~ax124-bOwx&41lo5M#jRKZtH3-OOU1-oOshAqgCa*GBa;||xkLZNQC2%jH8DTGz zJ727p%Uky}E7aD~E=-FQ`B1tx?6Y~{jge_ep(dtr)!P^|saF|U?YgyrPjQwGINm1A zOH$CQ6frh$EJDd$)ite>lHRma(8{%K|VU3Rx}-# zFu)kXeGgo{i+B&lF(N+hJm#p!#jR3EVB8j_-Nj_yyfkU0{u31v0T+Xov<+*`*L4Vv-CUs^Xz^>Ymndpn8|NhPz{+w3BU-|lc_!wH{cXl6g)xq%Tf47e ze337hsP8QbSLbn|$%j@%x@aKR^veT<7QdR$ih4rQ+Ra+HMjw7IEy(0^mydM}?T%Ie zk`9|KOhp`C!7sjE)lo8=oO7_o&J1-+T`Kt zu1Rk{XpNH7f;P+Keyl$!6&jTs8XPu(7ki9`sWaPov+Xj`PN^B#&&bQ#4mTiyVtL%I@{^EO?MPGWV!9%ISiMSBNWDXrhTYsWi)$cLL)+FVy+p-@%}eWI z95vfwE`Cvp&~#td#~$6;S*mMahHyz+HCp&mx5}v&uXAMu_rr%vD!gWCD8+VMGF^sq zhgZ}wJW{%tmd{vb?T4AM?6%-Atx|PmOg+Sq2;D;M2r2_t?CQA|&GhSZr9ZlutSjBx zZ6^70n~J=wi*>6v$yco0o%azYzKpkL;D7$ z^w%OsuV#6Q;8uVYt!y#XKv~IR@s)x*<2kN*>V{%6(m=bej;4{uT#FVoWq0vSrMV`b zF@?x_eQ`rFz^1~*U^QkPmi3#lozw$x#=4O?nNFz8eCTgAHhLRVZna_xux+;K0NZ+# zg3UB+GssSLfc3$vt**MEU}JbWJ8bo4!cBb38Dmps3tDp-mz1O= zWnFwtAPzhuY0_FJ_XUI17R#-aFx;lMM*DG@`U;9%FJVIx+nqd&UNxk<$4P_r>Y|)8 zV;QBM%z<}XyN#CpJ$P82cEAZqES8C>)JIEp2l@kMtYzoZ<=%l?r22}c!<41Xi&+Fx z&PR27N{hovmdB4}ohFf{n~0>CDwr;S^D+0BSIl8Zy{*+)e>T%<*$gC!X$fBGx(rsO&X?Mx<<iu$b@Mgrz{iBkfHIr1b_>^)CGHC$pIuQh#S=yOuUZ!!^gzshD$bNcS#E~&O?3mB!>z)DQJ;gt2Lxt8Cw?7+6kHVdg!3WrZ^ zi{oB$Hzsvu^R)uGRiub)w9n=@O_qpfHQ$y1nyhz$e#(f3XjZJ1Z8^&kl!V#3Z&EGJ z7usP`0{Ed6oH{bof{9Ra4JPQRk(#qgotma@+1w&@bE`$;?9_@;&o|?-0z0|)B!fWK z1(`$*=vxiCv|WOv2et%)8N0V}H?piLGdzxUc6Uu6r!oO4B)0p{@_NEFodF4hj!xK2 z1{vEZK^QACiSlC;%v@G?GJWbm?Rkdt>+)F73OmKDptUSnRk9GSE7{eBhO))6(R9TQ 
zPfG7J1M^+l%3f>T2T-PNS&77NBlxUnw)>u-i4c2q1@oAiLF};7cmlh0YE zWw&kiOtm7x%@|V*NTf&xTB$nf^i9yZ(xi)#aa&kEHeyDglg*ZB-;O$6F@d%k3azRO zzCJq87Z7VYY@*3BSjxnbT}R5p?Z$IWv?yO*0bwPl4+fc6r8;P;&a}bDV9&L2+Z$y} z7CrQhMCxk$X4-3HzT{*{exa`&8DDxoVq8!Fy|&3q`@8FXvR$hNq;?z4M+da{z1sNp z)aj%iMp#yhv+m@k96fBKq(X4};KS88fhd(`W2usfRm@J94UYziJXzOvtF=t+8q3LD z{AFp3fym5MKVT}Ku^xET-Jw3I;1*;Es~Vamd#Jtn5JL%(GROWIga?-zMY1hN7WcI% zXmh3&Gds>fqu6X%%{_qHKO|WQ-nh}X7y7J4t;OL!M%zE0p1EZ3^SS zdBL1rQ?R7boM+Zn+e&3BPH2CrEex5a6UoX7hYuiR=9+6<>sJlNRV??BaAu79wzJc> z?L$)q*|WVf|}8wfJFE)W~wZ1dh>5M zsYSMpA?I`5cj@%DR-#SE8c2in@^qB*Y%keVQ}~dDzQswwf(K3m?21l(aq!+!eB4_*kIIL2 z)HRy%Vcn{5;%ITG6Cx8Ag{9ce_|lb-Qlpz!F*#Q_O=2o88CM8oK5UZ3pC0Www2cee zR`UQl=uMfm@%Ix}TdIq(madaIW0Kjv#_|gk6Oy#v8yvKltTfT0`*s@D4on(tu|-{^ z9yiTwUwIf1jG>-Z{z(R78Tm*NVnjk=Uowf*!V4o#Z5)~u))b;HF?86hp*o}(*-5~_ zVN_7{)yDXJ3GL$6?P5xj>;686E$s9PK%tgJCY9lyn%1hdF-xeUoP}NcTY+XC=$=zd z;S^=FiUc{AgA^uJM|~KRels@j zf<_BQI+b89Q!Z2RY^Ew@*lkTgLqcsMtAn;1*rPk6Pl6x=gFK!py^!)4*Gy0UqO(oq zj6#i;IEHj=iirl}gbn(;ZFz@2RqG3_M6A)7_+-(2H&Sj(E5@n+XQ5)(DEG_sB51Uw zP%_bIYc@QVS?FfFE~$Uo@9u1r)3SA&H0#>6;v|#*&Y|jy-~Q3@ZF^?%G2l!e?5&kj z+UkmP#q4Z9dR)<+S@7yq>>eAM0~{1YIAtS0YV-vK&*_(Liw>+xq-ftSy$07QmYf{F z9HhgXHmWrGB(IO!eSrn@{4J;6P<4f%5l!wMN)r1daCqi1sxt= zTvv1q&XukS!1urXseEBJxpvlX(3Vxb@Z?)=ybfxpFU+{jBDBBR8|Et@Bgj6jwwo@& z&IO+fBy}*>D7019WU6k}i_fyqmNUzOOJ?0vmtH{nyv7KzWa+9(k0SerT&C*7=Bn0A z+uv@z*uqP4V${)RAc;B-;X~;(#+*!YB^uW|50k-6Z{>!ZO^p_{g=_rg8<{CBY?;*A z#95)6B`UwhCbCV=%swq?7u!ch2efkR+^admmqhykXk4}IFlw?hp(Wr!FUZ7!D8>XX zX6^5;URu#KH=f$@<+~aulEZx1N>jF0_UvY>1v@@nn?tRvhA5n4)Td$AJfm8Jl!^G} zN{$xgWLw+Bnk4rRf=xz`Qik-oFiX_FNDJgfHf*n&R{iTO6HNu~9x-ENw%0#@SUYSB z=BjoyeYDOY-IJu{w$?bDe)jsdt6OlUv04|g>r;!0hq!fwAmedZT0 z9_sB!Gj?c`D{~S1$D~uXJs5B6T;i}S=Tjc0jH6|beA7`Qaat>J%P?P{~7U2P{NMH-nXYJTGFq&DL_0+?qrz`f_p#2zv34_P@Z9;3u?~9*p zlgsL@Y(sc9oxVQsW70?`liaS^mQPRZxz0+Y7g6~LB9-G3Gm~^Co~LG%cMbWO(1@#$ zAqip5f=e|!{J6kpC%kD>a}E0C^)Y6C+c87bDgA{KYWVh# z&pHk#jq-J@pmosTjj?9l4jk)iS{SuP99M1qDq?leUHbPfzLb$--vbsOV^pP_btm%& 
zEI!P|apU2M4>4kRl+?fd;==ll#mCxpJUyP}Dekq6(qPiReG$YbHD=80$Q+`HHBa8X z1nl=o5A{r**d1az!#}kf)P;&e6JQOcHpJ21h6fB9nIyXK*_Anr!N)_LY;iqv7~Lu8 z?eE-+9?^E?4t?-M{kCFjeCVvf#NoP%zJzrZ?dux*_58#*Uwq8)RQhd&W9+hncan}Z zHf?CgV1WaNXrswj8OpjtZDPk?mG_NH+u;U74p?9_O!gsW2eoPGU?WE-6OJ*Tpf}h! zjd7@}4{xAt(GGpUqExE(a{$NL6>I-kz_Dh*dY9oSyP8(a9c;eA#bD!{<`aE{Pq(nK z10|gpd`p_tHQ8x%3B!?{(5HfR2lp8k*LE0J$gV=-Lr?3?^ZHOfap!giw#LjwH>Iun zbbI*9mLjyuO>0V}HLmlc3&xLRyr%kXDa_c~sTY?@Wxl~Ijn4E#=)rS07y0zMl|7T1 zEi_w8&qE8q(5_FX=e@M=iP0?dw^MAAPYF*0zwW8ttmx-#&e7+=kpK03D~%EzOGiC!Mr~S_<)8rc3bCw_pw(T=TG3?OJHR z26t{hTJLf<<7zuBJ=IxMWU7lNkGr1{CeX_BuF33$D#AAFpl$flijtXRoy<46t>`BP z|M2P8Fob=@}`qa0yQ!2>c4$fd8V1%;WG|5X&Jv6u2cWJH+OB;?wXv87P+6^ ze+(ve!yZmdc9 zP-oOccX)Hw>5B`_<<(N@e|^#JWp+2o_A*nRMw{?<;8X-m7`u39ViM5ushPk1NeJU< zm!uCT8Ff2c+gx8?uk)mb`#YL)w7rKk*t}Wa0^5Lg|9uOEo_|VELDC@Of(!3`;d%GX z9`ew0&dn}7_ks)0JOBRKgU`F*Le0Iz|7h;PpQq~=Y}4X&Z9{i|KYZF5r$4NPE?InR zPVcHjT5if%Bho@6yM3oE_R>3|yA#ewg7;MFH78$I Date: Thu, 29 Oct 2020 04:09:54 +0100 Subject: [PATCH 05/10] Add bf16 transpose2, reshape2, concat ops (#28195) --- .../framework/ir/graph_pattern_detector.cc | 3 +- .../cpu_bfloat16_placement_pass_tester.cc | 15 ++- .../operators/mkldnn/concat_mkldnn_op.cc | 1 + .../operators/mkldnn/transpose_mkldnn_op.cc | 5 + paddle/fluid/operators/reshape_op.cc | 4 +- .../mkldnn/test_concat_bf16_mkldnn_op.py | 110 ++++++++++++++++++ .../unittests/mkldnn/test_reshape_bf16_op.py | 62 ++++++++++ .../mkldnn/test_transpose_bf16_mkldnn_op.py | 66 +++++++++++ 8 files changed, 260 insertions(+), 6 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/mkldnn/test_concat_bf16_mkldnn_op.py create mode 100644 python/paddle/fluid/tests/unittests/mkldnn/test_reshape_bf16_op.py create mode 100644 python/paddle/fluid/tests/unittests/mkldnn/test_transpose_bf16_mkldnn_op.py diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc index 5ffaf28fe92f1f..20da74eca4ef87 100644 --- 
a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -2101,7 +2101,8 @@ PDNode *patterns::QuantizePlacement::operator()( PDNode *patterns::Bfloat16Placement::operator()( const std::unordered_set &bfloat16_enabled_op_types) { std::unordered_set supported_op_types = - std::unordered_set({"conv2d", "fusion_gru"}); + std::unordered_set( + {"concat", "conv2d", "fusion_gru", "reshape2", "transpose2"}); if (!bfloat16_enabled_op_types.empty()) { supported_op_types = bfloat16_enabled_op_types; } diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass_tester.cc index b9797a4bfcc004..146e29249b7c61 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass_tester.cc @@ -40,6 +40,10 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name, op->SetInput("X", {inputs[0], inputs[1]}); } else if (type == "pool2d") { op->SetInput("X", {inputs[0]}); + } else if (type == "transpose2") { + op->SetInput("X", {inputs[0]}); + } else if (type == "reshape2") { + op->SetInput("X", {inputs[0]}); } else { FAIL() << "Unexpected operator type."; } @@ -57,8 +61,8 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name, ProgramDesc BuildProgramDesc() { ProgramDesc prog; - for (auto& v : - std::vector({"a", "b", "c", "f", "g", "h", "k", "l"})) { + for (auto& v : std::vector( + {"a", "b", "c", "f", "g", "h", "k", "l", "m", "n", "o", "p"})) { prog.MutableBlock(0)->Var(v); } @@ -68,6 +72,9 @@ ProgramDesc BuildProgramDesc() { SetOp(&prog, "pool2d", "pool1", {"g"}, {"h"}); SetOp(&prog, "conv2d", "conv2", {"h"}, {"k"}); SetOp(&prog, "pool2d", "pool2", {"k"}, {"l"}); + SetOp(&prog, "concat", "concat2", {"l", "m"}, {"n"}); + SetOp(&prog, "transpose2", "transpose", {"n"}, {"o"}); + SetOp(&prog, "reshape2", 
"reshape", {"o"}, {"p"}); return prog; } @@ -115,7 +122,7 @@ void DefaultAttrTest(unsigned expected_bfloat16_data_type_count) { } TEST(Bfloat16PlacementPass, enable_all) { - MainTest({"conv2d", "pool2d", "relu", "concat"}, 6); + MainTest({"conv2d", "pool2d", "relu", "concat"}, 7); } TEST(Bfloat16PlacementPass, enabled_conv_and_pool) { @@ -123,7 +130,7 @@ TEST(Bfloat16PlacementPass, enabled_conv_and_pool) { MainTest({"conv2d", "pool2d"}, 3); } -TEST(Bfloat16PlacementPass, default_attr_value) { DefaultAttrTest(0); } +TEST(Bfloat16PlacementPass, default_attr_value) { DefaultAttrTest(5); } } // namespace ir } // namespace framework diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc index b2815cbdc65b53..bb475b4e543660 100644 --- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc @@ -221,5 +221,6 @@ namespace ops = paddle::operators; REGISTER_OP_KERNEL(concat, MKLDNN, ::paddle::platform::CPUPlace, ops::ConcatMKLDNNOpKernel, + ops::ConcatMKLDNNOpKernel, ops::ConcatMKLDNNOpKernel, ops::ConcatMKLDNNOpKernel); diff --git a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc index 398bdb01b5c240..28cdd8413ab134 100644 --- a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc @@ -142,6 +142,11 @@ REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(transpose2, MKLDNN, ops::kTransposeMKLDNNINT8, ops::TransposeMKLDNNOpKernel); +REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE( + transpose2, MKLDNN, ::paddle::platform::CPUPlace, BF16, + ops::kTransposeMKLDNNFP32, + ops::TransposeMKLDNNOpKernel); + REGISTER_OP_KERNEL(transpose, MKLDNN, ::paddle::platform::CPUPlace, ops::TransposeMKLDNNOpKernel); diff --git a/paddle/fluid/operators/reshape_op.cc b/paddle/fluid/operators/reshape_op.cc index aa8e39037062e5..7cf85420c579b6 100644 --- a/paddle/fluid/operators/reshape_op.cc +++ 
b/paddle/fluid/operators/reshape_op.cc @@ -622,7 +622,9 @@ REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape2, float, ops::ReshapeKernel, double, ops::ReshapeKernel, int8_t, ops::ReshapeKernel, uint8_t, ops::ReshapeKernel, int, ops::ReshapeKernel, int64_t, ops::ReshapeKernel, - bool, ops::ReshapeKernel); + bool, ops::ReshapeKernel, + paddle::platform::bfloat16, ops::ReshapeKernel); + REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape2_grad, float, ops::ReshapeGradKernel, double, ops::ReshapeGradKernel, int, ops::ReshapeGradKernel, int64_t, diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_concat_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_concat_bf16_mkldnn_op.py new file mode 100644 index 00000000000000..1179556f915be8 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_concat_bf16_mkldnn_op.py @@ -0,0 +1,110 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import unittest +import numpy as np +import struct + +import paddle.fluid.core as core +from paddle.fluid.tests.unittests.op_test import OpTest, convert_float_to_uint16 +from paddle import enable_static + + +@unittest.skipIf(not core.supports_bfloat16(), + "place does not support BF16 evaluation") +class TestConcatBf16Op(OpTest): + def setUp(self): + enable_static() + self.op_type = "concat" + self.use_mkldnn = True + self.mkldnn_data_type = "bfloat16" + self.init_axis() + self.init_shape() + self.init_test_data() + self.inputs = {'X': [('x0', self.x0), ('x1', self.x1), ('x2', self.x2)]} + self.attrs = { + 'axis': self.axis, + 'use_mkldnn': True, + 'mkldnn_data_type': self.mkldnn_data_type + } + + self.output = np.concatenate( + (self.x0, self.x1, self.x2), axis=self.axis).astype(np.uint16) + self.outputs = {'Out': self.output} + + def test_check_output(self): + self.check_output_with_place(core.CPUPlace()) + +# --------------------test concat bf16 in with axis 0-------------------- + + def init_test_data(self): + self.x0 = convert_float_to_uint16( + np.random.random(self.x0_shape).astype(np.float32)) + self.x1 = convert_float_to_uint16( + np.random.random(self.x1_shape).astype(np.float32)) + self.x2 = convert_float_to_uint16( + np.random.random(self.x2_shape).astype(np.float32)) + + def init_axis(self): + self.axis = 0 + + def init_shape(self): + self.x0_shape = [2, 2, 1, 2] + self.x1_shape = [1, 2, 1, 2] + self.x2_shape = [3, 2, 1, 2] + + +# --------------------test concat bf16 in with axis 1-------------------- + + +class TestAxis1Case(TestConcatBf16Op): + def init_axis(self): + self.axis = 1 + + def init_shape(self): + self.x0_shape = [1, 1, 5, 5] + self.x1_shape = [1, 2, 5, 5] + self.x2_shape = [1, 3, 5, 5] + + +# --------------------test concat bf16 in with axis 2-------------------- + + +class TestAxis2Case(TestConcatBf16Op): + def init_axis(self): + self.axis = 2 + + def init_shape(self): + self.x0_shape = [2, 3, 
4, 5] + self.x1_shape = [2, 3, 5, 5] + self.x2_shape = [2, 3, 6, 5] + + +# --------------------test concat bf16 in with axis 3-------------------- + + +class TestAxis3Case(TestConcatBf16Op): + def init_axis(self): + self.axis = 3 + + def init_shape(self): + self.x0_shape = [2, 3, 5, 5] + self.x1_shape = [2, 3, 5, 6] + self.x2_shape = [2, 3, 5, 7] + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_reshape_bf16_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_reshape_bf16_op.py new file mode 100644 index 00000000000000..854ddb17fb275a --- /dev/null +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_reshape_bf16_op.py @@ -0,0 +1,62 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import unittest +import numpy as np +import struct + +import paddle.fluid.core as core +from paddle.fluid.tests.unittests.op_test import OpTest, convert_float_to_uint16 +from paddle import enable_static + + +@unittest.skipIf(not core.supports_bfloat16(), + "place does not support BF16 evaluation") +class TestReshapeBf16Op(OpTest): + def setUp(self): + enable_static() + self.op_type = "reshape2" + self.use_mkldnn = True + self.mkldnn_data_type = "bfloat16" + self.init_data() + self.init_input_data() + + self.inputs = {'X': self.input_data} + self.attrs = { + 'shape': self.new_shape, + 'use_mkldnn': self.use_mkldnn, + 'mkldnn_data_type': self.mkldnn_data_type + } + self.outputs = { + "Out": self.inputs["X"].reshape(self.infered_shape), + 'XShape': np.random.random(self.ori_shape).astype(np.float32) + } + + def init_data(self): + self.ori_shape = (10, 2, 6) + self.new_shape = (10, 0, 3, -1) + self.infered_shape = (10, 2, 3, -1) + + def init_input_data(self): + self.input_data = convert_float_to_uint16( + np.random.random(self.ori_shape).astype(np.float32)) + + def test_check_output(self): + self.check_output_with_place(core.CPUPlace(), no_check_set=['XShape']) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_transpose_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_transpose_bf16_mkldnn_op.py new file mode 100644 index 00000000000000..de04cecbf4c9bc --- /dev/null +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_transpose_bf16_mkldnn_op.py @@ -0,0 +1,66 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +import paddle.fluid.core as core +from paddle.fluid.tests.unittests.op_test import OpTest, convert_float_to_uint16 +from paddle import enable_static + + +@unittest.skipIf(not core.supports_bfloat16(), + "place does not support BF16 evaluation") +class TestTransposeOp(OpTest): + def setUp(self): + enable_static() + self.op_type = "transpose2" + self.use_mkldnn = True + self.mkldnn_data_type = "bfloat16" + self.init_test_case() + self.init_test_data() + self.axis = (0, 2, 3, 1) + + self.inputs = {'X': self.input_data} + + self.attrs = { + 'axis': list(self.axis), + 'use_mkldnn': self.use_mkldnn, + 'mkldnn_data_type': self.mkldnn_data_type + } + + self.outputs = { + 'XShape': np.random.random(self.shape).astype(np.uint16), + 'Out': self.inputs['X'].transpose(self.axis) + } + + def test_check_output(self): + self.check_output_with_place(core.CPUPlace(), no_check_set=['XShape']) + + def init_test_case(self): + self.shape = (2, 3, 4, 5) + + def init_test_data(self): + self.input_data = convert_float_to_uint16( + np.random.random(self.shape).astype(np.float32)) + + +class TestBF16Case(TestTransposeOp): + def init_test_case(self): + self.shape = (2, 4, 6, 8) + + +if __name__ == '__main__': + unittest.main() From 3ccc0a2f5e059772565d1411372148035e337b10 Mon Sep 17 00:00:00 2001 From: wanghuancoder Date: Thu, 29 Oct 2020 11:40:35 +0800 Subject: [PATCH 06/10] enable test_parallel_executor_fetch_isolated_var (#28219) * enable test_parallel_executor_fetch_isolated_var, test=develop * add 
enable_static, test=develop * set test_parallel_executor_fetch_isolated_var RUN_TYPE=DIST, develop=test --- python/paddle/fluid/tests/unittests/CMakeLists.txt | 2 +- .../unittests/test_parallel_executor_fetch_isolated_var.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 4cd9d9e530d871..52950a4d92a71a 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -339,7 +339,6 @@ list(REMOVE_ITEM TEST_OPS test_conv3d_transpose_op) # disable this unittest temporarily list(REMOVE_ITEM TEST_OPS test_imperative_data_loader_exception) list(REMOVE_ITEM TEST_OPS test_sampling_id_op) -list(REMOVE_ITEM TEST_OPS test_parallel_executor_fetch_isolated_var) if (APPLE OR WIN32) list(REMOVE_ITEM TEST_OPS test_dataset) @@ -587,6 +586,7 @@ set_tests_properties(test_parallel_executor_crf test_sync_batch_norm_op test_inp test_parallel_executor_seresnext_with_reduce_gpu test_parallel_executor_seresnext_with_fuse_all_reduce_gpu test_parallel_executor_profiler + test_parallel_executor_fetch_isolated_var PROPERTIES LABELS "RUN_TYPE=DIST") if(NOT WIN32 AND NOT APPLE) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_isolated_var.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_isolated_var.py index 13932238705f5b..d64aa510f4e1a5 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_isolated_var.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_isolated_var.py @@ -16,6 +16,7 @@ import numpy as np import six import paddle.fluid as fluid +import paddle def enable_parallel_ssa_executor(enabled=True): @@ -57,6 +58,7 @@ def test_main(self): def run_impl(self, use_gpu, dev_cnt, is_training, use_experimental_executor, use_parallel_ssa_executor): + paddle.enable_static() 
enable_parallel_ssa_executor(use_parallel_ssa_executor) if fluid.is_compiled_with_cuda(): From f763cb81a697a364e57f0b6104af4a5d6d96b7d5 Mon Sep 17 00:00:00 2001 From: iducn <45056973+iducn@users.noreply.github.com> Date: Thu, 29 Oct 2020 12:27:09 +0800 Subject: [PATCH 07/10] Modify the shell script according to the specification (#28302) * 01:Modify the shell script according to the specification * 01:Modify the shell script according to the specification --- paddle/.set_port.sh | 6 +- paddle/.set_python_path.sh | 8 +- paddle/fluid/inference/api/demo_ci/clean.sh | 3 +- paddle/fluid/inference/api/demo_ci/run.sh | 175 +++++++++--------- paddle/fluid/inference/check_symbol.sh | 12 +- paddle/fluid/train/demo/clean.sh | 2 +- paddle/fluid/train/demo/run.sh | 12 +- paddle/fluid/train/imdb_demo/run.sh | 2 +- paddle/scripts/paddle_docker_build.sh | 32 ++-- tools/cudaError/start.sh | 4 +- .../dockerfile/build_scripts/install_nccl2.sh | 4 +- tools/gen_alias_mapping.sh | 4 +- .../manylinux1/build_scripts/install_nccl2.sh | 21 ++- 13 files changed, 154 insertions(+), 131 deletions(-) diff --git a/paddle/.set_port.sh b/paddle/.set_port.sh index 617ac79a24889e..e71f494aadf2c5 100755 --- a/paddle/.set_port.sh +++ b/paddle/.set_port.sh @@ -13,6 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-DIRNAME=`dirname $0` -source $DIRNAME/.common_test_util.sh -set_port $@ +DIRNAME="$(dirname "$0")" +source "$DIRNAME"/.common_test_util.sh +set_port "$@" diff --git a/paddle/.set_python_path.sh b/paddle/.set_python_path.sh index 8fd58925ee4820..8da4565be617bd 100755 --- a/paddle/.set_python_path.sh +++ b/paddle/.set_python_path.sh @@ -24,12 +24,14 @@ PYPATH="" set -x while getopts "d:" opt; do - case $opt in + case "$opt" in d) PYPATH=$OPTARG ;; + *) + ;; esac done -shift $(($OPTIND - 1)) +shift $(("$OPTIND" - 1)) export PYTHONPATH=$PYPATH:$PYTHONPATH -$@ +"$@" diff --git a/paddle/fluid/inference/api/demo_ci/clean.sh b/paddle/fluid/inference/api/demo_ci/clean.sh index 0d9f3d2aa237ac..5f603465776f1e 100755 --- a/paddle/fluid/inference/api/demo_ci/clean.sh +++ b/paddle/fluid/inference/api/demo_ci/clean.sh @@ -1,4 +1,5 @@ +#!/bin/bash set -x -cd `dirname $0` +cd "$(dirname "$0")" || exit rm -rf build/ data/ set +x diff --git a/paddle/fluid/inference/api/demo_ci/run.sh b/paddle/fluid/inference/api/demo_ci/run.sh index 6d283ca56cb652..aee013e8f36528 100755 --- a/paddle/fluid/inference/api/demo_ci/run.sh +++ b/paddle/fluid/inference/api/demo_ci/run.sh @@ -1,29 +1,29 @@ #!/bin/bash set -x -PADDLE_ROOT=$1 -TURN_ON_MKL=$2 # use MKL or Openblas -TEST_GPU_CPU=$3 # test both GPU/CPU mode or only CPU mode -DATA_DIR=$4 # dataset -TENSORRT_INCLUDE_DIR=$5 # TensorRT header file dir, default to /usr/local/TensorRT/include -TENSORRT_LIB_DIR=$6 # TensorRT lib file dir, default to /usr/local/TensorRT/lib -MSVC_STATIC_CRT=$7 -inference_install_dir=${PADDLE_ROOT}/build/paddle_inference_install_dir +PADDLE_ROOT="$1" +TURN_ON_MKL="$2" # use MKL or Openblas +TEST_GPU_CPU="$3" # test both GPU/CPU mode or only CPU mode +DATA_DIR="$4" # dataset +TENSORRT_INCLUDE_DIR="$5" # TensorRT header file dir, default to /usr/local/TensorRT/include +TENSORRT_LIB_DIR="$6" # TensorRT lib file dir, default to /usr/local/TensorRT/lib +MSVC_STATIC_CRT="$7" 
+inference_install_dir="${PADDLE_ROOT}"/build/paddle_inference_install_dir -cd `dirname $0` -current_dir=`pwd` -if [ $2 == ON ]; then +cd "$(dirname "$0")" || exit +current_dir=$(pwd) +if [ "$2" == ON ]; then # You can export yourself if move the install path - MKL_LIB=${inference_install_dir}/third_party/install/mklml/lib - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${MKL_LIB} + MKL_LIB="${inference_install_dir}"/third_party/install/mklml/lib + export LD_LIBRARY_PATH="$LD_LIBRARY_PATH":"${MKL_LIB}" fi -if [ $3 == ON ]; then +if [ "$3" == ON ]; then use_gpu_list='true false' else use_gpu_list='false' fi USE_TENSORRT=OFF -if [ -d "$TENSORRT_INCLUDE_DIR" -a -d "$TENSORRT_LIB_DIR" ]; then +if [ -d "$TENSORRT_INCLUDE_DIR" ] && [ -d "$TENSORRT_LIB_DIR" ]; then USE_TENSORRT=ON fi @@ -32,77 +32,79 @@ URL_ROOT=http://paddlemodels.bj.bcebos.com/${PREFIX} # download vis_demo data function download() { - dir_name=$1 - mkdir -p $dir_name - cd $dir_name + dir_name="$1" + mkdir -p "$dir_name" + cd "$dir_name" || exit if [[ -e "${PREFIX}${dir_name}.tar.gz" ]]; then echo "${PREFIX}${dir_name}.tar.gz has been downloaded." else - wget -q ${URL_ROOT}$dir_name.tar.gz - tar xzf *.tar.gz + wget -q "${URL_ROOT}""$dir_name".tar.gz + tar xzf ./*.tar.gz fi - cd .. + cd .. || exit } -mkdir -p $DATA_DIR -cd $DATA_DIR +mkdir -p "$DATA_DIR" +cd "$DATA_DIR" || exit vis_demo_list='se_resnext50 ocr mobilenet' for vis_demo_name in $vis_demo_list; do - download $vis_demo_name + download "$vis_demo_name" done # download word2vec data mkdir -p word2vec -cd word2vec +cd word2vec || exit if [[ -e "word2vec.inference.model.tar.gz" ]]; then echo "word2vec.inference.model.tar.gz has been downloaded." 
else wget -q http://paddle-inference-dist.bj.bcebos.com/word2vec.inference.model.tar.gz - tar xzf *.tar.gz + tar xzf ./*.tar.gz fi # compile and test the demo -cd $current_dir +cd "$current_dir" || exit mkdir -p build -cd build -rm -rf * +cd build || exit +rm -rf ./* for WITH_STATIC_LIB in ON OFF; do - if [ $(echo `uname` | grep "Win") != "" ]; then + if [ "$(uname | grep Win)" != "" ]; then # -----simple_on_word2vec on windows----- - cmake .. -G "Visual Studio 14 2015" -A x64 -DPADDLE_LIB=${inference_install_dir} \ - -DWITH_MKL=$TURN_ON_MKL \ + cmake .. -G "Visual Studio 14 2015" -A x64 -DPADDLE_LIB="${inference_install_dir}" \ + -DWITH_MKL="$TURN_ON_MKL" \ -DDEMO_NAME=simple_on_word2vec \ - -DWITH_GPU=$TEST_GPU_CPU \ - -DWITH_STATIC_LIB=$WITH_STATIC_LIB \ - -DMSVC_STATIC_CRT=$MSVC_STATIC_CRT + -DWITH_GPU="$TEST_GPU_CPU" \ + -DWITH_STATIC_LIB="$WITH_STATIC_LIB" \ + -DMSVC_STATIC_CRT="$MSVC_STATIC_CRT" msbuild /maxcpucount /property:Configuration=Release cpp_inference_demo.sln for use_gpu in $use_gpu_list; do Release/simple_on_word2vec.exe \ - --dirname=$DATA_DIR/word2vec/word2vec.inference.model \ - --use_gpu=$use_gpu - if [ $? -ne 0 ]; then + --dirname="$DATA_DIR"/word2vec/word2vec.inference.model \ + --use_gpu="$use_gpu" + EXCODE="$?" + if [ "$EXCODE" -ne 0 ]; then echo "simple_on_word2vec demo runs fail." exit 1 fi done # -----vis_demo on windows----- - rm -rf * - cmake .. -G "Visual Studio 14 2015" -A x64 -DPADDLE_LIB=${inference_install_dir} \ - -DWITH_MKL=$TURN_ON_MKL \ + rm -rf ./* + cmake .. 
-G "Visual Studio 14 2015" -A x64 -DPADDLE_LIB="${inference_install_dir}" \ + -DWITH_MKL="$TURN_ON_MKL" \ -DDEMO_NAME=vis_demo \ - -DWITH_GPU=$TEST_GPU_CPU \ - -DWITH_STATIC_LIB=$WITH_STATIC_LIB \ - -DMSVC_STATIC_CRT=$MSVC_STATIC_CRT + -DWITH_GPU="$TEST_GPU_CPU" \ + -DWITH_STATIC_LIB="$WITH_STATIC_LIB" \ + -DMSVC_STATIC_CRT="$MSVC_STATIC_CRT" msbuild /maxcpucount /property:Configuration=Release cpp_inference_demo.sln for use_gpu in $use_gpu_list; do for vis_demo_name in $vis_demo_list; do Release/vis_demo.exe \ - --modeldir=$DATA_DIR/$vis_demo_name/model \ - --data=$DATA_DIR/$vis_demo_name/data.txt \ - --refer=$DATA_DIR/$vis_demo_name/result.txt \ - --use_gpu=$use_gpu - if [ $? -ne 0 ]; then + --modeldir="$DATA_DIR"/"$vis_demo_name"/model \ + --data="$DATA_DIR"/"$vis_demo_name"/data.txt \ + --refer="$DATA_DIR"/"$vis_demo_name"/result.txt \ + --use_gpu="$use_gpu" + EXCODE="$?" + if [ "$EXCODE" -ne 0 ]; then echo "vis demo $vis_demo_name runs fail." exit 1 fi @@ -110,63 +112,66 @@ for WITH_STATIC_LIB in ON OFF; do done else # -----simple_on_word2vec on linux/mac----- - rm -rf * - cmake .. -DPADDLE_LIB=${inference_install_dir} \ - -DWITH_MKL=$TURN_ON_MKL \ + rm -rf ./* + cmake .. -DPADDLE_LIB="${inference_install_dir}" \ + -DWITH_MKL="$TURN_ON_MKL" \ -DDEMO_NAME=simple_on_word2vec \ - -DWITH_GPU=$TEST_GPU_CPU \ - -DWITH_STATIC_LIB=$WITH_STATIC_LIB - make -j$(nproc) - word2vec_model=$DATA_DIR'/word2vec/word2vec.inference.model' - if [ -d $word2vec_model ]; then + -DWITH_GPU="$TEST_GPU_CPU" \ + -DWITH_STATIC_LIB="$WITH_STATIC_LIB" + make -j"$(nproc)" + word2vec_model="$DATA_DIR"'/word2vec/word2vec.inference.model' + if [ -d "$word2vec_model" ]; then for use_gpu in $use_gpu_list; do ./simple_on_word2vec \ - --dirname=$DATA_DIR/word2vec/word2vec.inference.model \ - --use_gpu=$use_gpu - if [ $? -ne 0 ]; then + --dirname="$DATA_DIR"/word2vec/word2vec.inference.model \ + --use_gpu="$use_gpu" + EXCODE="$?" 
+ if [ "$EXCODE" -ne 0 ]; then echo "simple_on_word2vec demo runs fail." exit 1 fi done fi # ---------vis_demo on linux/mac--------- - rm -rf * - cmake .. -DPADDLE_LIB=${inference_install_dir} \ - -DWITH_MKL=$TURN_ON_MKL \ + rm -rf ./* + cmake .. -DPADDLE_LIB="${inference_install_dir}" \ + -DWITH_MKL="$TURN_ON_MKL" \ -DDEMO_NAME=vis_demo \ - -DWITH_GPU=$TEST_GPU_CPU \ - -DWITH_STATIC_LIB=$WITH_STATIC_LIB - make -j$(nproc) + -DWITH_GPU="$TEST_GPU_CPU" \ + -DWITH_STATIC_LIB="$WITH_STATIC_LIB" + make -j"$(nproc)" for use_gpu in $use_gpu_list; do for vis_demo_name in $vis_demo_list; do ./vis_demo \ - --modeldir=$DATA_DIR/$vis_demo_name/model \ - --data=$DATA_DIR/$vis_demo_name/data.txt \ - --refer=$DATA_DIR/$vis_demo_name/result.txt \ - --use_gpu=$use_gpu - if [ $? -ne 0 ]; then + --modeldir="$DATA_DIR"/"$vis_demo_name"/model \ + --data="$DATA_DIR"/"$vis_demo_name"/data.txt \ + --refer="$DATA_DIR"/"$vis_demo_name"/result.txt \ + --use_gpu="$use_gpu" + EXCODE="$?" + if [ "$EXCODE" -ne 0 ]; then echo "vis demo $vis_demo_name runs fail." exit 1 fi done done # --------tensorrt mobilenet on linux/mac------ - if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then - rm -rf * - cmake .. -DPADDLE_LIB=${inference_install_dir} \ - -DWITH_MKL=$TURN_ON_MKL \ + if [ "$USE_TENSORRT" == ON ] && [ "$TEST_GPU_CPU" == ON ]; then + rm -rf ./* + cmake .. 
-DPADDLE_LIB="${inference_install_dir}" \ + -DWITH_MKL="$TURN_ON_MKL" \ -DDEMO_NAME=trt_mobilenet_demo \ - -DWITH_GPU=$TEST_GPU_CPU \ - -DWITH_STATIC_LIB=$WITH_STATIC_LIB \ - -DUSE_TENSORRT=$USE_TENSORRT \ - -DTENSORRT_INCLUDE_DIR=$TENSORRT_INCLUDE_DIR \ - -DTENSORRT_LIB_DIR=$TENSORRT_LIB_DIR - make -j$(nproc) + -DWITH_GPU="$TEST_GPU_CPU" \ + -DWITH_STATIC_LIB="$WITH_STATIC_LIB" \ + -DUSE_TENSORRT="$USE_TENSORRT" \ + -DTENSORRT_INCLUDE_DIR="$TENSORRT_INCLUDE_DIR" \ + -DTENSORRT_LIB_DIR="$TENSORRT_LIB_DIR" + make -j"$(nproc)" ./trt_mobilenet_demo \ - --modeldir=$DATA_DIR/mobilenet/model \ - --data=$DATA_DIR/mobilenet/data.txt \ - --refer=$DATA_DIR/mobilenet/result.txt - if [ $? -ne 0 ]; then + --modeldir="$DATA_DIR"/mobilenet/model \ + --data="$DATA_DIR"/mobilenet/data.txt \ + --refer="$DATA_DIR"/mobilenet/result.txt + EXCODE="$?" + if [ "$EXCODE" != 0 ]; then echo "trt demo trt_mobilenet_demo runs fail." exit 1 fi diff --git a/paddle/fluid/inference/check_symbol.sh b/paddle/fluid/inference/check_symbol.sh index a0f64796576c85..0c66946c4b8a1e 100755 --- a/paddle/fluid/inference/check_symbol.sh +++ b/paddle/fluid/inference/check_symbol.sh @@ -1,12 +1,12 @@ #!/bin/sh -lib=$1 -if [ $# -ne 1 ]; then echo "No input library"; exit -1 ; fi +lib="$1" +if [ "$#" -ne 1 ]; then echo "No input library"; exit 1 ; fi -num_paddle_syms=$(nm -D ${lib} | grep paddle | wc -l) -num_google_syms=$(nm -D ${lib} | grep google | grep -v paddle | grep "T " | wc -l) +num_paddle_syms=$(nm -D "${lib}" | grep -c paddle ) +num_google_syms=$(nm -D "${lib}" | grep google | grep -v paddle | grep -c "T " ) -if [ $num_paddle_syms -le 0 ]; then echo "Have no paddle symbols"; exit -1 ; fi -if [ $num_google_syms -ge 1 ]; then echo "Have some google symbols"; exit -1 ; fi +if [ "$num_paddle_syms" -le 0 ]; then echo "Have no paddle symbols"; exit 1 ; fi +if [ "$num_google_syms" -ge 1 ]; then echo "Have some google symbols"; exit 1 ; fi exit 0 diff --git a/paddle/fluid/train/demo/clean.sh 
b/paddle/fluid/train/demo/clean.sh index a2064492c08b84..192bdf8752c159 100755 --- a/paddle/fluid/train/demo/clean.sh +++ b/paddle/fluid/train/demo/clean.sh @@ -15,6 +15,6 @@ # limitations under the License. set -x -cd "$(dirname "$0")" +cd "$(dirname "$0")" || exit rm -rf build/ set +x diff --git a/paddle/fluid/train/demo/run.sh b/paddle/fluid/train/demo/run.sh index 2955e7574daa2d..a9c0ed4ac68a2a 100755 --- a/paddle/fluid/train/demo/run.sh +++ b/paddle/fluid/train/demo/run.sh @@ -14,14 +14,14 @@ function download() { download # build demo trainer -paddle_install_dir=${PADDLE_ROOT}/build/paddle_install_dir +paddle_install_dir="${PADDLE_ROOT}"/build/paddle_install_dir mkdir -p build -cd build -rm -rf * -cmake .. -DPADDLE_LIB=$paddle_install_dir \ - -DWITH_MKLDNN=$TURN_ON_MKL \ - -DWITH_MKL=$TURN_ON_MKL +cd build || exit +rm -rf ./* +cmake .. -DPADDLE_LIB="$paddle_install_dir" \ + -DWITH_MKLDNN="$TURN_ON_MKL" \ + -DWITH_MKL="$TURN_ON_MKL" make cd .. diff --git a/paddle/fluid/train/imdb_demo/run.sh b/paddle/fluid/train/imdb_demo/run.sh index f71b4bac602a9e..8a585c614e53fe 100644 --- a/paddle/fluid/train/imdb_demo/run.sh +++ b/paddle/fluid/train/imdb_demo/run.sh @@ -1,3 +1,3 @@ - +#!/bin/bash set -exu build/demo_trainer --flagfile="train.cfg" diff --git a/paddle/scripts/paddle_docker_build.sh b/paddle/scripts/paddle_docker_build.sh index d6b639d0da2a54..fdd0d490a6fdb7 100755 --- a/paddle/scripts/paddle_docker_build.sh +++ b/paddle/scripts/paddle_docker_build.sh @@ -15,14 +15,14 @@ # limitations under the License. function start_build_docker() { - docker pull $IMG + docker pull "$IMG" apt_mirror='s#http://archive.ubuntu.com/ubuntu#mirror://mirrors.ubuntu.com/mirrors.txt#g' DOCKER_ENV=$(cat <\t,,,... -PADDLE_ROOT="$(dirname $(readlink -f ${BASH_SOURCE[0]}))/.." +PADDLE_ROOT="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")/.." 
-find ${PADDLE_ROOT}/python/ -name '*.py' \ +find "${PADDLE_ROOT}"/python/ -name '*.py' \ | xargs grep -v '^#' \ | grep 'DEFINE_ALIAS' \ | perl -ne ' diff --git a/tools/manylinux1/build_scripts/install_nccl2.sh b/tools/manylinux1/build_scripts/install_nccl2.sh index 0c9bf1409d90d8..c2adf6a79de4bb 100644 --- a/tools/manylinux1/build_scripts/install_nccl2.sh +++ b/tools/manylinux1/build_scripts/install_nccl2.sh @@ -1,4 +1,19 @@ #!/bin/bash + +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ VERSION=$(nvcc --version | grep release | grep -oEi "release ([0-9]+)\.([0-9])"| sed "s/release //") if [ "$VERSION" == "10.0" ]; then DEB="nccl-repo-ubuntu1604-2.4.7-ga-cuda10.0_1-1_amd64.deb" @@ -24,10 +39,10 @@ wget -q -O $DIR/$DEB $URL cd $DIR && ar x $DEB && tar xf data.tar.xz DEBS=$(find ./var/ -name "*.deb") for sub_deb in $DEBS; do - echo $sub_deb - ar x $sub_deb && tar xf data.tar.xz + echo "$sub_deb" + ar x "$sub_deb" && tar xf data.tar.xz done mv -f usr/include/nccl.h /usr/local/include/ mv -f usr/lib/x86_64-linux-gnu/libnccl* /usr/local/lib/ rm /usr/include/nccl.h -rm -rf $DIR +rm -rf "$DIR" From e1fb46739ab082937edb9fa9c910cde18b712045 Mon Sep 17 00:00:00 2001 From: joejiong Date: Thu, 29 Oct 2020 13:59:20 +0800 Subject: [PATCH 08/10] move cinn dockerfile to dockerfile folder (#28281) --- .../Dockerfile.cuda10_ubuntu18_cinn | 152 ++++++++++++++++++ 1 file changed, 152 insertions(+) create mode 100644 tools/dockerfile/Dockerfile.cuda10_ubuntu18_cinn diff --git a/tools/dockerfile/Dockerfile.cuda10_ubuntu18_cinn b/tools/dockerfile/Dockerfile.cuda10_ubuntu18_cinn new file mode 100644 index 00000000000000..964f082b561371 --- /dev/null +++ b/tools/dockerfile/Dockerfile.cuda10_ubuntu18_cinn @@ -0,0 +1,152 @@ +# A image for building paddle binaries +# Use cuda devel base image for both cpu and gpu environment +# When you modify it, please be aware of cudnn-runtime version +FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04 +MAINTAINER PaddlePaddle Authors + +# ENV variables +ARG WITH_GPU +ARG WITH_AVX + +ENV WITH_GPU=${WITH_GPU:-ON} +ENV WITH_AVX=${WITH_AVX:-ON} +ENV DEBIAN_FRONTEND=noninteractive + +ENV HOME /root +# Add bash enhancements +COPY paddle/scripts/docker/root/ /root/ + +RUN apt-get update && \ + apt-get install -y software-properties-common && add-apt-repository ppa:deadsnakes/ppa && \ + apt-get update && \ + apt-get install -y curl wget vim git unzip unrar tar xz-utils bzip2 gzip \ + coreutils ntp language-pack-zh-hans python-qt4 libsm6 libxext6 
libxrender-dev + + +# Downgrade gcc&&g++ +WORKDIR /usr/bin + RUN apt-get update --fix-missing + COPY tools/dockerfile/build_scripts /build_scripts + RUN bash /build_scripts/install_gcc.sh gcc82 && rm -rf /build_scripts + RUN cp gcc gcc.bak && cp g++ g++.bak && rm gcc && rm g++ + RUN ln -s /usr/local/gcc-8.2/bin/gcc /usr/local/bin/gcc + RUN ln -s /usr/local/gcc-8.2/bin/g++ /usr/local/bin/g++ + RUN ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/gcc + RUN ln -s /usr/local/gcc-8.2/bin/g++ /usr/bin/g++ + ENV PATH=/usr/local/gcc-8.2/bin:$PATH + +RUN apt-get update && \ + apt-get install -y python2.7 python2.7-dev \ + python3.5 python3.5-dev \ + python3.6 python3.6-dev \ + python3.7 python3.7-dev \ + python3.8 python3.8-dev && \ + curl https://bootstrap.pypa.io/ez_setup.py -o - | python2.7 && easy_install pip && \ + curl https://bootstrap.pypa.io/ez_setup.py -o - | python3.5 && easy_install pip && \ + curl https://bootstrap.pypa.io/ez_setup.py -o - | python3.6 && easy_install pip && \ + curl https://bootstrap.pypa.io/ez_setup.py -o - | python3.7 && easy_install pip && \ + curl https://bootstrap.pypa.io/ez_setup.py -o - | python3.8 && easy_install pip && \ + rm /usr/bin/python && ln -s /usr/bin/python2.7 /usr/bin/python && \ + rm /usr/bin/python3 && ln -s /usr/bin/python3.5 /usr/bin/python3 && \ + rm /usr/local/bin/pip && ln -s /usr/local/bin/pip2.7 /usr/local/bin/pip && \ + rm /usr/local/bin/pip3 && ln -s /usr/local/bin/pip3.5 /usr/local/bin/pip3 + + +# install cmake +WORKDIR /home +RUN wget -q https://cmake.org/files/v3.16/cmake-3.16.0-Linux-x86_64.tar.gz && tar -zxvf cmake-3.16.0-Linux-x86_64.tar.gz && rm cmake-3.16.0-Linux-x86_64.tar.gz +ENV PATH=/home/cmake-3.16.0-Linux-x86_64/bin:$PATH + + +# remove them when apt-get support 2.27 and higher version +RUN wget -q https://ftp.gnu.org/gnu/binutils/binutils-2.33.1.tar.gz && \ + tar -xzf binutils-2.33.1.tar.gz && \ + cd binutils-2.33.1 && \ + ./configure && make -j && make install && cd .. 
&& rm -rf binutils-2.33.1 binutils-2.33.1.tar.gz + + +# Install Go and glide +RUN wget -qO- https://paddle-ci.cdn.bcebos.com/go1.8.1.linux-amd64.tar.gz | \ + tar -xz -C /usr/local && \ + mkdir /root/gopath && \ + mkdir /root/gopath/bin && \ + mkdir /root/gopath/src +ENV GOROOT=/usr/local/go GOPATH=/root/gopath +# should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT. +ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin +# install glide +RUN curl -s -q https://glide.sh/get | sh + +# git credential to skip password typing +RUN git config --global credential.helper store + +# Fix locales to en_US.UTF-8 +RUN localedef -i en_US -f UTF-8 en_US.UTF-8 + +RUN pip3 --no-cache-dir install pre-commit==1.10.4 ipython==5.3.0 && \ + pip3 --no-cache-dir install ipykernel==4.6.0 wheel && \ + pip3.6 --no-cache-dir install pre-commit==1.10.4 ipython==5.3.0 && \ + pip3.6 --no-cache-dir install ipykernel==4.6.0 wheel && \ + pip3.7 --no-cache-dir install pre-commit==1.10.4 ipython==5.3.0 && \ + pip3.7 --no-cache-dir install ipykernel==4.6.0 wheel && \ + pip3.8 --no-cache-dir install pre-commit==1.10.4 ipython==5.3.0 && \ + pip3.8 --no-cache-dir install ipykernel==4.6.0 wheel && \ + pip --no-cache-dir install pre-commit==1.10.4 ipython==5.3.0 && \ + pip --no-cache-dir install ipykernel==4.6.0 wheel + +#For docstring checker +RUN pip3 --no-cache-dir install pylint pytest astroid isort && \ + pip3.6 --no-cache-dir install pylint pytest astroid isort && \ + pip3.7 --no-cache-dir install pylint pytest astroid isort && \ + pip3.8 --no-cache-dir install pylint pytest astroid isort && \ + pip --no-cache-dir install pylint pytest astroid isort + +COPY ./python/requirements.txt /root/ +RUN pip3 --no-cache-dir install -r /root/requirements.txt && \ + pip3.6 --no-cache-dir install -r /root/requirements.txt && \ + pip3.7 --no-cache-dir install -r /root/requirements.txt && \ + pip3.8 --no-cache-dir install -r /root/requirements.txt && \ + pip --no-cache-dir 
install -r /root/requirements.txt + + +# Older versions of patchelf limited the size of the files being processed and were fixed in this pr. +# https://github.com/NixOS/patchelf/commit/ba2695a8110abbc8cc6baf0eea819922ee5007fa +# So install a newer version here. +RUN wget -q http://mirrors.kernel.org/ubuntu/pool/universe/p/patchelf/patchelf_0.10-2_amd64.deb && \ + dpkg -i patchelf_0.10-2_amd64.deb + +# Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service +#RUN mkdir /var/run/sshd && echo 'root:root' | chpasswd && sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config && sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config +#CMD source ~/.bashrc + +# ccache 3.7.9 +RUN wget https://paddle-ci.gz.bcebos.com/ccache-3.7.9.tar.gz && \ + tar xf ccache-3.7.9.tar.gz && mkdir /usr/local/ccache-3.7.9 && cd ccache-3.7.9 && \ + ./configure -prefix=/usr/local/ccache-3.7.9 && \ + make -j8 && make install && \ + ln -s /usr/local/ccache-3.7.9/bin/ccache /usr/local/bin/ccache + +# For CINN environment +RUN apt update --fix-missing +RUN apt-get install autoconf autogen +RUN apt-get install libtool +RUN apt-get install zlib1g-dev +RUN apt install libginac-dev -y +RUN apt install clang cmake -y +RUN python3 -m pip install numpy +RUN python3 -m pip install pybind11 + + +# Install LLVM +RUN echo "deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic main" >> /etc/apt/sources.list +RUN echo "deb-src http://apt.llvm.org/bionic/ llvm-toolchain-bionic main" >> /etc/apt/sources.list +RUN echo "deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-10 main" >> /etc/apt/sources.list +RUN echo "deb-src http://apt.llvm.org/bionic/ llvm-toolchain-bionic-10 main" >> /etc/apt/sources.list +RUN ln -s /usr/bin/llvm-config-6.0 /usr/bin/llvm-config +RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key|apt-key add - + +RUN apt update +RUN apt install libclang-dev llvm-10 llvm-10-dev libclang-10-dev -y + + +EXPOSE 22 From 
1c385e26f9e6727c66f971a33f930c59f75a6639 Mon Sep 17 00:00:00 2001 From: wangguanzhong Date: Thu, 29 Oct 2020 15:05:51 +0800 Subject: [PATCH 09/10] add op_function_generator for box_coder (#28303) * add op_function_generator for box_coder * fix format --- paddle/fluid/pybind/op_function_generator.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/fluid/pybind/op_function_generator.cc b/paddle/fluid/pybind/op_function_generator.cc index cac44173c17727..92006bff2cc16c 100644 --- a/paddle/fluid/pybind/op_function_generator.cc +++ b/paddle/fluid/pybind/op_function_generator.cc @@ -53,6 +53,7 @@ std::map> op_ins_map = { {"X", "W", "Label", "PathTable", "PathCode", "Bias"}}, {"moving_average_abs_max_scale", {"X", "InAccum", "InState"}}, {"multiclass_nms3", {"BBoxes", "Scores", "RoisNum"}}, + {"box_coder", {"PriorBox", "PriorBoxVar", "TargetBox"}}, }; // NOTE(zhiqiu): Like op_ins_map. From 8cd1c102d9ec0d04071422af999d04f3840a931b Mon Sep 17 00:00:00 2001 From: lidanqing Date: Thu, 29 Oct 2020 14:32:08 +0100 Subject: [PATCH 10/10] Enable GRU infer model running CAPI (#28313) * enable infer model running CAPI * output size should be bigger than 0 --- .../inference/tests/api/analyzer_lexical_analysis_gru_tester.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/inference/tests/api/analyzer_lexical_analysis_gru_tester.cc b/paddle/fluid/inference/tests/api/analyzer_lexical_analysis_gru_tester.cc index e4035c80341379..7c5757ce9d4c63 100644 --- a/paddle/fluid/inference/tests/api/analyzer_lexical_analysis_gru_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_lexical_analysis_gru_tester.cc @@ -190,7 +190,7 @@ std::vector Lexical_Test( // return acc_res; } else { EXPECT_GT(outputs->size(), 0UL); - EXPECT_EQ(outputs[0].size(), 1UL); + EXPECT_GT(outputs[0].size(), 0UL); LOG(INFO) << "No accuracy result. To get accuracy result provide a model " "with accuracy layers in it and use --with_accuracy_layer " "option.";