Commit 4d5e51a

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into step_lr_on_epoch_end

heavengate committed Oct 10, 2020
2 parents 3791c00 + c4b1faa

Showing 174 changed files with 6,322 additions and 1,677 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -33,7 +33,7 @@ pip install paddlepaddle
# Linux GPU cuda10cudnn7
pip install paddlepaddle-gpu
# Linux GPU cuda9cudnn7
- pip install paddlepaddle-gpu==1.8.4.post97
+ pip install paddlepaddle-gpu==1.8.5.post97
```
It is recommended to read [this doc](https://www.paddlepaddle.org.cn/documentation/docs/en/beginners_guide/install/index_en.html) on our website.
2 changes: 1 addition & 1 deletion README_cn.md
@@ -30,7 +30,7 @@ pip install paddlepaddle
# Linux GPU cuda10cudnn7
pip install paddlepaddle-gpu
# Linux GPU cuda9cudnn7
- pip install paddlepaddle-gpu==1.8.4.post97
+ pip install paddlepaddle-gpu==1.8.5.post97
```
For more installation information, see the [installation guide](http://www.paddlepaddle.org.cn/documentation/docs/zh/1.8/beginners_guide/install/index_cn.html) on the official website.
2 changes: 1 addition & 1 deletion cmake/external/mkldnn.cmake
@@ -20,7 +20,7 @@ SET(MKLDNN_SOURCE_DIR ${THIRD_PARTY_PATH}/mkldnn/src/extern_mkldnn)
SET(MKLDNN_INSTALL_DIR ${THIRD_PARTY_PATH}/install/mkldnn)
SET(MKLDNN_INC_DIR "${MKLDNN_INSTALL_DIR}/include" CACHE PATH "mkldnn include directory." FORCE)
SET(MKLDNN_REPOSITORY https://github.com/oneapi-src/oneDNN.git)
- SET(MKLDNN_TAG 64a48f9565aa72f6359917b3406328075a409939)
+ SET(MKLDNN_TAG 361725600224f41b7347a1c6bee9b04d1e6c14d7)

# Introduce variables:
# * CMAKE_INSTALL_LIBDIR
2 changes: 1 addition & 1 deletion paddle/fluid/framework/data_layout_transform.cc
@@ -203,7 +203,7 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout,
// As the MKL-DNN description was in NCHW while Paddle expects NHWC
platform::MatchShapeToLayout(out, in_layout, out_layout);

- out->set_layout(out_layout);
+ out->set_layout(DataLayout::kNCHW);
// reset format since the out tensor will be fed to a non-MKLDNN OPkernel
out->set_format(MKLDNNMemoryFormat::undef);
}
3 changes: 3 additions & 0 deletions paddle/fluid/framework/data_transform.cc
@@ -117,6 +117,9 @@ void SetTensorToVariable(const Variable &in_var, const Tensor &tensor,
auto *tran_lod_tensor = out_var->GetMutable<LoDTensor>();
tran_lod_tensor->set_lod(in_lod_tensor.lod());
tran_lod_tensor->set_layout(in_lod_tensor.layout());
#ifdef PADDLE_WITH_MKLDNN
tran_lod_tensor->set_format(in_lod_tensor.format());
#endif
tran_lod_tensor->ShareDataWith(tensor);
} else if (in_var.IsType<SelectedRows>()) {
auto &in_selected_rows = in_var.Get<SelectedRows>();
26 changes: 22 additions & 4 deletions paddle/fluid/framework/ir/graph_pattern_detector.cc
@@ -1882,9 +1882,9 @@ PDNode *patterns::MultipleQuantize::operator()() {
PDNode *patterns::QuantizePlacement::operator()(
const std::unordered_set<std::string> &quantize_enabled_op_types) {
std::unordered_set<std::string> supported_op_types =
- std::unordered_set<std::string>({"concat", "conv2d", "elementwise_add",
-                                  "fc", "matmul", "pool2d", "prior_box",
-                                  "relu", "reshape2", "transpose2"});
+ std::unordered_set<std::string>(
+     {"concat", "conv2d", "elementwise_add", "fc", "matmul", "pool2d",
+      "prior_box", "relu", "reshape2", "transpose2", "fusion_gru"});
if (!quantize_enabled_op_types.empty()) {
supported_op_types = quantize_enabled_op_types;
}
@@ -1894,7 +1894,8 @@ PDNode *patterns::QuantizePlacement::operator()(

PDNode *patterns::Bfloat16Placement::operator()(
const std::unordered_set<std::string> &bfloat16_enabled_op_types) {
- std::unordered_set<std::string> supported_op_types{"conv2d"};
+ std::unordered_set<std::string> supported_op_types =
+     std::unordered_set<std::string>({"conv2d", "fusion_gru"});
if (!bfloat16_enabled_op_types.empty()) {
supported_op_types = bfloat16_enabled_op_types;
}
@@ -2280,6 +2281,23 @@ PDNode *patterns::MatmulTransposeReshapePattern::operator()() {
return reshape_out;
}

PDNode *patterns::FusionGru::operator()() {
auto op = pattern->NewNode(op_repr())->assert_is_op("fusion_gru");
auto x = pattern->NewNode(x_repr())->AsInput()->assert_is_op_input(
"fusion_gru", "X");
auto weight_h = pattern->NewNode(weight_h_repr())
->AsInput()
->assert_is_op_input("fusion_gru", "WeightH");
auto weight_x = pattern->NewNode(weight_x_repr())
->AsInput()
->assert_is_op_input("fusion_gru", "WeightX");
auto out = pattern->NewNode(out_repr())
->AsOutput()
->assert_is_op_output("fusion_gru", "Hidden");
op->LinksFrom({x, weight_h, weight_x}).LinksTo({out});
return out;
}

} // namespace ir
} // namespace framework
} // namespace paddle
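For orientation, the sketch below (illustrative, not part of this diff) condenses how a pattern such as `patterns::FusionGru` is consumed; it mirrors `CPUQuantizePass::QuantizeFusionGru` later in this commit, with an abbreviated handler body and an illustrative name scope:
```cpp
// Illustrative sketch -- condensed from CPUQuantizePass::QuantizeFusionGru.
GraphPatternDetector gpd;
patterns::FusionGru pattern{gpd.mutable_pattern(), "quantize_fusion_gru"};
pattern();  // builds the PDNodes (op, x, weight_h, weight_x, out) and their links

auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, Graph* g) {
  // Retrieve matched nodes by the names declared via PATTERN_DECL_NODE.
  GET_IR_NODE_FROM_SUBGRAPH(op, op, pattern);
  GET_IR_NODE_FROM_SUBGRAPH(x, x, pattern);
  // ... inspect or rewrite the matched fusion_gru subgraph ...
};
gpd(graph, handler);  // runs the handler once per match found in `graph`
```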
15 changes: 15 additions & 0 deletions paddle/fluid/framework/ir/graph_pattern_detector.h
@@ -1312,6 +1312,21 @@ struct MatmulTransposeReshapePattern : public PatternBase {
PATTERN_DECL_NODE(reshape_out_xshape);
};

// fusion_gru op
// Forward pass for fusion_gru.
// "out" is the Hidden output produced by the operator.
struct FusionGru : public PatternBase {
FusionGru(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "fusion_gru") {}

PDNode* operator()();
PATTERN_DECL_NODE(op);
PATTERN_DECL_NODE(x);
PATTERN_DECL_NODE(weight_h);
PATTERN_DECL_NODE(weight_x);
PATTERN_DECL_NODE(out);
};

} // namespace patterns

// Link two ir::Nodes from each other.
79 changes: 71 additions & 8 deletions paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
@@ -63,16 +63,17 @@ enum { U8_MAX = 255, S8_MAX = 127 };

void CPUQuantizePass::QuantizeInput(Graph* g, Node* op, Node* input,
std::string input_name, double scale_to_one,
- bool is_unsigned,
- std::string scale_attr_name) const {
+ bool is_input_unsigned,
+ std::string scale_attr_name, float shift,
+ std::string shift_attr_name) const {
auto inputs = op->Op()->InputNames();
bool name_found =
std::find(inputs.begin(), inputs.end(), input_name) != inputs.end();
PADDLE_ENFORCE_EQ(name_found, true,
platform::errors::InvalidArgument(
"Var(%s) isn't the input of the %s operator.",
input_name, op->Op()->Type()));
- unsigned max = is_unsigned ? U8_MAX : S8_MAX;
+ unsigned max = is_input_unsigned ? U8_MAX : S8_MAX;
float scale = scale_to_one * max;

// Create quantize output variable
@@ -86,7 +87,8 @@ void CPUQuantizePass::QuantizeInput(Graph* g, Node* op, Node* input,
q_desc.SetOutput("Output",
std::vector<std::string>({quantize_out_node->Name()}));
q_desc.SetAttr("Scale", scale);
q_desc.SetAttr("is_negative_input", !is_unsigned);
q_desc.SetAttr("Shift", shift);
q_desc.SetAttr("is_negative_input", !is_input_unsigned);

q_desc.SetAttr("output_format",
Has("data_layout") ? Get<std::string>("data_layout") : "NHWC");
@@ -103,11 +105,13 @@ void CPUQuantizePass::QuantizeInput(Graph* g, Node* op, Node* input,
IR_NODE_LINK_TO(quantize_out_node, op);

if (!scale_attr_name.empty()) op->Op()->SetAttr(scale_attr_name, scale);
if (!shift_attr_name.empty()) op->Op()->SetAttr(shift_attr_name, shift);
}
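For reference, a standalone sketch (not part of this diff) of the arithmetic being configured here: `scale` is `scale_to_one * max` as computed above, and the new `Shift` attribute recenters signed data into the unsigned range (fusion_gru passes shift = 128 for signed inputs; see QuantizeFusionGru below). The exact rounding/saturation behavior of the quantize kernel is an assumption:
```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>

// Assumed kernel semantics: q = round(x * scale + shift), saturated to [0, 255].
// A shift of 128 maps the signed s8 range [-128, 127] into u8's [0, 255].
uint8_t QuantizeValue(float x, float scale_to_one, bool is_input_unsigned) {
  const float max = is_input_unsigned ? 255.0f : 127.0f;  // U8_MAX : S8_MAX
  const float scale = scale_to_one * max;                 // as in QuantizeInput
  const float shift = is_input_unsigned ? 0.0f : 128.0f;
  const float q = std::round(x * scale + shift);
  return static_cast<uint8_t>(std::min(255.0f, std::max(0.0f, q)));
}
```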

void CPUQuantizePass::QuantizeInputs(Graph* g, Node* op, std::string input_name,
- bool are_unsigned,
- std::string scale_attr_name) const {
+ bool are_inputs_unsigned,
+ std::string scale_attr_name, float shift,
+ std::string shift_attr_name) const {
auto inputs = op->inputs;
auto output = op->outputs[0];
PADDLE_ENFORCE_GE(inputs.size(), 1,
@@ -127,7 +131,7 @@ void CPUQuantizePass::QuantizeInputs(Graph* g, Node* op, std::string input_name,
std::vector<std::string> quantize_out_node_names(inputs.size());

double scale_out = GetScaleValueForNode(output);
- unsigned max = are_unsigned ? U8_MAX : S8_MAX;
+ unsigned max = are_inputs_unsigned ? U8_MAX : S8_MAX;
float scale = scale_out * max;

for (size_t i = 0; i < inputs.size(); i++) {
@@ -137,10 +141,11 @@ void CPUQuantizePass::QuantizeInputs(Graph* g, Node* op, std::string input_name,
quantize_out_node_names[i] = quantize_out_nodes[i]->Name();

q_desc.SetAttr("Scale", scale);
q_desc.SetAttr("Shift", shift);
q_desc.SetInput("Input", std::vector<std::string>({inputs[i]->Name()}));
q_desc.SetOutput("Output",
std::vector<std::string>({quantize_out_node_names[i]}));
q_desc.SetAttr("is_negative_input", !are_unsigned);
q_desc.SetAttr("is_negative_input", !are_inputs_unsigned);
auto quantize_op = g->CreateOpNode(&q_desc); // OpDesc will be copied.

// link quantize op
@@ -154,6 +159,7 @@ void CPUQuantizePass::QuantizeInputs(Graph* g, Node* op, std::string input_name,
op->Op()->SetInput(input_name, quantize_out_node_names);

if (!scale_attr_name.empty()) op->Op()->SetAttr(scale_attr_name, scale);
if (!shift_attr_name.empty()) op->Op()->SetAttr(shift_attr_name, shift);
}

void CPUQuantizePass::DequantizeOutput(Graph* g, Node* op, Node* output,
@@ -782,6 +788,62 @@ void CPUQuantizePass::QuantizeElementwiseAdd(Graph* graph) const {
quantize_elementwise_add_count);
}

void CPUQuantizePass::QuantizeFusionGru(Graph* graph) const {
GraphPatternDetector gpd;
patterns::FusionGru pattern{gpd.mutable_pattern(), name_scope_};
pattern();

int quantize_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
VLOG(4) << "Quantize fusion_gru op";
GET_IR_NODE_FROM_SUBGRAPH(op, op, pattern);

// skip if should not be quantized
if (!platform::HasOpINT8DataType(op->Op())) {
LogQuantizationDisabled(op);
return;
}

GET_IR_NODE_FROM_SUBGRAPH(x, x, pattern);
GET_IR_NODE_FROM_SUBGRAPH(weight_h, weight_h, pattern);
GET_IR_NODE_FROM_SUBGRAPH(weight_x, weight_x, pattern);
GET_IR_NODE_FROM_SUBGRAPH(out, out, pattern);

if (!AreScalesPresentForNodes(op, {x, weight_h, weight_x})) {
LogCannotQuantizeOp(op);
return;
}

bool is_x_unsigned{false};
auto input_x_scale = GetScaleValueForNode(x, &is_x_unsigned);

double input_x_shift{128.};
if (is_x_unsigned) input_x_shift = 0.;

QuantizeInput(g, op, x, "X", input_x_scale, is_x_unsigned, "Scale_data",
input_x_shift, "Shift_data");

auto weight_scale_tensor = GetScaleTensorForNode(weight_x);
EigenVectorArrayMap eigen_tensor{weight_scale_tensor.data<double>(),
weight_scale_tensor.numel(), 1};
eigen_tensor *= static_cast<double>(S8_MAX);
std::vector<float> scale_weights{
weight_scale_tensor.data<double>(),
weight_scale_tensor.data<double>() + weight_scale_tensor.numel()};

op->Op()->SetAttr("Scale_weights", scale_weights);
// return fp32 data
op->Op()->SetAttr("force_fp32_output", true);

++quantize_count;
};
gpd(graph, handler);
AddStatis(quantize_count);

PrettyLogDetail("--- quantized %d fusion_gru ops", quantize_count);
}
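The Eigen block above scales the per-channel weight factors by S8_MAX in place and then copies them out as floats. A standalone equivalent of that computation (illustrative, not part of this diff):
```cpp
#include <vector>

// Per-channel weight scales: multiply each "scale to one" factor by
// S8_MAX = 127 and narrow from double to float, as done above via the
// Eigen map over weight_scale_tensor.
std::vector<float> PrepareWeightScales(const std::vector<double>& scale_to_one) {
  std::vector<float> scale_weights;
  scale_weights.reserve(scale_to_one.size());
  for (double s : scale_to_one) {
    scale_weights.push_back(static_cast<float>(s * 127.0));
  }
  return scale_weights;
}
```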

void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
VLOG(3) << "Quantizing the graph.";
PADDLE_ENFORCE_NOT_NULL(
@@ -801,6 +863,7 @@ void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
QuantizeReshape(graph);
QuantizeMatmul(graph);
QuantizeElementwiseAdd(graph);
QuantizeFusionGru(graph);
}

} // namespace ir
19 changes: 7 additions & 12 deletions paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h
@@ -49,31 +49,26 @@ class CPUQuantizePass : public FusePassBase {
void ApplyImpl(ir::Graph* graph) const override;

void QuantizeConv(Graph* graph, bool with_residual_data = false) const;

void QuantizeFc(Graph* graph) const;

void QuantizePool(Graph* graph) const;

void QuantizeConcat(Graph* graph) const;

void QuantizePriorBox(Graph* graph) const;

void QuantizeTranspose(Graph* graph) const;

void QuantizeReshape(Graph* graph) const;

void QuantizeMatmul(Graph* graph) const;

void QuantizeElementwiseAdd(Graph* graph) const;
void QuantizeFusionGru(Graph* graph) const;

void QuantizeInput(Graph* g, Node* op, Node* input, std::string input_name,
- double scale_to_one, bool is_unsigned,
- std::string scale_attr_name = "") const;
+ double scale_to_one, bool is_input_unsigned,
+ std::string scale_attr_name = "", float shift = 0.0,
+ std::string shift_attr_name = "") const;

// quantize all inputs of given name with the same (minimum) scale
void QuantizeInputs(Graph* g, Node* op, std::string input_name,
- bool are_unsigned,
- std::string scale_attr_name = "") const;
+ bool are_inputs_unsigned,
+ std::string scale_attr_name = "", float shift = 0.0,
+ std::string shift_attr_name = "") const;
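Since the new shift parameters default to `0.0` and an empty attribute name, existing call sites presumably compile and behave as before; only fusion_gru opts in. A sketch (the first call is a hypothetical pre-existing caller; the second is taken from QuantizeFusionGru in this commit):
```cpp
// Hypothetical pre-existing caller: unchanged, shift defaults to 0.
QuantizeInput(g, op, input, "Input", scale_to_one, is_input_unsigned, "Scale_in");

// fusion_gru passes an explicit shift and shift attribute name:
QuantizeInput(g, op, x, "X", input_x_scale, is_x_unsigned, "Scale_data",
              input_x_shift, "Shift_data");
```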

void DequantizeOutput(Graph* g, Node* op, Node* output,
std::string output_name, double scale_to_one,
81 changes: 81 additions & 0 deletions paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
@@ -91,6 +91,16 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
op->SetAttr("Scale_x", 1.0f);
op->SetAttr("Scale_y", 1.0f);
op->SetAttr("Scale_out", 1.0f);
} else if (type == "fusion_gru") {
op->SetInput("X", {inputs[0]});
op->SetInput("Bias", {inputs[1]});
op->SetInput("WeightX", {inputs[2]});
op->SetInput("WeightH", {inputs[3]});
op->SetOutput("Hidden", {outputs[0]});
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
op->SetAttr("Scale_data", 1.0f);
op->SetAttr("Shift_data", 0.0f);
op->SetAttr("Weight_scale", std::vector<float>{1.0f});
}
}

@@ -389,6 +399,77 @@ TEST(CpuQuantizePass, transpose) {
quant_count, dequant_count, added_nodes_count, 2.0f * 127);
}

static const std::initializer_list<std::string> variable_names_fusion_gru = {
"x", "wx", "wh", "b", "h"};

// x->Fusion_gru->h
ProgramDesc BuildProgramDescFusionGru() {
ProgramDesc prog;
for (auto& v : variable_names_fusion_gru) {
auto* var = prog.MutableBlock(0)->Var(v);
if (v.find("wx") == 0 || v.find("wh") == 0 || v.find("b") == 0) {
var->SetPersistable(true);
}
}

SetOp(&prog, "fusion_gru", "Fusion_gru", {"x", "wx", "wh", "b"}, {"h"}, true,
"int8");

return prog;
}

void MainTestFusionGru(const ProgramDesc& prog, int gru_count, int quant_count,
int dequant_count, int added_nodes_count, float scale,
float shift) {
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
int original_nodes_num, current_nodes_num;
PreparePass(&graph, prog, variable_names_fusion_gru, &original_nodes_num,
&current_nodes_num);

int quantize_nodes_count = 0;
int dequantize_nodes_count = 0;
int gru_nodes_count = 0;
for (auto* node : graph->Nodes()) {
if (node->IsOp()) {
auto* op = node->Op();
if (op->Type() == "fusion_gru") {
gru_nodes_count++;

auto op_name = BOOST_GET_CONST(std::string, op->GetAttr("name"));
EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Scale_data")), scale)
<< "Scale_data for node '" + op_name + "'.";
EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Shift_data")), shift)
<< "Shift_data for node '" + op_name + "'.";
EXPECT_EQ(BOOST_GET_CONST(std::vector<float>,
op->GetAttr("Scale_weights"))[0],
scale)
<< "Scale_weights for node '" + op_name + "'.";
EXPECT_EQ(BOOST_GET_CONST(bool, op->GetAttr("force_fp32_output")), true)
<< "force_fp32_output for node '" + op_name + "'.";
} else if (op->Type() == "quantize") {
quantize_nodes_count++;
} else if (op->Type() == "dequantize") {
dequantize_nodes_count++;
}
}
}
EXPECT_EQ(gru_nodes_count, gru_count);
EXPECT_EQ(quantize_nodes_count, quant_count);
EXPECT_EQ(dequantize_nodes_count, dequant_count);
EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num);
}

TEST(CpuQuantizePass, fusion_gru) {
// x->Fusion_gru->h
int gru_count = 1;
int quant_count = 1;
int dequant_count = 0;
// 1 Quant + 1 IN + 0 DeQuant + 0 OUT
int added_nodes_count = 1 + 1 + 0 + 0;
MainTestFusionGru(BuildProgramDescFusionGru(), gru_count, quant_count,
dequant_count, added_nodes_count, 2. * 127, 128.);
}
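The node accounting above corresponds to this graph transformation (sketch, not part of this diff):
```cpp
// Before the pass: x ----------------------------------> fusion_gru -> h  (fp32)
// After the pass:  x -> quantize(Scale=2*127, Shift=128) -> fusion_gru -> h
//
// force_fp32_output keeps h in fp32, so no dequantize op is added; the two
// new nodes are the quantize op and its output variable, matching
// added_nodes_count = 1 + 1 + 0 + 0.
```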

static const std::initializer_list<std::string> variable_names_reshape = {
"a", "w1", "b", "c", "d", "e", "f"};
