support inplace in dygraph eager_fluid state #40400

Merged: 61 commits, Mar 18, 2022
Changes shown below are from all commits.

Commits (61)
9fc70fe
[Eager] Support eager grad interface, draft version
veyron95 Mar 4, 2022
ba8d79e
Support eager grad interface with allow_unused and multi startup_op
veyron95 Mar 7, 2022
137db9d
Fix code format
veyron95 Mar 8, 2022
1a18aa2
Fix allow_unused case, return PyNone if tensor not initialize
veyron95 Mar 8, 2022
d09ec3b
Support output's stop_gradient related to create_graph
veyron95 Mar 8, 2022
f84f2be
Support grad exception case in eager mode, fix coverage CI
veyron95 Mar 8, 2022
733672e
Update ToPyObject, return PyNone if not initialize
veyron95 Mar 8, 2022
68b1991
AccumulationNode add FLAGS_retain_grad_for_all_tensor
veyron95 Mar 8, 2022
7665d63
Fix ci issue
veyron95 Mar 9, 2022
86393f5
Fix CI issue
veyron95 Mar 9, 2022
c653ec0
fix, use core.eager.Tensor
veyron95 Mar 9, 2022
9156cea
Add func SetBufferSlotRankZeros for GradTensorHolder
veyron95 Mar 9, 2022
6fd613d
Support retain_graph by using ClearTensorWrappers
veyron95 Mar 9, 2022
58731e9
Support retain_graph by using ClearTensorWrappers
veyron95 Mar 9, 2022
a88f9b1
Update retain_graph and no_grad_vars related test case
veyron95 Mar 9, 2022
778719b
Update code gen logic for ClearTensorWrappers
veyron95 Mar 10, 2022
65cf9e3
Fix by override statement
veyron95 Mar 10, 2022
af7b919
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
veyron95 Mar 10, 2022
4d3b57d
fix override func args
veyron95 Mar 10, 2022
415ff65
Support retain_graph, update unit tests
veyron95 Mar 10, 2022
bb283ce
Updated ClearTensorWrappers logic
veyron95 Mar 10, 2022
e548c22
fix grad python interface
veyron95 Mar 11, 2022
519c9a6
Use deep copy and update unit tests
veyron95 Mar 11, 2022
1fbc61b
Polish code
veyron95 Mar 11, 2022
c0a2b8b
Polish code
veyron95 Mar 11, 2022
536a28b
Fix CI issue, Deep copy only use when user set grad_tensors
veyron95 Mar 11, 2022
2417858
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
veyron95 Mar 11, 2022
34fa7c0
Fix CI, use Backward instead RunBackward
veyron95 Mar 11, 2022
1b89072
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
veyron95 Mar 12, 2022
af7f058
Fix CI, Declare kernel explicitly in test file
veyron95 Mar 12, 2022
f397b8f
Polish, remove vector of TensorWrapper
veyron95 Mar 14, 2022
e3f9826
Refactor the logic of grad/backward, polish codes
veyron95 Mar 14, 2022
27830a9
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
veyron95 Mar 14, 2022
7ede919
Update code after merge upstream develop
veyron95 Mar 14, 2022
f9adf49
Polish after merge upstream develop
veyron95 Mar 14, 2022
2fe3b9f
Update to adapt new GradNodeBase superclass
veyron95 Mar 14, 2022
90e97d6
Fix error introduced during conflict resolution
veyron95 Mar 14, 2022
8c27961
support inplace strategy in eager_fluid state
pangyoki Mar 15, 2022
daff8bd
solve conflict
pangyoki Mar 15, 2022
db573fe
solve conflict
pangyoki Mar 15, 2022
f0d8f65
nothing
pangyoki Mar 15, 2022
d18697a
Update purify potential_startup_nodes logic
veyron95 Mar 15, 2022
1b5eac2
Fix errors
veyron95 Mar 15, 2022
f4e42e2
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
veyron95 Mar 15, 2022
58a03b5
Polish code
veyron95 Mar 15, 2022
ea41a1c
solve conflict
pangyoki Mar 15, 2022
b04e9a9
Remove useless args for ToPyObject
veyron95 Mar 15, 2022
c7bd6fc
Remove useless TensorWrappersSet
veyron95 Mar 15, 2022
7bb3cbd
fix record conflict
pangyoki Mar 15, 2022
ac85d81
Fix code-format, re-install pre-commit
veyron95 Mar 16, 2022
945d282
fix tensor_wrapper bug
pangyoki Mar 16, 2022
8312d2d
Fix pre-process logic for potential_startup_ops
veyron95 Mar 16, 2022
441bc81
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
veyron95 Mar 16, 2022
326eee5
Update unit tests, use eager mode
veyron95 Mar 16, 2022
3562b64
solve conflict
pangyoki Mar 17, 2022
a0ec433
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
veyron95 Mar 17, 2022
df99eea
Fix conflicts
veyron95 Mar 17, 2022
cecc6e1
Merge commit 'refs/pull/40655/head' of https://github.com/PaddlePaddl…
pangyoki Mar 17, 2022
9491a06
fix unittest timeout
pangyoki Mar 18, 2022
894791c
solve conflict
pangyoki Mar 18, 2022
41dec57
little change
pangyoki Mar 18, 2022
3 changes: 2 additions & 1 deletion paddle/fluid/eager/api/utils/tensor_utils.cc
@@ -30,7 +30,8 @@ namespace egr_utils_api {

bool IsLeafTensor(const paddle::experimental::Tensor& target) {
std::shared_ptr<GradNodeBase> grad_node = EagerUtils::grad_node(target);
if (std::dynamic_pointer_cast<GradNodeAccumulation>(grad_node)) {
if (!grad_node ||
std::dynamic_pointer_cast<GradNodeAccumulation>(grad_node)) {
return true;
}

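Editor's note (not part of the diff): with this change a tensor whose grad_node is null is also treated as a leaf, not only tensors whose grad node is a GradNodeAccumulation. A minimal Python sketch of the two leaf cases, using the public Paddle eager API (the exact ops are illustrative assumptions):

import paddle

a = paddle.to_tensor([1.0, 2.0])                        # user-created, stop_gradient=True, no grad node -> leaf
w = paddle.to_tensor([1.0, 2.0], stop_gradient=False)   # user-created, gets a GradNodeAccumulation -> leaf
y = paddle.tanh(w)                                      # produced by an op, its grad node is tanh's -> not a leaf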
396 changes: 290 additions & 106 deletions paddle/fluid/eager/auto_code_generator/eager_generator.cc

Large diffs are not rendered by default.

48 changes: 48 additions & 0 deletions paddle/fluid/eager/tensor_wrapper.h
@@ -36,6 +36,15 @@ class TensorWrapper {
explicit TensorWrapper(const paddle::experimental::Tensor& tensor,
bool full_reserved = false,
bool no_need_buffer = false) {
// set inplace_version_snapshot_ according to tensor's current inplace
// version.
if (tensor.impl() && phi::DenseTensor::classof(tensor.impl().get())) {
phi::DenseTensor* dense_tensor =
static_cast<phi::DenseTensor*>(tensor.impl().get());
auto& inplace_version_counter = dense_tensor->InplaceVersionCounter();
inplace_version_snapshot_ = inplace_version_counter.CurrentVersion();
}

/**
* Normally, we should fully reserved all non-output or non-leaf fwd tensor
* here. And for fwd output tensor, we should not reserve its autogradmeta,
@@ -49,6 +58,7 @@ class TensorWrapper {
}

// shallow copy tensor_impl here
no_need_buffer_ = no_need_buffer;
if (no_need_buffer) {
if (phi::DenseTensor::classof(tensor.impl().get())) {
// Only Copy Meta
@@ -86,6 +96,7 @@ class TensorWrapper {

// if it's full_reserved just return the full copy of tensor
if (full_reserved_) {
check_inplace_version();
return intermidiate_tensor_;
} else {
std::shared_ptr<GradNodeBase> new_grad_node = grad_node;
@@ -94,15 +105,52 @@ class TensorWrapper {
intermidiate_tensor_.set_autograd_meta(
std::static_pointer_cast<paddle::experimental::AbstractAutogradMeta>(
p_ab_autograd_meta));
check_inplace_version();
[Inline review comment] Contributor: Looks like we're gonna check inplace version anyway, let's move this function "check_inplace_version" out.
[Inline review comment] Contributor (Author): done in PR #41118

return intermidiate_tensor_;
}
}

void check_inplace_version() {
if (no_need_buffer_) {
VLOG(6) << "There's no need to check inplace_version because "
"no_need_buffer_ is true.";
return;
}
if (intermidiate_tensor_.impl() &&
phi::DenseTensor::classof(intermidiate_tensor_.impl().get())) {
phi::DenseTensor* dense_tensor =
static_cast<phi::DenseTensor*>(intermidiate_tensor_.impl().get());
auto& inplace_version_counter = dense_tensor->InplaceVersionCounter();

uint32_t current_inplace_version =
inplace_version_counter.CurrentVersion();
PADDLE_ENFORCE_EQ(
current_inplace_version, inplace_version_snapshot_,
paddle::platform::errors::PermissionDenied(
"Tensor '%s' used in gradient computation has been "
"modified by an inplace operation. "
"Its version is %d but the expected version is %d. "
"Please fix your code to void calling an inplace operator "
"after using the Tensor which will used in gradient "
"computation.",
intermidiate_tensor_.name(), current_inplace_version,
inplace_version_snapshot_));
VLOG(6) << " The inplace_version_snapshot_ of Tensor '"
<< intermidiate_tensor_.name() << "' is [ "
<< inplace_version_snapshot_ << " ]";
VLOG(6) << " The current_inplace_version of Tensor '"
<< intermidiate_tensor_.name() << "' is [ "
<< current_inplace_version << " ]";
}
}

void clear() { intermidiate_tensor_.reset(); }

private:
bool full_reserved_ = false;
bool no_need_buffer_ = false;
std::pair<size_t, size_t> out_rank_info_;
paddle::experimental::Tensor intermidiate_tensor_;
uint32_t inplace_version_snapshot_ = 0;
};
} // namespace egr
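Editor's note: a minimal sketch of the failure this snapshot catches, written against the public Paddle eager API (the choice of tanh/scale_ is an assumption; any op that saves its output for backward would do). The wrapper records the tensor's inplace version when it is captured, and RecoverTensorWrapper refuses to hand the tensor back if an in-place op bumped the version afterwards:

import paddle

x = paddle.to_tensor([0.1, 0.2], stop_gradient=False)
y = paddle.tanh(x)        # tanh's grad node captures y in a TensorWrapper; snapshot taken here
y.scale_(2.0)             # in-place op bumps y's inplace version past the snapshot
y.sum().backward()        # check_inplace_version() detects the mismatch and raises PermissionDenied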
21 changes: 21 additions & 0 deletions paddle/fluid/eager/utils.cc
@@ -212,6 +212,27 @@ std::vector<std::shared_ptr<EagerVariable>> EagerUtils::CreateVars(
return res;
}

void EagerUtils::ModifyInplaceInput(
const std::shared_ptr<EagerVariable>& inplace_variable,
paddle::experimental::Tensor* inplace_tensor) {
// Only modify the meta information of the inplace tensor, because
// EagerVariable cannot modify Tensor's meta information after inplace
// op (such as ``reshape``) is executed.
PADDLE_ENFORCE_NOT_NULL(inplace_tensor,
paddle::platform::errors::Fatal(
"Inplace Tensor is null and cannot be modified. "
"We are tring to Modify Inplace Input from its "
"shared_ptr, this error may indicate the inplace "
" input is nullptr"));
if (phi::DenseTensor::classof(inplace_variable->GetTensorBase().get())) {
phi::DenseTensor* variable_dense_tensor =
static_cast<phi::DenseTensor*>(inplace_variable->GetTensorBase().get());
phi::DenseTensor* tensor_dense_tensor =
static_cast<phi::DenseTensor*>(inplace_tensor->impl().get());
tensor_dense_tensor->set_meta(variable_dense_tensor->meta());
}
}

std::vector<paddle::experimental::Tensor> EagerUtils::GetOutputs(
const std::vector<std::shared_ptr<EagerVariable>>& outs) {
std::vector<paddle::experimental::Tensor> res;
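Editor's note: a hedged illustration of why the meta sync matters (public-API sketch; using reshape_ here is an assumption). After an in-place reshape the kernel has rewritten the underlying DenseTensor's meta, and ModifyInplaceInput copies that meta back onto the Python-facing input tensor so its shape stays consistent:

import paddle

x = paddle.ones([2, 3])
x.reshape_([3, 2])        # in-place reshape rewrites the DenseTensor's meta
print(x.shape)            # [3, 2]: the input tensor's meta now matches the inplace result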
17 changes: 17 additions & 0 deletions paddle/fluid/eager/utils.h
@@ -14,6 +14,7 @@

#pragma once

#include "paddle/fluid/eager/api/utils/tensor_utils.h"
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/eager/grad_node_info.h"
@@ -144,6 +145,19 @@ class EagerUtils {
iter.apply(std::forward<Args>(args)...);
}

static void CheckInplace(const paddle::experimental::Tensor& target,
const AutogradMeta* autograd_meta,
bool require_any_grad) {
if (require_any_grad && autograd_meta) {
PADDLE_ENFORCE_EQ(!autograd_meta->StopGradient() &&
egr::egr_utils_api::IsLeafTensor(target),
false, paddle::platform::errors::InvalidArgument(
"Leaf Var (%s) that doesn't stop gradient "
"can't use inplace strategy.",
target.name()));
}
}

// TensorWrapper Utils
static paddle::experimental::Tensor RecoverTensorWrapper(
TensorWrapper* tw, const std::shared_ptr<GradNodeBase>& grad_node);
@@ -171,6 +185,9 @@ class EagerUtils {
static std::vector<std::shared_ptr<EagerVariable>> CreateVars(
const size_t num);
// Construct Tensor From var
static void ModifyInplaceInput(
const std::shared_ptr<EagerVariable>& inplace_variable,
paddle::experimental::Tensor* inplace_tensor);
static std::vector<paddle::experimental::Tensor> GetOutputs(
const std::vector<std::shared_ptr<EagerVariable>>& outs);
static paddle::experimental::Tensor GetOutput(
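Editor's note: a short sketch of the rule CheckInplace enforces (public-API illustration; op names are assumptions). In-place ops are rejected on leaf tensors that still require grad, matching the error message above:

import paddle

w = paddle.to_tensor([1.0, 2.0], stop_gradient=False)   # leaf that requires grad
# w.scale_(2.0)   # would raise InvalidArgument: "Leaf Var (...) that doesn't stop gradient can't use inplace strategy."

y = paddle.tanh(w)
y.scale_(2.0)      # allowed: y is not a leaf, so the in-place strategy may be used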
11 changes: 11 additions & 0 deletions paddle/fluid/pybind/eager_method.cc
@@ -718,6 +718,15 @@ static PyObject* set_grad_type(TensorObject* self, PyObject* args,
EAGER_CATCH_AND_THROW_RETURN_NULL
}

static PyObject* tensor__inplace_version(TensorObject* self, PyObject* args,
[Inline review comment] Contributor: single underscore "_" in function name?
[Inline review comment] Contributor: it's OK if this method corresponds to the _inplace_version API in Python.
PyObject* kwargs) {
EAGER_TRY
uint32_t inplace_version = self->tensor.current_inplace_version();

return ToPyObject(inplace_version);
EAGER_CATCH_AND_THROW_RETURN_NULL
}

PyMethodDef variable_methods[] = {
{"numpy", (PyCFunction)(void (*)(void))tensor_method_numpy,
METH_VARARGS | METH_KEYWORDS, NULL},
@@ -766,6 +775,8 @@ PyMethodDef variable_methods[] = {
METH_VARARGS | METH_KEYWORDS, NULL},
{"_set_grad_type", (PyCFunction)(void (*)(void))set_grad_type,
METH_VARARGS | METH_KEYWORDS, NULL},
{"_inplace_version", (PyCFunction)(void (*)(void))tensor__inplace_version,
METH_VARARGS | METH_KEYWORDS, NULL},
{NULL, NULL, 0, NULL}};

} // namespace pybind
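Editor's note: a quick sketch of the Python surface this adds (hedged; using scale_ as the in-place op is an assumption). _inplace_version exposes the DenseTensor counter that the TensorWrapper snapshot is compared against:

import paddle

x = paddle.to_tensor([1.0, 2.0])
print(x._inplace_version())   # 0 for a fresh tensor
x.scale_(2.0)                 # a successful in-place kernel bumps the counter
print(x._inplace_version())   # 1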
73 changes: 68 additions & 5 deletions paddle/fluid/pybind/eager_op_function_generator.cc
@@ -162,17 +162,22 @@ static inline std::string TempName(const std::string& name) {

std::string GenerateOpFunctionsBody(
const paddle::framework::proto::OpProto* op_proto, std::string func_name,
bool use_inplace_strategy = false,
std::map<std::string, std::string> inplace_map = {}) {
auto& op_type = op_proto->type();
std::string input_args = "";
std::string call_api_str = "auto out = " + op_type + "_dygraph_function(";
std::string call_api_str = "";
std::string ins_initializer_with_null = "";
std::string py_arg = "";
int arg_idx = 0;
int input_args_num = 0;
std::string ins_cast_str = "";
std::string view_strategy_str = "";
if (!inplace_map.empty()) {
// change call_api_str for inplace op
call_api_str = "auto out = " + op_type + "__dygraph_function(";
[Inline review comment] Contributor: Better add "" at the very end of the function name, like "scale_dygraph_function" for inplaced scale
} else {
call_api_str = "auto out = " + op_type + "_dygraph_function(";
}
for (auto& input : op_proto->inputs()) {
auto& in_name = input.name();
// skip those dispensable inputs, like ResidualData in conv2d
@@ -288,8 +293,31 @@ std::string GenerateOpFunctionsBody(
HANDLE_VIEW_BETWEEN_INPUT_AND_OUTPUT, viwe_input_name, viwe_output_name,
viwe_input_name, viwe_output_name);
}

return_str = "return ToPyObject(out);";
if (!inplace_map.empty()) {
// For inplace op, Use the input PyObject directly.
for (auto& inplace_pair : inplace_map) {
// Find index of inplace tensor, and directly use input PyObject.
std::string inplace_arg_name = inplace_pair.second;
std::string inplace_return_name = inplace_pair.first;
const char* RETURN_INPLACE_TENSOR_TEMPLATE =
"ssize_t arg_id = GetIdxFromCoreOpsInfoMap(core_ops_args_info, "
"\"%s\", \"%s\");\n"
" ssize_t return_id = "
"GetIdxFromCoreOpsInfoMap(core_ops_returns_info, \"%s\", \"%s\");\n"
" return ToPyObject(out, return_id, args, arg_id);";
return_str = paddle::string::Sprintf(RETURN_INPLACE_TENSOR_TEMPLATE,
op_type, inplace_arg_name, op_type,
inplace_return_name);
// Only one inplace_var is supported for now.
PADDLE_ENFORCE_EQ(
inplace_map.size(), 1,
paddle::platform::errors::InvalidArgument(
"size of inplace_map must be 1, but got %d", inplace_map.size()));
break;
}
} else {
return_str = "return ToPyObject(out);";
}

std::string function_args = "";
if (input_args == "") {
@@ -383,14 +411,49 @@ GenerateOpFunctions() {
continue;
}
std::string func_name = "eager_api_" + op_type;
std::string op_function_str = GenerateOpFunctionsBody(op_proto, func_name);
std::string op_function_str =
GenerateOpFunctionsBody(op_proto, func_name, {});

// generate pybind item
auto bind_function_str = paddle::string::Sprintf(
PYBIND_ITEM_TEMPLATE, op_type, func_name, op_type);

op_function_list.emplace_back(std::move(op_function_str));
bind_function_list.emplace_back(std::move(bind_function_str));

// NOTE(pangyoki): Inplace Strategy.
// In this case, output will reuse input varbase.
// Dygraph mode needs to be aligned with the in-place strategy in static
// mode, and the mapping relationships between output and input that have
// been defined in static mode should be used in dygraph mode.
// Find which ops need to use Inplace strategy in static mode, and get the
// mapping relationship between Inplace output and input.
auto& infer_inplace =
paddle::framework::OpInfoMap::Instance().Get(op_type).infer_inplace_;
std::map<std::string, std::string> inplace_map;
// `sum` op has duplicate inputs. Don't consider adding an inplace strategy
// for `sum` for now.
if (op_type != "sum" && infer_inplace) {
[Inline review comment] Contributor: Better store hard-coded op name in a static set
[Inline review comment] Contributor (Author): done in PR #41118
// Inplace OP: op_type_.
// The inplace OP needs a new implementation method.
auto in_to_outs = infer_inplace(true);
for (auto& inplace_pair : in_to_outs) {
inplace_map[inplace_pair.second] = inplace_pair.first;
}

std::string inplace_op_type = op_type + "_";
std::string inplace_func_name = "eager_api_" + inplace_op_type;
std::string inplace_op_function_str =
GenerateOpFunctionsBody(op_proto, inplace_func_name, inplace_map);

// generate pybind item
auto inplace_bind_function_str =
paddle::string::Sprintf(PYBIND_ITEM_TEMPLATE, inplace_op_type,
inplace_func_name, inplace_op_type);

op_function_list.emplace_back(std::move(inplace_op_function_str));
bind_function_list.emplace_back(std::move(inplace_bind_function_str));
}
}
if (append_custom_head_file) {
op_function_list.emplace_back(CUSTOM_HANDWRITE_OP_FUNC_FILE);
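Editor's note: a hedged sketch of the pairing this loop produces (which ops actually register infer_inplace_ is decided by the framework, so the op names below are assumptions). For an op with an inplace registration, both the regular entry point and a trailing-underscore entry point are generated and bound:

import paddle

x = paddle.to_tensor([1.0, 2.0])
y1 = paddle.tanh(x)   # out-of-place binding generated from op "tanh"
y2 = x.tanh_()        # in-place binding generated as "tanh" + "_", reusing x as the output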
16 changes: 16 additions & 0 deletions paddle/fluid/pybind/eager_utils.cc
@@ -417,6 +417,8 @@ PyObject* ToPyObject(bool value) {

PyObject* ToPyObject(int value) { return PyLong_FromLong(value); }

PyObject* ToPyObject(uint32_t value) { return PyLong_FromUnsignedLong(value); }

PyObject* ToPyObject(int64_t value) { return PyLong_FromLongLong(value); }

PyObject* ToPyObject(float value) { return PyLong_FromDouble(value); }
@@ -442,6 +444,20 @@ PyObject* ToPyObject(const paddle::experimental::Tensor& value) {
return obj;
}

PyObject* ToPyObject(const paddle::experimental::Tensor& value,
ssize_t value_idx, PyObject* args, ssize_t arg_idx) {
// For inplace op, directly return the input PyObject of the inplace tensor.
// [Parameter]
// value: Useless parameter.
// value_idx: Useless parameter.
// args: Input PyObject.
// arg_idx: Index of inplace PyObject in input args. Used to find the input
// inplace PyObject.
PyObject* obj = PyTuple_GET_ITEM(args, arg_idx);
Py_INCREF(obj);
return obj;
}

PyObject* ToPyObject(const std::vector<bool>& value) {
PyObject* result = PyList_New((Py_ssize_t)value.size());

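Editor's note: to make the intent of this overload concrete (an assumption-laden sketch; whether every Python-level wrapper preserves object identity can vary by op). For an inplace op the binding does not build a new Python tensor; it re-returns the caller's own argument with its refcount incremented:

import paddle

x = paddle.ones([3])
y = x.scale_(2.0)     # the generated inplace binding returns the input PyObject via this overload
print(y is x)         # expected True for a single-output inplace op such as scale_
print(x.numpy())      # [2., 2., 2.] -- the data was modified in place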