xpu support for fill_constant Op #27675

Merged: 28 commits (from the xpu_support branch into develop), merged Oct 14, 2020.

Changes from 15 commits

Commits (28)
All 28 commits were authored by wangchaochaohu.

3b14a4a  add xpu support for fill_constant Op (Sep 28, 2020)
58dda98  refine (Sep 29, 2020)
3457967  refine (Sep 29, 2020)
8e230d3  refine (Sep 29, 2020)
a7c96d8  refine (Sep 29, 2020)
c1bf0c4  refine (Sep 29, 2020)
0114382  refine (Sep 30, 2020)
2c9127e  refine (Sep 30, 2020)
fcd4d97  Merge branch 'develop' into xpu_support (Sep 30, 2020)
b9dcc1f  refine;test=kunlun (Oct 12, 2020)
4b25887  refine; test=kunlun (Oct 12, 2020)
b7e635a  refine test=kunlun (Oct 12, 2020)
0bf406d  Merge branch 'develop' into xpu_support (Oct 12, 2020)
7d0e292  refine for kunlun test=kunlun (Oct 12, 2020)
7742a86  refine test=kunlun (Oct 12, 2020)
0604bee  refine test=kunlun (Oct 13, 2020)
e0dcafc  refine test=kunlun (Oct 13, 2020)
c989603  refine test=kunlun (Oct 13, 2020)
220c8a8  refine test=kunlun (Oct 13, 2020)
db8cfb1  refine test=kunlun (Oct 13, 2020)
db389fe  refine test=kunlun (Oct 13, 2020)
1dca9aa  Merge branch 'develop' into xpu_support (Oct 13, 2020)
34ff7f0  refine test=kunlun (Oct 13, 2020)
f107bd6  refine test=kunlun (Oct 13, 2020)
2eae208  refine test=kunlun (Oct 14, 2020)
70debae  refine test=kunlun (Oct 14, 2020)
377fadc  refine test=kunlun (Oct 14, 2020)
17225e0  refine test=kunlun (Oct 14, 2020)
paddle/fluid/operators/fill_constant_op.h (12 changes: 10 additions & 2 deletions)

@@ -66,7 +66,7 @@ class FillConstantKernel : public framework::OpKernel<T> {
                               value_tensor->numel()));
       const T *tensor_data = value_tensor->data<T>();
       framework::Tensor cpu_tensor;
-      if (platform::is_gpu_place(value_tensor->place())) {
+      if (!platform::is_cpu_place(value_tensor->place())) {
         TensorCopySync(*value_tensor, platform::CPUPlace(), &cpu_tensor);
         tensor_data = cpu_tensor.data<T>();
       }
@@ -96,12 +96,20 @@ class FillConstantKernel : public framework::OpKernel<T> {
               tensor, static_cast<T>(value));
     }
 #ifdef PADDLE_WITH_CUDA
-    if (!cpu_place) {
+    if (ctx.GetPlace() == platform::CUDAPlace()) {
       tensor->mutable_data(ctx.GetPlace(), data_type);
       math::SetConstant<platform::CUDADeviceContext, T> functor;
       functor(reinterpret_cast<const platform::CUDADeviceContext &>(dev_ctx),
               tensor, static_cast<T>(value));
     }
 #endif
+#ifdef PADDLE_WITH_XPU
+    if (ctx.GetPlace() == platform::XPUPlace()) {
+      tensor->mutable_data(ctx.GetPlace(), data_type);
+      math::SetConstant<platform::XPUDeviceContext, T> functor;
+      functor(reinterpret_cast<const platform::XPUDeviceContext &>(dev_ctx),
+              tensor, static_cast<T>(value));
+    }
+#endif
   }
 };
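Aside (illustrative sketch, not part of the PR): the first hunk above widens the ValueTensor handling from "copy when the value lives on the GPU" to "copy whenever the value does not live on the CPU", so an XPU-resident fill value is staged through host memory the same way a GPU-resident one is. A minimal standalone restatement of that pattern follows; the helper name ReadScalarFromAnyPlace is made up here and is not a Paddle API.

#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/place.h"

// Read the single value out of a 1-element tensor that may live on CPU, GPU,
// or XPU: non-CPU tensors are copied to host memory with TensorCopySync
// before the value is dereferenced.
template <typename T>
T ReadScalarFromAnyPlace(const paddle::framework::Tensor& value_tensor) {
  const T* tensor_data = value_tensor.data<T>();
  paddle::framework::Tensor cpu_tensor;
  if (!paddle::platform::is_cpu_place(value_tensor.place())) {
    paddle::framework::TensorCopySync(value_tensor, paddle::platform::CPUPlace(),
                                      &cpu_tensor);
    tensor_data = cpu_tensor.data<T>();
  }
  return tensor_data[0];
}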
paddle/fluid/operators/fill_constant_op_xpu.cc (new file: 23 additions & 0 deletions)

@@ -0,0 +1,23 @@
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/operators/fill_constant_op.h"
+
+namespace ops = paddle::operators;
+#ifdef PADDLE_WITH_XPU
+REGISTER_OP_XPU_KERNEL(fill_constant, ops::FillConstantKernel<float>,
+                       ops::FillConstantKernel<int64_t>,
+                       ops::FillConstantKernel<double>,
+                       ops::FillConstantKernel<bool>,
+                       ops::FillConstantKernel<int>);
+#endif
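Aside (illustrative sketch, not part of the PR): with the registration above in place, the XPU kernel can be exercised through the usual fluid C++ op-creation path. The snippet assumes a PADDLE_WITH_XPU build and that the standard OpRegistry/Scope test pattern applies unchanged to XPUPlace; it sets only the fill_constant attributes it needs and is not a test shipped with this change.

#include <cstdint>
#include <vector>

#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/place.h"

USE_OP(fill_constant);  // pull in the operator and kernel registrations

void RunFillConstantOnXPU() {
  namespace fw = paddle::framework;
  fw::Scope scope;
  scope.Var("Out")->GetMutable<fw::LoDTensor>();

  fw::AttributeMap attrs;
  attrs["shape"] = std::vector<int64_t>{2, 3};
  attrs["dtype"] = static_cast<int>(fw::proto::VarType::FP32);
  attrs["value"] = 1.0f;

  auto op = fw::OpRegistry::CreateOp("fill_constant", /*inputs=*/{},
                                     /*outputs=*/{{"Out", {"Out"}}}, attrs);
  // Running on XPUPlace dispatches to the kernels registered in this file.
  op->Run(scope, paddle::platform::XPUPlace(0));
}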
paddle/fluid/operators/math/math_function.cc (29 changes: 26 additions & 3 deletions)

@@ -22,6 +22,7 @@ limitations under the License. */
 #include <cblas.h>
 #endif
 
+#include <memory>
 #include <utility>
 #include <vector>
 #include "paddle/fluid/framework/data_type.h"
@@ -44,6 +45,15 @@ template struct SetConstant<platform::CPUDeviceContext, int64_t>;
 template struct SetConstant<platform::CPUDeviceContext, bool>;
 template struct SetConstant<platform::CPUDeviceContext, uint8_t>;
 
+#ifdef PADDLE_WITH_XPU
+template struct SetConstant<platform::XPUDeviceContext, platform::float16>;
+template struct SetConstant<platform::XPUDeviceContext, float>;
+template struct SetConstant<platform::XPUDeviceContext, double>;
+template struct SetConstant<platform::XPUDeviceContext, int>;
+template struct SetConstant<platform::XPUDeviceContext, int64_t>;
+template struct SetConstant<platform::XPUDeviceContext, bool>;
+#endif
+
 #define DEFINE_CPU_TRANS(RANK)                                              \
   template struct Transpose<platform::CPUDeviceContext, platform::float16, \
                             RANK>;                                         \
@@ -131,7 +141,12 @@ template <>
 void set_constant_with_place<platform::XPUPlace>(
     const platform::DeviceContext& context, framework::Tensor* tensor,
     float value) {
+#ifdef PADDLE_WITH_XPU
+  framework::VisitDataType(tensor->type(),
+                           TensorSetConstantXPU<float>(tensor, value));
+#else
   PADDLE_THROW(platform::errors::Unimplemented("XPUPlace is not supported"));
+#endif
 }
 
 template <>
@@ -166,11 +181,19 @@ struct TensorSetConstantWithPlace : public boost::static_visitor<void> {
 void set_constant(const platform::DeviceContext& context,
                   framework::Tensor* tensor, float value) {
   TensorSetConstantWithPlace func(context, tensor, value);
+  if (is_cpu_place(tensor->place())) {
+    func(platform::CPUPlace());
+#ifdef PADDLE_WITH_XPU
+  } else if (is_xpu_place(tensor->place())) {
+    int dev_id = -1;
+    xpu_current_device(&dev_id);
+    func(platform::XPUPlace(dev_id));
+#endif
 #ifdef PADDLE_WITH_CUDA
-  tensor->place().apply_visitor(func);
-#else
-  func(platform::CPUPlace());
+  } else if (is_gpu_place(tensor->place())) {
+    tensor->place().apply_visitor(func);
 #endif
+  }
 }
 
 template <typename T>
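Aside (illustrative sketch, not part of the PR): with the dispatch above, an XPU-resident tensor can be filled through the common math::set_constant entry point. The snippet assumes a PADDLE_WITH_XPU build and the standard XPUPlace/XPUDeviceContext constructors from paddle/fluid/platform; it is illustrative only.

#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/device_context.h"

void FillXpuTensorWithOnes() {
  namespace plat = paddle::platform;
  paddle::framework::Tensor t;
  t.Resize(paddle::framework::make_ddim({8}));
  t.mutable_data<float>(plat::XPUPlace(0));

  plat::XPUDeviceContext ctx(plat::XPUPlace(0));
  // Takes the is_xpu_place branch above and ends up in TensorSetConstantXPU<float>.
  paddle::operators::math::set_constant(ctx, &t, 1.0f);
}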
paddle/fluid/operators/math/math_function.h (28 changes: 28 additions & 0 deletions)

@@ -14,6 +14,7 @@ limitations under the License. */
 
 #pragma once
 #include <cmath>
+#include <memory>
 #include <vector>
 
 #include "paddle/fluid/framework/eigen.h"
@@ -84,6 +85,33 @@ struct RowwiseMean {
                   framework::Tensor* vec);
 };
 
+#ifdef PADDLE_WITH_XPU
+template <typename U>
+struct TensorSetConstantXPU {
+  TensorSetConstantXPU(framework::Tensor* tensor, U value)
+      : tensor_(tensor), value_(value) {}
+  template <typename T>
+  void apply() const {
+    int dev_id = -1;
+    xpu_current_device(&dev_id);
+    if (dev_id >= 64) {
+      // if dev_id >= 64, the device is a simulator device, -64 to get real
+      // dev_id
+      dev_id -= 64;
+    }
+    auto xpu = platform::XPUPlace(dev_id);
+    auto* begin = tensor_->mutable_data<T>(xpu);
+    int numel = tensor_->numel();
+    std::unique_ptr<T[]> data_cpu(new T[numel]);
+    std::fill(data_cpu.get(), data_cpu.get() + numel, static_cast<T>(value_));
+    memory::Copy(xpu, begin, platform::CPUPlace(),
+                 static_cast<void*>(data_cpu.get()), numel * sizeof(T));
+  }
+  framework::Tensor* tensor_;
+  U value_;
+};
+#endif
+
 }  // namespace math
 }  // namespace operators
 }  // namespace paddle
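Aside: TensorSetConstantXPU fills a host-side buffer with std::fill and then copies it to device memory with memory::Copy, so no device-side fill kernel is needed. The simulator handling at the top of apply() can be read as the small helper below; this is only a restatement of the logic above (the PR inlines it rather than defining such a function), the name is made up here, and it assumes the XPU runtime header that declares xpu_current_device is already included, as in math_function.h.

// Resolve the current XPU device id, mapping simulator ids back to the
// physical device: per the comment in the diff, ids >= 64 denote simulator
// devices and are offset by 64.
inline int CurrentPhysicalXpuDevId() {
  int dev_id = -1;
  xpu_current_device(&dev_id);  // same XPU runtime call used in apply() above
  return dev_id >= 64 ? dev_id - 64 : dev_id;
}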
paddle/fluid/operators/math/math_function_impl.h (6 changes: 6 additions & 0 deletions)

@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #pragma once
+#include <memory>
 #include <vector>
 #include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/operators/math/math_function.h"
@@ -27,8 +28,13 @@ template <typename DeviceContext, typename T>
 void SetConstant<DeviceContext, T>::operator()(const DeviceContext& context,
                                                framework::Tensor* tensor,
                                                T num) {
+#ifdef PADDLE_WITH_XPU
+  framework::VisitDataType(tensor->type(),
+                           TensorSetConstantXPU<T>(tensor, num));
+#else
   auto t = framework::EigenVector<T>::Flatten(*tensor);
   t.device(*context.eigen_device()) = t.constant(static_cast<T>(num));
+#endif
 }
 
 template <typename DeviceContext, typename T, int Rank>
paddle/fluid/operators/utils.h (9 changes: 5 additions & 4 deletions)

@@ -26,15 +26,16 @@ inline std::vector<T> GetDataFromTensor(const framework::Tensor* x) {
   if (x->type() == framework::proto::VarType::INT32) {
     auto* data = x->data<int>();
     framework::Tensor cpu_attr_tensor;
-    if (platform::is_gpu_place(x->place())) {
+    if (!platform::is_cpu_place(x->place())) {
       TensorCopySync(*x, platform::CPUPlace(), &cpu_attr_tensor);
       data = cpu_attr_tensor.data<int>();
     }
+
     vec_new_data = std::vector<T>(data, data + x->numel());
   } else if (x->type() == framework::proto::VarType::INT64) {
     auto* data = x->data<int64_t>();
     framework::Tensor cpu_attr_tensor;
-    if (platform::is_gpu_place(x->place())) {
+    if (!platform::is_cpu_place(x->place())) {
       TensorCopySync(*x, platform::CPUPlace(), &cpu_attr_tensor);
       data = cpu_attr_tensor.data<int64_t>();
     }
@@ -62,15 +63,15 @@ inline std::vector<T> GetDataFromTensorList(
                           tensor->dims()));
 
     if (tensor->type() == framework::proto::VarType::INT32) {
-      if (platform::is_gpu_place(tensor->place())) {
+      if (!platform::is_cpu_place(tensor->place())) {
        framework::Tensor temp;
        TensorCopySync(*tensor, platform::CPUPlace(), &temp);
        vec_new_data.push_back(static_cast<T>(*temp.data<int>()));
      } else {
        vec_new_data.push_back(static_cast<T>(*tensor->data<int>()));
      }
    } else if (tensor->type() == framework::proto::VarType::INT64) {
-      if (platform::is_gpu_place(tensor->place())) {
+      if (!platform::is_cpu_place(tensor->place())) {
        framework::Tensor temp;
        TensorCopySync(*tensor, platform::CPUPlace(), &temp);
        // NOTE: Converting int64 to int32 may cause data overflow.
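Aside (illustrative sketch, not part of the PR): after the utils.h change, the shape/axis helpers stage any non-CPU tensor through host memory, so an attribute tensor that lives on an XPU is read the same way as a GPU-resident one. A trivial caller, for illustration only:

#include <cstdint>
#include <vector>

#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/utils.h"

// Read an int64 shape tensor regardless of where it lives; GetDataFromTensor
// now copies GPU- and XPU-resident tensors to the CPU (via TensorCopySync)
// before building the std::vector.
std::vector<int64_t> ReadShape(const paddle::framework::Tensor* shape_tensor) {
  return paddle::operators::GetDataFromTensor<int64_t>(shape_tensor);
}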