xpu support for fill_constant Op #27675

Merged: 28 commits (from the xpu_support branch into develop), merged Oct 14, 2020.

Changes from 15 commits

Commits (28)
All 28 commits were authored by wangchaochaohu.

3b14a4a  add xpu support for fill_constant Op (Sep 28, 2020)
58dda98  refine (Sep 29, 2020)
3457967  refine (Sep 29, 2020)
8e230d3  refine (Sep 29, 2020)
a7c96d8  refine (Sep 29, 2020)
c1bf0c4  refine (Sep 29, 2020)
0114382  refine (Sep 30, 2020)
2c9127e  refine (Sep 30, 2020)
fcd4d97  Merge branch 'develop' into xpu_support (Sep 30, 2020)
b9dcc1f  refine;test=kunlun (Oct 12, 2020)
4b25887  refine; test=kunlun (Oct 12, 2020)
b7e635a  refine test=kunlun (Oct 12, 2020)
0bf406d  Merge branch 'develop' into xpu_support (Oct 12, 2020)
7d0e292  refine for kunlun test=kunlun (Oct 12, 2020)
7742a86  refine test=kunlun (Oct 12, 2020)
0604bee  refine test=kunlun (Oct 13, 2020)
e0dcafc  refine test=kunlun (Oct 13, 2020)
c989603  refine test=kunlun (Oct 13, 2020)
220c8a8  refine test=kunlun (Oct 13, 2020)
db8cfb1  refine test=kunlun (Oct 13, 2020)
db389fe  refine test=kunlun (Oct 13, 2020)
1dca9aa  Merge branch 'develop' into xpu_support (Oct 13, 2020)
34ff7f0  refine test=kunlun (Oct 13, 2020)
f107bd6  refine test=kunlun (Oct 13, 2020)
2eae208  refine test=kunlun (Oct 14, 2020)
70debae  refine test=kunlun (Oct 14, 2020)
377fadc  refine test=kunlun (Oct 14, 2020)
17225e0  refine test=kunlun (Oct 14, 2020)
paddle/fluid/operators/fill_constant_op.h (12 changes: 10 additions & 2 deletions)

@@ -66,7 +66,7 @@ class FillConstantKernel : public framework::OpKernel<T> {
                               value_tensor->numel()));
       const T *tensor_data = value_tensor->data<T>();
       framework::Tensor cpu_tensor;
-      if (platform::is_gpu_place(value_tensor->place())) {
+      if (!platform::is_cpu_place(value_tensor->place())) {
         TensorCopySync(*value_tensor, platform::CPUPlace(), &cpu_tensor);
         tensor_data = cpu_tensor.data<T>();
       }
@@ -96,12 +96,20 @@ class FillConstantKernel : public framework::OpKernel<T> {
               tensor, static_cast<T>(value));
     }
 #ifdef PADDLE_WITH_CUDA
-    if (!cpu_place) {
+    if (ctx.GetPlace() == platform::CUDAPlace()) {
       tensor->mutable_data(ctx.GetPlace(), data_type);
       math::SetConstant<platform::CUDADeviceContext, T> functor;
       functor(reinterpret_cast<const platform::CUDADeviceContext &>(dev_ctx),
               tensor, static_cast<T>(value));
     }
 #endif
+#ifdef PADDLE_WITH_XPU
+    if (ctx.GetPlace() == platform::XPUPlace()) {
+      tensor->mutable_data(ctx.GetPlace(), data_type);
+      math::SetConstant<platform::XPUDeviceContext, T> functor;
+      functor(reinterpret_cast<const platform::XPUDeviceContext &>(dev_ctx),
+              tensor, static_cast<T>(value));
+    }
+#endif
   }
 };
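Aside (illustrative sketch, not part of the PR): the first hunk above widens the ValueTensor handling from "copy when the value lives on the GPU" to "copy whenever the value does not live on the CPU", so an XPU-resident fill value is staged through host memory the same way a GPU-resident one is. A minimal standalone restatement of that pattern follows; the helper name ReadScalarFromAnyPlace is made up here and is not a Paddle API.

#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/place.h"

// Read the single value out of a 1-element tensor that may live on CPU, GPU,
// or XPU: non-CPU tensors are copied to host memory with TensorCopySync
// before the value is dereferenced.
template <typename T>
T ReadScalarFromAnyPlace(const paddle::framework::Tensor& value_tensor) {
  const T* tensor_data = value_tensor.data<T>();
  paddle::framework::Tensor cpu_tensor;
  if (!paddle::platform::is_cpu_place(value_tensor.place())) {
    paddle::framework::TensorCopySync(value_tensor, paddle::platform::CPUPlace(),
                                      &cpu_tensor);
    tensor_data = cpu_tensor.data<T>();
  }
  return tensor_data[0];
}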
paddle/fluid/operators/fill_constant_op_xpu.cc (new file: 23 additions & 0 deletions)

@@ -0,0 +1,23 @@
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/operators/fill_constant_op.h"
+
+namespace ops = paddle::operators;
+#ifdef PADDLE_WITH_XPU
+REGISTER_OP_XPU_KERNEL(fill_constant, ops::FillConstantKernel<float>,
+                       ops::FillConstantKernel<int64_t>,
+                       ops::FillConstantKernel<double>,
+                       ops::FillConstantKernel<bool>,
+                       ops::FillConstantKernel<int>);
+#endif
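Aside (illustrative sketch, not part of the PR): with the registration above in place, the XPU kernel can be exercised through the usual fluid C++ op-creation path. The snippet assumes a PADDLE_WITH_XPU build and that the standard OpRegistry/Scope test pattern applies unchanged to XPUPlace; it sets only the fill_constant attributes it needs and is not a test shipped with this change.

#include <cstdint>
#include <vector>

#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/place.h"

USE_OP(fill_constant);  // pull in the operator and kernel registrations

void RunFillConstantOnXPU() {
  namespace fw = paddle::framework;
  fw::Scope scope;
  scope.Var("Out")->GetMutable<fw::LoDTensor>();

  fw::AttributeMap attrs;
  attrs["shape"] = std::vector<int64_t>{2, 3};
  attrs["dtype"] = static_cast<int>(fw::proto::VarType::FP32);
  attrs["value"] = 1.0f;

  auto op = fw::OpRegistry::CreateOp("fill_constant", /*inputs=*/{},
                                     /*outputs=*/{{"Out", {"Out"}}}, attrs);
  // Running on XPUPlace dispatches to the kernels registered in this file.
  op->Run(scope, paddle::platform::XPUPlace(0));
}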
paddle/fluid/operators/math/math_function.cc (29 changes: 26 additions & 3 deletions)

@@ -22,6 +22,7 @@ limitations under the License. */
 #include <cblas.h>
 #endif
 
+#include <memory>
 #include <utility>
 #include <vector>
 #include "paddle/fluid/framework/data_type.h"
@@ -44,6 +45,15 @@ template struct SetConstant<platform::CPUDeviceContext, int64_t>;
 template struct SetConstant<platform::CPUDeviceContext, bool>;
 template struct SetConstant<platform::CPUDeviceContext, uint8_t>;
 
+#ifdef PADDLE_WITH_XPU
+template struct SetConstant<platform::XPUDeviceContext, platform::float16>;
+template struct SetConstant<platform::XPUDeviceContext, float>;
+template struct SetConstant<platform::XPUDeviceContext, double>;
+template struct SetConstant<platform::XPUDeviceContext, int>;
+template struct SetConstant<platform::XPUDeviceContext, int64_t>;
+template struct SetConstant<platform::XPUDeviceContext, bool>;
+#endif
+
 #define DEFINE_CPU_TRANS(RANK)                                              \
   template struct Transpose<platform::CPUDeviceContext, platform::float16, \
                             RANK>;                                         \
@@ -131,7 +141,12 @@ template <>
 void set_constant_with_place<platform::XPUPlace>(
     const platform::DeviceContext& context, framework::Tensor* tensor,
     float value) {
+#ifdef PADDLE_WITH_XPU
+  framework::VisitDataType(tensor->type(),
+                           TensorSetConstantXPU<float>(tensor, value));
+#else
   PADDLE_THROW(platform::errors::Unimplemented("XPUPlace is not supported"));
+#endif
 }
 
 template <>
@@ -166,11 +181,19 @@ struct TensorSetConstantWithPlace : public boost::static_visitor<void> {
 void set_constant(const platform::DeviceContext& context,
                   framework::Tensor* tensor, float value) {
   TensorSetConstantWithPlace func(context, tensor, value);
+  if (is_cpu_place(tensor->place())) {
+    func(platform::CPUPlace());
+#ifdef PADDLE_WITH_XPU
+  } else if (is_xpu_place(tensor->place())) {
+    int dev_id = -1;
+    xpu_current_device(&dev_id);
+    func(platform::XPUPlace(dev_id));
+#endif
 #ifdef PADDLE_WITH_CUDA
-  tensor->place().apply_visitor(func);
-#else
-  func(platform::CPUPlace());
+  } else if (is_gpu_place(tensor->place())) {
+    tensor->place().apply_visitor(func);
 #endif
+  }
 }
 
 template <typename T>
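Aside (illustrative sketch, not part of the PR): with the dispatch above, an XPU-resident tensor can be filled through the common math::set_constant entry point. The snippet assumes a PADDLE_WITH_XPU build and the standard XPUPlace/XPUDeviceContext constructors from paddle/fluid/platform; it is illustrative only.

#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/device_context.h"

void FillXpuTensorWithOnes() {
  namespace plat = paddle::platform;
  paddle::framework::Tensor t;
  t.Resize(paddle::framework::make_ddim({8}));
  t.mutable_data<float>(plat::XPUPlace(0));

  plat::XPUDeviceContext ctx(plat::XPUPlace(0));
  // Takes the is_xpu_place branch above and ends up in TensorSetConstantXPU<float>.
  paddle::operators::math::set_constant(ctx, &t, 1.0f);
}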
paddle/fluid/operators/math/math_function.h (28 changes: 28 additions & 0 deletions)

@@ -14,6 +14,7 @@ limitations under the License. */
 
 #pragma once
 #include <cmath>
+#include <memory>
 #include <vector>
 
 #include "paddle/fluid/framework/eigen.h"
@@ -84,6 +85,33 @@ struct RowwiseMean {
                   framework::Tensor* vec);
 };
 
+#ifdef PADDLE_WITH_XPU
+template <typename U>
+struct TensorSetConstantXPU {
+  TensorSetConstantXPU(framework::Tensor* tensor, U value)
+      : tensor_(tensor), value_(value) {}
+  template <typename T>
+  void apply() const {
+    int dev_id = -1;
+    xpu_current_device(&dev_id);
+    if (dev_id >= 64) {
+      // if dev_id >= 64, the device is a simulator device, -64 to get real
+      // dev_id
+      dev_id -= 64;
+    }
+    auto xpu = platform::XPUPlace(dev_id);
+    auto* begin = tensor_->mutable_data<T>(xpu);
+    int numel = tensor_->numel();
+    std::unique_ptr<T[]> data_cpu(new T[numel]);
+    std::fill(data_cpu.get(), data_cpu.get() + numel, static_cast<T>(value_));
+    memory::Copy(xpu, begin, platform::CPUPlace(),
+                 static_cast<void*>(data_cpu.get()), numel * sizeof(T));
+  }
+  framework::Tensor* tensor_;
+  U value_;
+};
+#endif
+
 }  // namespace math
 }  // namespace operators
 }  // namespace paddle
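Aside: TensorSetConstantXPU fills a host-side buffer with std::fill and then copies it to device memory with memory::Copy, so no device-side fill kernel is needed. The simulator handling at the top of apply() can be read as the small helper below; this is only a restatement of the logic above (the PR inlines it rather than defining such a function), the name is made up here, and it assumes the XPU runtime header that declares xpu_current_device is already included, as in math_function.h.

// Resolve the current XPU device id, mapping simulator ids back to the
// physical device: per the comment in the diff, ids >= 64 denote simulator
// devices and are offset by 64.
inline int CurrentPhysicalXpuDevId() {
  int dev_id = -1;
  xpu_current_device(&dev_id);  // same XPU runtime call used in apply() above
  return dev_id >= 64 ? dev_id - 64 : dev_id;
}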
paddle/fluid/operators/math/math_function_impl.h (6 changes: 6 additions & 0 deletions)

@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #pragma once
+#include <memory>
 #include <vector>
 #include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/operators/math/math_function.h"
@@ -27,8 +28,13 @@ template <typename DeviceContext, typename T>
 void SetConstant<DeviceContext, T>::operator()(const DeviceContext& context,
                                                framework::Tensor* tensor,
                                                T num) {
+#ifdef PADDLE_WITH_XPU
+  framework::VisitDataType(tensor->type(),
+                           TensorSetConstantXPU<T>(tensor, num));
+#else
   auto t = framework::EigenVector<T>::Flatten(*tensor);
   t.device(*context.eigen_device()) = t.constant(static_cast<T>(num));
+#endif
 }
 
 template <typename DeviceContext, typename T, int Rank>
paddle/fluid/operators/utils.h (9 changes: 5 additions & 4 deletions)

@@ -26,15 +26,16 @@ inline std::vector<T> GetDataFromTensor(const framework::Tensor* x) {
   if (x->type() == framework::proto::VarType::INT32) {
     auto* data = x->data<int>();
     framework::Tensor cpu_attr_tensor;
-    if (platform::is_gpu_place(x->place())) {
+    if (!platform::is_cpu_place(x->place())) {
       TensorCopySync(*x, platform::CPUPlace(), &cpu_attr_tensor);
       data = cpu_attr_tensor.data<int>();
     }
+
     vec_new_data = std::vector<T>(data, data + x->numel());
   } else if (x->type() == framework::proto::VarType::INT64) {
     auto* data = x->data<int64_t>();
     framework::Tensor cpu_attr_tensor;
-    if (platform::is_gpu_place(x->place())) {
+    if (!platform::is_cpu_place(x->place())) {
       TensorCopySync(*x, platform::CPUPlace(), &cpu_attr_tensor);
       data = cpu_attr_tensor.data<int64_t>();
     }
@@ -62,15 +63,15 @@ inline std::vector<T> GetDataFromTensorList(
                           tensor->dims()));
 
     if (tensor->type() == framework::proto::VarType::INT32) {
-      if (platform::is_gpu_place(tensor->place())) {
+      if (!platform::is_cpu_place(tensor->place())) {
        framework::Tensor temp;
        TensorCopySync(*tensor, platform::CPUPlace(), &temp);
        vec_new_data.push_back(static_cast<T>(*temp.data<int>()));
      } else {
        vec_new_data.push_back(static_cast<T>(*tensor->data<int>()));
      }
    } else if (tensor->type() == framework::proto::VarType::INT64) {
-      if (platform::is_gpu_place(tensor->place())) {
+      if (!platform::is_cpu_place(tensor->place())) {
        framework::Tensor temp;
        TensorCopySync(*tensor, platform::CPUPlace(), &temp);
        // NOTE: Converting int64 to int32 may cause data overflow.
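Aside (illustrative sketch, not part of the PR): after the utils.h change, the shape/axis helpers stage any non-CPU tensor through host memory, so an attribute tensor that lives on an XPU is read the same way as a GPU-resident one. A trivial caller, for illustration only:

#include <cstdint>
#include <vector>

#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/utils.h"

// Read an int64 shape tensor regardless of where it lives; GetDataFromTensor
// now copies GPU- and XPU-resident tensors to the CPU (via TensorCopySync)
// before building the std::vector.
std::vector<int64_t> ReadShape(const paddle::framework::Tensor* shape_tensor) {
  return paddle::operators::GetDataFromTensor<int64_t>(shape_tensor);
}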