[pten] add concat pten kernel #38955

Merged
Changes from 17 commits
2 changes: 1 addition & 1 deletion paddle/fluid/framework/CMakeLists.txt
@@ -102,7 +102,7 @@ else()
endif()
cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim mixed_vector place tensor framework_proto version)

cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory)
cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_utils lod_tensor memory)

if(WITH_GPU)
nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor)
42 changes: 3 additions & 39 deletions paddle/fluid/framework/lod_tensor.cc
@@ -123,7 +123,8 @@ bool CheckLoD(const LoD &in, int tensor_height) {
}
// check: the lowest level's last offset should equal `tensor_height` if
// tensor_height>0.
if (tensor_height > 0 && (size_t)tensor_height != in.back().back())
if (tensor_height > 0 &&
static_cast<size_t>(tensor_height) != in.back().back())
return false;

// check: the higher level's last offset should equal the lower level's
@@ -156,7 +157,7 @@ bool CheckAbsLoD(const LoD &in, int tensor_height) {
if (level.front() != 0) return false;
if (tensor_height < 0) {
tensor_height = level.back();
} else if ((size_t)tensor_height != level.back()) {
} else if (static_cast<size_t>(tensor_height) != level.back()) {
return false;
}
}
@@ -192,27 +193,6 @@ LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD &lod, size_t start_idx,
return LoDAndOffset{sub_lod, {start_idx, end_idx}};
}

void AppendLoD(LoD *lod, const LoD &lod_length) {
PADDLE_ENFORCE(
lod->empty() || lod->size() == lod_length.size(),
platform::errors::InvalidArgument(
"The input LoD length should be equal to the appended LoD size, but "
"received input LoD length is %d, actual LoD size is %d.",
lod_length, lod->size()));
if (lod->empty()) {
for (size_t i = 0; i < lod_length.size(); ++i) {
lod->emplace_back(1, 0); // size = 1, value = 0;
}
*lod = LoD(lod_length.size(), std::vector<size_t>({0}));
}
for (size_t i = 0; i < lod->size(); ++i) {
auto &level = (*lod)[i];
for (size_t len : lod_length[i]) {
level.push_back(level.back() + len);
}
}
}

void SerializeToStream(std::ostream &os, const LoDTensor &tensor,
const platform::DeviceContext &dev_ctx) {
{ // the 1st field, uint32_t version for LoDTensor
@@ -319,22 +299,6 @@ void DeserializeFromStream(std::istream &is, LoDTensor *tensor,
TensorFromStream(is, static_cast<Tensor *>(tensor), dev_ctx);
}

LoD ConvertToLengthBasedLoD(const LoD &offset_lod) {
LoD length_lod;
length_lod.reserve(offset_lod.size());
for (size_t lvl = 0; lvl < offset_lod.size(); ++lvl) {
std::vector<size_t> level;
if (offset_lod[lvl].size() > 0) {
level.reserve(offset_lod[lvl].size() - 1);
}
for (size_t idx = 0; idx < offset_lod[lvl].size() - 1; ++idx) {
level.push_back(offset_lod[lvl][idx + 1] - offset_lod[lvl][idx]);
}
length_lod.push_back(level);
}
return length_lod;
}

LoD ConvertToOffsetBasedLoD(const LoD &length_lod) {
LoD offset_lod;
offset_lod.reserve(length_lod.size());
14 changes: 0 additions & 14 deletions paddle/fluid/framework/lod_tensor.h
@@ -163,8 +163,6 @@ LoDTensor LodExpand(const LoDTensor& source, const LoD& lod, size_t level,
std::pair<LoD, std::pair<size_t, size_t>> GetSubLoDAndAbsoluteOffset(
const LoD& lod, size_t start_idx, size_t end_idx, size_t start_level);

void AppendLoD(LoD* lod, const LoD& lod_length);

/*
* Serialize/Deserialize LoDTensor to std::ostream
* You can pass ofstream or ostringstream to serialize to a file
@@ -179,18 +177,6 @@ void DeserializeFromStream(std::istream& is, LoDTensor* tensor,
const size_t& seek,
const std::vector<int64_t>& shape);

/*
* Convert between length-based LoD and offset-based LoD.
* The implementation of the LoDTensor class uses offset-based LoD.
* However, we want to expose the more user-friendly length-based
* LoD to the Python side instead.
*
* Example:
* If offset_lod = [[0, 2, 3],[0, 3, 5, 9]]
* then length_lod = [[2, 1], [3, 2, 4]]
*/
LoD ConvertToLengthBasedLoD(const LoD& offset_lod);

LoD ConvertToOffsetBasedLoD(const LoD& length_lod);

void SerializeToStream(std::ostream& os, const LoDTensor& tensor);
5 changes: 3 additions & 2 deletions paddle/fluid/framework/lod_tensor_test.cc
@@ -16,6 +16,7 @@
#include <gtest/gtest.h>

#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/pten/core/lod_utils.h"

namespace paddle {
namespace framework {
@@ -98,7 +99,7 @@ TEST(LoD, AppendLoD) {
origin.push_back(std::vector<size_t>({0, 1, 6}));
origin.push_back(std::vector<size_t>({0, 2, 5, 7, 10, 12, 15}));

paddle::framework::AppendLoD(&origin, lod_lens);
pten::AppendLoD(&origin, lod_lens);

LoD expected;
expected.push_back(std::vector<size_t>({0, 2, 4}));
@@ -277,7 +278,7 @@ TEST(LoD, ConvertToLengthBasedLoD) {
offset_lod.push_back(std::vector<size_t>({0, 1, 3}));
offset_lod.push_back(std::vector<size_t>({0, 2, 4, 5}));

LoD length_lod = ConvertToLengthBasedLoD(offset_lod);
LoD length_lod = pten::ConvertToLengthBasedLoD(offset_lod);

LoD expected;
expected.push_back(std::vector<size_t>({2}));
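
The two test updates above are the whole migration story for callers: the helpers keep their behavior but move from paddle::framework into the pten namespace, declared in paddle/pten/core/lod_utils.h. A minimal usage sketch, assuming the pten declarations accept framework::LoD the way the updated tests do (LoDHelpersExample is an illustrative name, not part of the diff):

#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/pten/core/lod_utils.h"

void LoDHelpersExample() {
  paddle::framework::LoD offset_lod;
  offset_lod.push_back(std::vector<size_t>({0, 2, 3}));
  offset_lod.push_back(std::vector<size_t>({0, 3, 5, 9}));

  // Per the comment removed from lod_tensor.h: offset-based
  // [[0, 2, 3], [0, 3, 5, 9]] converts to length-based [[2, 1], [3, 2, 4]].
  paddle::framework::LoD length_lod =
      pten::ConvertToLengthBasedLoD(offset_lod);

  // AppendLoD seeds an empty LoD with a leading 0 per level, then extends
  // each level by the given lengths, so out_lod ends up equal to offset_lod.
  paddle::framework::LoD out_lod;
  pten::AppendLoD(&out_lod, length_lod);
}
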
4 changes: 4 additions & 0 deletions paddle/fluid/framework/operator.cc
@@ -1901,6 +1901,10 @@ void OperatorWithKernel::BuildPtenKernelContext(
std::type_index(typeid(std::string))) {
pt_kernel_context->EmplaceBackAttr(
std::move(pten::Scalar(BOOST_GET_CONST(std::string, attr))));
} else if (std::type_index(attr.type()) ==
std::type_index(typeid(int))) {
pt_kernel_context->EmplaceBackAttr(
std::move(pten::Scalar(BOOST_GET_CONST(int, attr))));
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported cast op attribute `%s` to Scalar when construct "
4 changes: 4 additions & 0 deletions paddle/fluid/imperative/prepared_operator.cc
@@ -382,6 +382,10 @@ static void BuildDygraphPtenKernelContext(
std::type_index(typeid(std::string))) {
kernel_ctx->EmplaceBackAttr(
std::move(pten::Scalar(BOOST_GET_CONST(std::string, attr))));
} else if (std::type_index(attr.type()) ==
std::type_index(typeid(int))) {
kernel_ctx->EmplaceBackAttr(
std::move(pten::Scalar(BOOST_GET_CONST(int, attr))));
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported cast op attribute `%s` to Scalar when construct "
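
Both kernel-context builders — the static-graph path in operator.cc and the dygraph path in prepared_operator.cc — gain the same branch, so an axis attribute stored as a plain int can reach the new kernel as a pten::Scalar. A condensed sketch of the added dispatch, with attr and kernel_ctx as in the surrounding code:

if (std::type_index(attr.type()) == std::type_index(typeid(int))) {
  // Wrap a plain-int op attribute (e.g. concat's axis) as a pten::Scalar.
  kernel_ctx->EmplaceBackAttr(
      std::move(pten::Scalar(BOOST_GET_CONST(int, attr))));
}
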
3 changes: 2 additions & 1 deletion paddle/fluid/operators/array_to_lod_tensor_op.cc
@@ -14,6 +14,7 @@ limitations under the License. */
#include <paddle/fluid/operators/math/concat_and_split.h>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/pten/core/lod_utils.h"

namespace paddle {
namespace framework {
@@ -168,7 +169,7 @@ class ArrayToLoDTensorOp : public framework::OperatorBase {
x[x_idx].lod(), idx, idx + 1, 0);

auto &lod_length = lod_and_offset.first;
framework::AppendLoD(out_lod, lod_length);
pten::AppendLoD(out_lod, lod_length);

size_t start_offset = lod_and_offset.second.first;
size_t end_offset = lod_and_offset.second.second;
15 changes: 13 additions & 2 deletions paddle/fluid/operators/concat_op.cc
@@ -19,6 +19,8 @@ limitations under the License. */
#include <string>
#include <vector>

#include "paddle/pten/kernels/funcs/concat_funcs.h"

#ifdef PADDLE_WITH_MKLDNN
#include <paddle/fluid/platform/mkldnn_helper.h>
#endif
@@ -56,8 +58,8 @@ class ConcatOp : public framework::OperatorWithKernel {
size_t axis =
ComputeAxis(static_cast<int64_t>(ctx->Attrs().Get<int>("axis")),
static_cast<int64_t>(inputs_dims[0].size()));
framework::DDim out_dims =
ComputeAndCheckShape(ctx->IsRuntime(), inputs_dims, axis);
framework::DDim out_dims = pten::funcs::ComputeAndCheckShape(
ctx->IsRuntime(), inputs_dims, axis);
if (out_dims[axis] < 0) {
out_dims[axis] = -1;
}
@@ -102,6 +104,15 @@ class ConcatOp : public framework::OperatorWithKernel {
return framework::OpKernelType(expected_kernel_type.data_type_,
tensor.place(), tensor.layout());
}

framework::KernelSignature GetExpectedPtenKernelArgs(
const framework::ExecutionContext &ctx) const override {
if (ctx.HasInput("AxisTensor")) {
return framework::KernelSignature("concat", {"X"}, {"AxisTensor"},
{"Out"});
}
return framework::KernelSignature("concat", {"X"}, {"axis"}, {"Out"});
}
};

class ConcatOpMaker : public framework::OpProtoAndCheckerMaker {
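
Two things to note in this file. First, the new GetExpectedPtenKernelArgs maps the fluid op onto the pten "concat" kernel, taking the axis from the AxisTensor input when one is attached and from the axis attribute otherwise. Second, shape inference now delegates to pten::funcs::ComputeAndCheckShape; a worked sketch of its behavior, based on the implementation removed from concat_op.h below (the dims variable is illustrative):

std::vector<framework::DDim> dims = {framework::make_ddim({2, 3}),
                                     framework::make_ddim({4, 3})};
// At runtime (first argument true): extents on the concat axis add up,
// so [2, 3] and [4, 3] concatenated on axis 0 give [6, 3].
framework::DDim out_dims = pten::funcs::ComputeAndCheckShape(true, dims, 0);

// At compile time (first argument false), a -1 (unknown) extent on the
// concat axis propagates to the output, e.g. [-1, 3] + [4, 3] -> [-1, 3];
// mismatched non-concat dimensions raise InvalidArgument either way.
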
111 changes: 11 additions & 100 deletions paddle/fluid/operators/concat_op.h
@@ -22,54 +22,11 @@ limitations under the License. */
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/fluid/operators/utils.h"

#include "paddle/pten/kernels/concat_kernel.h"
#include "paddle/pten/kernels/funcs/concat_funcs.h"

namespace paddle {
namespace operators {
static inline framework::DDim ComputeAndCheckShape(
const bool is_runtime, const std::vector<framework::DDim>& inputs_dims,
const size_t axis) {
const size_t n = inputs_dims.size();
auto out_dims = inputs_dims[0];
size_t in_zero_dims_size = out_dims.size();
for (size_t i = 1; i < n; i++) {
PADDLE_ENFORCE_EQ(inputs_dims[i].size(), out_dims.size(),
platform::errors::InvalidArgument(
"The shape of input[0] and input[%d] "
"is expected to be equal."
"But received input[0]'s shape = "
"[%s], input[%d]'s shape = [%s].",
i, inputs_dims[0], i, inputs_dims[i]));
for (size_t j = 0; j < in_zero_dims_size; j++) {
if (j == axis) {
if (is_runtime) {
out_dims[axis] += inputs_dims[i][j];
} else {
if (inputs_dims[i][j] == -1 || out_dims[j] == -1) {
out_dims[axis] = -1;
} else {
out_dims[axis] += inputs_dims[i][j];
}
}
} else {
bool check_shape =
is_runtime || (inputs_dims[0][j] > 0 && inputs_dims[i][j] > 0);
if (check_shape) {
// check all shape in run time
PADDLE_ENFORCE_EQ(inputs_dims[0][j], inputs_dims[i][j],
platform::errors::InvalidArgument(
"The %d-th dimension of input[0] and input[%d] "
"is expected to be equal."
"But received input[0]'s shape = "
"[%s], input[%d]'s shape = [%s].",
j, i, inputs_dims[0], i, inputs_dims[i]));
}
if (!is_runtime && out_dims[j] == -1 && inputs_dims[i][j] > 0) {
out_dims[j] = inputs_dims[i][j];
}
}
}
}
return out_dims;
}

static inline int64_t ComputeAxis(int64_t axis, int64_t rank) {
PADDLE_ENFORCE_EQ(
@@ -109,67 +66,21 @@ class ConcatKernel : public framework::OpKernel<T> {
ins_dims[i] = ins[i]->dims();
}

framework::DDim out_dims = ComputeAndCheckShape(true, ins_dims, axis);
framework::DDim out_dims =
pten::funcs::ComputeAndCheckShape(true, ins_dims, axis);
out->Resize(out_dims);
}
auto place = ctx.GetPlace();
out->mutable_data<T>(place);

// If axis is 0, the lod of the output is not the same as inputs.
if (axis == 0 && ins[0]->lod().size() > 0) {
size_t lod_size_0 = ins[0]->lod().size();
size_t lod_size = lod_size_0;
for (size_t i = 1; i < ins.size(); ++i) {
if (ins[i]->lod().size() > 0) {
PADDLE_ENFORCE_EQ(
ins[i]->lod().size(), lod_size_0,
platform::errors::Unimplemented(
"The lod level of all input LoDTensors should be same. "
"Maybe different lod level of input LoDTensors can concat,"
"it is not supported currently. The lod level of %dth input "
"is %d and first input is %d.",
i, ins[i]->lod().size(), lod_size_0));
} else {
lod_size = 0;
break;
}
}
if (lod_size) {
auto* out_lod = out->mutable_lod();
for (size_t i = 1; i < ins.size(); ++i) {
auto in_lod = ConvertToLengthBasedLoD(ins[i]->lod());
AppendLoD(out_lod, in_lod);
}
}
// call new kernel
auto& dev_ctx = ctx.device_context<DeviceContext>();
std::vector<pten::DenseTensor> pt_ins;
for (auto& in : ins) {
pt_ins.push_back(*in);
}

// Sometimes direct copies will be faster, this maybe need deeply analysis.
if (axis == 0 && ins.size() < 10) {
size_t output_offset = 0;
for (auto* in : ins) {
if (!in || in->numel() == 0UL) {
continue;
}
auto in_stride = framework::stride_numel(in->dims());
auto out_stride = framework::stride_numel(out->dims());
StridedNumelCopyWithAxis<T>(ctx.device_context(), axis,
out->data<T>() + output_offset, out_stride,
in->data<T>(), in_stride, in_stride[axis]);
output_offset += in_stride[axis];
}
} else {
std::vector<framework::Tensor> inputs;
for (size_t j = 0; j < ins.size(); ++j) {
if (ins[j] && ins[j]->numel() > 0) {
inputs.push_back(*ins[j]);
} else {
continue;
}
}
auto& dev_ctx = ctx.template device_context<DeviceContext>();
paddle::operators::math::ConcatFunctor<DeviceContext, T> concat_functor;
concat_functor(dev_ctx, inputs, static_cast<int>(axis), out);
}
pten::ConcatKernel<T>(dev_ctx, pt_ins, axis, out);
}
};

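
With the LoD bookkeeping and the copy/ConcatFunctor paths removed, the fluid ConcatKernel is now a thin adapter: it resolves the axis, recomputes the output shape, collects pten::DenseTensor copies of the inputs, and forwards to the new kernel. A hedged sketch of driving the pten kernel directly, assuming the signature implied by the call site above (RunConcat is an illustrative wrapper, not part of the diff):

#include <vector>
#include "paddle/pten/kernels/concat_kernel.h"

template <typename T, typename Context>
void RunConcat(const Context& dev_ctx,
               const std::vector<pten::DenseTensor>& inputs, int axis,
               pten::DenseTensor* out) {
  // Mirrors the call in concat_op.h; assumes the int axis converts to the
  // kernel's axis parameter type (likely a pten::Scalar, given the
  // int-to-Scalar branches added in the context builders above).
  pten::ConcatKernel<T>(dev_ctx, inputs, axis, out);
}
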
6 changes: 4 additions & 2 deletions paddle/fluid/operators/concat_op_xpu.cc
@@ -18,6 +18,8 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/platform/device/xpu/xpu_header.h"

#include "paddle/pten/core/lod_utils.h"

namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
@@ -69,8 +71,8 @@ class ConcatXPUKernel : public framework::OpKernel<T> {
if (lod_size) {
auto* out_lod = out->mutable_lod();
for (size_t i = 1; i < ins.size(); ++i) {
auto in_lod = ConvertToLengthBasedLoD(ins[i]->lod());
AppendLoD(out_lod, in_lod);
auto in_lod = pten::ConvertToLengthBasedLoD(ins[i]->lod());
pten::AppendLoD(out_lod, in_lod);
}
}
}
3 changes: 2 additions & 1 deletion paddle/fluid/operators/lod_tensor_to_array_op.cc
@@ -14,6 +14,7 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/concat_and_split.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/pten/core/lod_utils.h"

namespace paddle {
namespace framework {
@@ -134,7 +135,7 @@ class LoDTensorToArrayOp : public framework::OperatorBase {
auto lod_and_offset = framework::GetSubLoDAndAbsoluteOffset(
x.lod(), start_idx, start_idx + 1, rank_level + 1);
auto &lod_length = lod_and_offset.first;
framework::AppendLoD(&lod, lod_length);
pten::AppendLoD(&lod, lod_length);
size_t start_offset = lod_and_offset.second.first;
size_t end_offset = lod_and_offset.second.second;
copy_ranges[t].emplace_back(CopyRange{start_offset, end_offset});
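
array_to_lod_tensor_op.cc and lod_tensor_to_array_op.cc now share the same idiom: slice out a sub-LoD (length-based, per the lod_length naming in the diff) together with its absolute offsets, then grow the output LoD with the relocated pten helper. A condensed sketch, where x, out_lod, and the index variables stand in for the diff's locals:

// Length-based sub-LoD for the range [start_idx, start_idx + 1) at the
// given level, plus the absolute [start, end) offsets into the tensor rows.
auto lod_and_offset = framework::GetSubLoDAndAbsoluteOffset(
    x.lod(), start_idx, start_idx + 1, level);
pten::AppendLoD(&out_lod, lod_and_offset.first);
size_t start_offset = lod_and_offset.second.first;
size_t end_offset = lod_and_offset.second.second;
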