From c8164381686b73396d66064ae225a2c856126e52 Mon Sep 17 00:00:00 2001 From: PaulinaGacek Date: Tue, 29 Nov 2022 19:47:03 +0100 Subject: [PATCH 1/3] OneDNN version of Copy, transpose kernels adjusted --- paddle/phi/core/tensor_utils.cc | 43 ++++++++++++------- .../kernels/onednn/transpose_grad_kernel.cc | 2 +- paddle/phi/kernels/onednn/transpose_kernel.cc | 2 +- 3 files changed, 29 insertions(+), 18 deletions(-) diff --git a/paddle/phi/core/tensor_utils.cc b/paddle/phi/core/tensor_utils.cc index 6e87f40ed0ab07..467552032f0ad6 100644 --- a/paddle/phi/core/tensor_utils.cc +++ b/paddle/phi/core/tensor_utils.cc @@ -56,6 +56,9 @@ void Copy(const Context& dev_ctx, void* dst_ptr = nullptr; if (paddle::platform::is_cpu_place(dst_place)) { dst_ptr = dev_ctx.HostAlloc(dst, src.dtype()); +#ifdef PADDLE_WITH_MKLDNN + dst->set_layout(src.layout()); +#endif #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) } else if (paddle::platform::is_gpu_place(dst_place) || paddle::platform::is_cuda_pinned_place(dst_place)) { @@ -81,7 +84,7 @@ void Copy(const Context& dev_ctx, PADDLE_ENFORCE_EQ( dst->place(), dst_place, - phi::errors::Unavailable( + errors::Unavailable( "The Dst Tensor's place and dst_place do not match, Tensor's place " "place is %s, dst_place is %s.", dst->place(), @@ -112,13 +115,13 @@ void Copy(const Context& dev_ctx, PADDLE_ENFORCE_EQ( paddle::platform::is_gpu_place(ctx_place), true, - phi::errors::PreconditionNotMet( + errors::PreconditionNotMet( "Context place error, excepted GPUPlace, but actually %s.", ctx_place)); auto ctx_gpu_place = ctx_place; PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place, - phi::errors::Unavailable( + errors::Unavailable( "Source place and context place do not match, source " "place is %s, context place is %s.", src_gpu_place, @@ -137,17 +140,17 @@ void Copy(const Context& dev_ctx, PADDLE_ENFORCE_EQ( paddle::platform::is_gpu_place(ctx_place), true, - phi::errors::PreconditionNotMet( + errors::PreconditionNotMet( "Context place error, 
excepted GPUPlace, but actually %s.", ctx_place)); auto ctx_gpu_place = ctx_place; - PADDLE_ENFORCE_EQ(dst_gpu_place, - ctx_gpu_place, - phi::errors::Unavailable( - "Destination place and context place do not match, " - "destination place is %s, context place is %s.", - dst_gpu_place, - ctx_gpu_place)); + PADDLE_ENFORCE_EQ( + dst_gpu_place, + ctx_gpu_place, + errors::Unavailable("Destination place and context place do not match, " + "destination place is %s, context place is %s.", + dst_gpu_place, + ctx_gpu_place)); auto stream = blocking ? nullptr : reinterpret_cast(dev_ctx).stream(); @@ -161,7 +164,7 @@ void Copy(const Context& dev_ctx, PADDLE_ENFORCE_EQ( paddle::platform::is_gpu_place(ctx_place), true, - phi::errors::PreconditionNotMet( + errors::PreconditionNotMet( "Context place error, excepted GPUPlace, but actually %s.", ctx_place)); auto stream = @@ -184,7 +187,7 @@ void Copy(const Context& dev_ctx, paddle::memory::Copy( dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream); } else { - PADDLE_THROW(phi::errors::Unavailable( + PADDLE_THROW(errors::Unavailable( "Context place dose not match the source and destination place.")); } } @@ -196,13 +199,13 @@ void Copy(const Context& dev_ctx, PADDLE_ENFORCE_EQ( paddle::platform::is_gpu_place(ctx_place), true, - phi::errors::PreconditionNotMet( + errors::PreconditionNotMet( "Context place error, excepted GPUPlace, but actually %s.", ctx_place)); auto ctx_gpu_place = ctx_place; PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place, - phi::errors::Unavailable( + errors::Unavailable( "Source place and context place do not match, source " "place is %s, context place is %s.", src_gpu_place, @@ -259,7 +262,7 @@ void Copy(const Context& dev_ctx, paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size, stream); #endif } else { - PADDLE_THROW(phi::errors::Unimplemented( + PADDLE_THROW(errors::Unimplemented( "Copy from %s to %s is not supported.", src_place, dst_place)); } } @@ -411,4 +414,12 @@ template void 
Copy(const CustomContext& dev_ctx, bool blocking, DenseTensor* dst); #endif + +#ifdef PADDLE_WITH_MKLDNN +template void Copy(const OneDNNContext& dev_ctx, + const DenseTensor& src, + Place dst_place, + bool blocking, + DenseTensor* dst); +#endif } // namespace phi diff --git a/paddle/phi/kernels/onednn/transpose_grad_kernel.cc b/paddle/phi/kernels/onednn/transpose_grad_kernel.cc index 64f1f9f610861b..49711104bf651d 100644 --- a/paddle/phi/kernels/onednn/transpose_grad_kernel.cc +++ b/paddle/phi/kernels/onednn/transpose_grad_kernel.cc @@ -33,7 +33,7 @@ void TransposeGradKernel(const Context& dev_ctx, const auto& onednn_engine = dev_ctx.GetEngine(); if (axis.size() == 1) { - paddle::framework::TensorCopy(out_grad, out_grad.place(), x_grad); + Copy(dev_ctx, out_grad, out_grad.place(), false, x_grad); x_grad->set_mem_desc(out_grad.mem_desc()); return; } diff --git a/paddle/phi/kernels/onednn/transpose_kernel.cc b/paddle/phi/kernels/onednn/transpose_kernel.cc index 26c89197e0d7f4..3cfac3d5b48364 100644 --- a/paddle/phi/kernels/onednn/transpose_kernel.cc +++ b/paddle/phi/kernels/onednn/transpose_kernel.cc @@ -80,7 +80,7 @@ void TransposeKernel(const Context& dev_ctx, dev_ctx, const_cast(&x), x.mem_desc()); if (axis.size() == 1) { - paddle::framework::TensorCopy(x, x.place(), out); + Copy(dev_ctx, x, x.place(), false, out); out->set_mem_desc(x.mem_desc()); return; } From 9b0861187463c00b351c8b1d4cd8872755ae584e Mon Sep 17 00:00:00 2001 From: PaulinaGacek Date: Tue, 29 Nov 2022 19:58:39 +0100 Subject: [PATCH 2/3] style fixes in transpose_grad --- paddle/phi/kernels/onednn/transpose_grad_kernel.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/phi/kernels/onednn/transpose_grad_kernel.cc b/paddle/phi/kernels/onednn/transpose_grad_kernel.cc index 49711104bf651d..ec0f37120d20ec 100644 --- a/paddle/phi/kernels/onednn/transpose_grad_kernel.cc +++ b/paddle/phi/kernels/onednn/transpose_grad_kernel.cc @@ -24,10 +24,10 @@ void TransposeGradKernel(const 
Context& dev_ctx, const DenseTensor& out_grad, const std::vector& axis, DenseTensor* x_grad) { - PADDLE_ENFORCE_EQ(dev_ctx.GetPlace().GetType() == phi::AllocationType::CPU, + PADDLE_ENFORCE_EQ(dev_ctx.GetPlace().GetType() == AllocationType::CPU, true, errors::PreconditionNotMet( - "Operator DNNL TransposeGrad must use CPUPlace")); + "oneDNN TransposeGrad kernel must use CPUPlace")); if (!x_grad) return; const auto& onednn_engine = dev_ctx.GetEngine(); From 47583daae381f01f384d72c9e853527d4d1181b5 Mon Sep 17 00:00:00 2001 From: PaulinaGacek Date: Tue, 6 Dec 2022 10:04:00 +0100 Subject: [PATCH 3/3] redundant headers deleted --- paddle/phi/kernels/onednn/transpose_grad_kernel.cc | 2 -- paddle/phi/kernels/onednn/transpose_kernel.cc | 1 - 2 files changed, 3 deletions(-) diff --git a/paddle/phi/kernels/onednn/transpose_grad_kernel.cc b/paddle/phi/kernels/onednn/transpose_grad_kernel.cc index ec0f37120d20ec..dafbb75dc07ac5 100644 --- a/paddle/phi/kernels/onednn/transpose_grad_kernel.cc +++ b/paddle/phi/kernels/onednn/transpose_grad_kernel.cc @@ -13,8 +13,6 @@ // limitations under the License. #include "paddle/phi/kernels/transpose_grad_kernel.h" - -#include "paddle/fluid/framework/tensor_util.h" #include "paddle/phi/backends/onednn/onednn_reuse.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/onednn/transpose_kernel.cc b/paddle/phi/kernels/onednn/transpose_kernel.cc index 3cfac3d5b48364..a36d5e4493a549 100644 --- a/paddle/phi/kernels/onednn/transpose_kernel.cc +++ b/paddle/phi/kernels/onednn/transpose_kernel.cc @@ -13,7 +13,6 @@ // limitations under the License. #include "paddle/phi/kernels/transpose_kernel.h" -#include "paddle/fluid/framework/tensor_util.h" #include "paddle/phi/backends/onednn/onednn_reuse.h" #include "paddle/phi/core/kernel_registry.h"