From c8164381686b73396d66064ae225a2c856126e52 Mon Sep 17 00:00:00 2001
From: PaulinaGacek <paulina.gacek@intel.com>
Date: Tue, 29 Nov 2022 19:47:03 +0100
Subject: [PATCH 1/3] OneDNN version of Copy, transpose kernels adjusted

---
 paddle/phi/core/tensor_utils.cc               | 43 ++++++++++++-------
 .../kernels/onednn/transpose_grad_kernel.cc   |  2 +-
 paddle/phi/kernels/onednn/transpose_kernel.cc |  2 +-
 3 files changed, 29 insertions(+), 18 deletions(-)
diff --git a/paddle/phi/core/tensor_utils.cc b/paddle/phi/core/tensor_utils.cc
index 6e87f40ed0ab07..467552032f0ad6 100644
--- a/paddle/phi/core/tensor_utils.cc
+++ b/paddle/phi/core/tensor_utils.cc
@@ -56,6 +56,9 @@ void Copy(const Context& dev_ctx,
   void* dst_ptr = nullptr;
   if (paddle::platform::is_cpu_place(dst_place)) {
     dst_ptr = dev_ctx.HostAlloc(dst, src.dtype());
+#ifdef PADDLE_WITH_MKLDNN
+    dst->set_layout(src.layout());
+#endif
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   } else if (paddle::platform::is_gpu_place(dst_place) ||
              paddle::platform::is_cuda_pinned_place(dst_place)) {
@@ -81,7 +84,7 @@ void Copy(const Context& dev_ctx,
   PADDLE_ENFORCE_EQ(
       dst->place(),
       dst_place,
-      phi::errors::Unavailable(
+      errors::Unavailable(
           "The Dst Tensor's place and dst_place do not match, Tensor's place "
           "place is %s, dst_place is %s.",
           dst->place(),
@@ -112,13 +115,13 @@ void Copy(const Context& dev_ctx,
     PADDLE_ENFORCE_EQ(
         paddle::platform::is_gpu_place(ctx_place),
         true,
-        phi::errors::PreconditionNotMet(
+        errors::PreconditionNotMet(
             "Context place error, excepted GPUPlace, but actually %s.",
             ctx_place));
     auto ctx_gpu_place = ctx_place;
     PADDLE_ENFORCE_EQ(src_gpu_place,
                       ctx_gpu_place,
-                      phi::errors::Unavailable(
+                      errors::Unavailable(
                           "Source place and context place do not match, source "
                           "place is %s, context place is %s.",
                           src_gpu_place,
@@ -137,17 +140,17 @@ void Copy(const Context& dev_ctx,
     PADDLE_ENFORCE_EQ(
         paddle::platform::is_gpu_place(ctx_place),
         true,
-        phi::errors::PreconditionNotMet(
+        errors::PreconditionNotMet(
             "Context place error, excepted GPUPlace, but actually %s.",
             ctx_place));
     auto ctx_gpu_place = ctx_place;
-    PADDLE_ENFORCE_EQ(dst_gpu_place,
-                      ctx_gpu_place,
-                      phi::errors::Unavailable(
-                          "Destination place and context place do not match, "
-                          "destination place is %s, context place is %s.",
-                          dst_gpu_place,
-                          ctx_gpu_place));
+    PADDLE_ENFORCE_EQ(
+        dst_gpu_place,
+        ctx_gpu_place,
+        errors::Unavailable("Destination place and context place do not match, "
+                            "destination place is %s, context place is %s.",
+                            dst_gpu_place,
+                            ctx_gpu_place));
     auto stream =
         blocking ? nullptr
                  : reinterpret_cast<const phi::GPUContext&>(dev_ctx).stream();
@@ -161,7 +164,7 @@ void Copy(const Context& dev_ctx,
     PADDLE_ENFORCE_EQ(
         paddle::platform::is_gpu_place(ctx_place),
         true,
-        phi::errors::PreconditionNotMet(
+        errors::PreconditionNotMet(
             "Context place error, excepted GPUPlace, but actually %s.",
             ctx_place));
     auto stream =
@@ -184,7 +187,7 @@ void Copy(const Context& dev_ctx,
         paddle::memory::Copy(
             dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream);
       } else {
-        PADDLE_THROW(phi::errors::Unavailable(
+        PADDLE_THROW(errors::Unavailable(
             "Context place dose not match the source and destination place."));
       }
     }
@@ -196,13 +199,13 @@ void Copy(const Context& dev_ctx,
     PADDLE_ENFORCE_EQ(
         paddle::platform::is_gpu_place(ctx_place),
         true,
-        phi::errors::PreconditionNotMet(
+        errors::PreconditionNotMet(
             "Context place error, excepted GPUPlace, but actually %s.",
             ctx_place));
     auto ctx_gpu_place = ctx_place;
     PADDLE_ENFORCE_EQ(src_gpu_place,
                       ctx_gpu_place,
-                      phi::errors::Unavailable(
+                      errors::Unavailable(
                           "Source place and context place do not match, source "
                           "place is %s, context place is %s.",
                           src_gpu_place,
@@ -259,7 +262,7 @@ void Copy(const Context& dev_ctx,
     paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size, stream);
 #endif
   } else {
-    PADDLE_THROW(phi::errors::Unimplemented(
+    PADDLE_THROW(errors::Unimplemented(
         "Copy from %s to %s is not supported.", src_place, dst_place));
   }
 }
@@ -411,4 +414,12 @@ template void Copy(const CustomContext& dev_ctx,
                    bool blocking,
                    DenseTensor* dst);
 #endif
+
+#ifdef PADDLE_WITH_MKLDNN
+template void Copy(const OneDNNContext& dev_ctx,
+                   const DenseTensor& src,
+                   Place dst_place,
+                   bool blocking,
+                   DenseTensor* dst);
+#endif
 }  // namespace phi
diff --git a/paddle/phi/kernels/onednn/transpose_grad_kernel.cc b/paddle/phi/kernels/onednn/transpose_grad_kernel.cc
index 64f1f9f610861b..49711104bf651d 100644
--- a/paddle/phi/kernels/onednn/transpose_grad_kernel.cc
+++ b/paddle/phi/kernels/onednn/transpose_grad_kernel.cc
@@ -33,7 +33,7 @@ void TransposeGradKernel(const Context& dev_ctx,
   const auto& onednn_engine = dev_ctx.GetEngine();
 
   if (axis.size() == 1) {
-    paddle::framework::TensorCopy(out_grad, out_grad.place(), x_grad);
+    Copy<Context>(dev_ctx, out_grad, out_grad.place(), false, x_grad);
     x_grad->set_mem_desc(out_grad.mem_desc());
     return;
   }
diff --git a/paddle/phi/kernels/onednn/transpose_kernel.cc b/paddle/phi/kernels/onednn/transpose_kernel.cc
index 26c89197e0d7f4..3cfac3d5b48364 100644
--- a/paddle/phi/kernels/onednn/transpose_kernel.cc
+++ b/paddle/phi/kernels/onednn/transpose_kernel.cc
@@ -80,7 +80,7 @@ void TransposeKernel(const Context& dev_ctx,
       dev_ctx, const_cast<DenseTensor*>(&x), x.mem_desc());
 
   if (axis.size() == 1) {
-    paddle::framework::TensorCopy(x, x.place(), out);
+    Copy<Context>(dev_ctx, x, x.place(), false, out);
     out->set_mem_desc(x.mem_desc());
     return;
   }

From 9b0861187463c00b351c8b1d4cd8872755ae584e Mon Sep 17 00:00:00 2001
From: PaulinaGacek <paulina.gacek@intel.com>
Date: Tue, 29 Nov 2022 19:58:39 +0100
Subject: [PATCH 2/3] style fixes in transpose_grad

---
 paddle/phi/kernels/onednn/transpose_grad_kernel.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/paddle/phi/kernels/onednn/transpose_grad_kernel.cc b/paddle/phi/kernels/onednn/transpose_grad_kernel.cc
index 49711104bf651d..ec0f37120d20ec 100644
--- a/paddle/phi/kernels/onednn/transpose_grad_kernel.cc
+++ b/paddle/phi/kernels/onednn/transpose_grad_kernel.cc
@@ -24,10 +24,10 @@ void TransposeGradKernel(const Context& dev_ctx,
                          const DenseTensor& out_grad,
                          const std::vector<int>& axis,
                          DenseTensor* x_grad) {
-  PADDLE_ENFORCE_EQ(dev_ctx.GetPlace().GetType() == phi::AllocationType::CPU,
+  PADDLE_ENFORCE_EQ(dev_ctx.GetPlace().GetType() == AllocationType::CPU,
                     true,
                     errors::PreconditionNotMet(
-                        "Operator DNNL TransposeGrad must use CPUPlace"));
+                        "oneDNN TransposeGrad kernel must use CPUPlace"));
   if (!x_grad) return;
 
   const auto& onednn_engine = dev_ctx.GetEngine();

From 47583daae381f01f384d72c9e853527d4d1181b5 Mon Sep 17 00:00:00 2001
From: PaulinaGacek <paulina.gacek@intel.com>
Date: Tue, 6 Dec 2022 10:04:00 +0100
Subject: [PATCH 3/3] redundant headers deleted

---
 paddle/phi/kernels/onednn/transpose_grad_kernel.cc | 2 --
 paddle/phi/kernels/onednn/transpose_kernel.cc      | 1 -
 2 files changed, 3 deletions(-)

diff --git a/paddle/phi/kernels/onednn/transpose_grad_kernel.cc b/paddle/phi/kernels/onednn/transpose_grad_kernel.cc
index ec0f37120d20ec..dafbb75dc07ac5 100644
--- a/paddle/phi/kernels/onednn/transpose_grad_kernel.cc
+++ b/paddle/phi/kernels/onednn/transpose_grad_kernel.cc
@@ -13,8 +13,6 @@
 // limitations under the License.
 
 #include "paddle/phi/kernels/transpose_grad_kernel.h"
-
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/phi/backends/onednn/onednn_reuse.h"
 #include "paddle/phi/core/kernel_registry.h"
 
diff --git a/paddle/phi/kernels/onednn/transpose_kernel.cc b/paddle/phi/kernels/onednn/transpose_kernel.cc
index 3cfac3d5b48364..a36d5e4493a549 100644
--- a/paddle/phi/kernels/onednn/transpose_kernel.cc
+++ b/paddle/phi/kernels/onednn/transpose_kernel.cc
@@ -13,7 +13,6 @@
 // limitations under the License.
 
 #include "paddle/phi/kernels/transpose_kernel.h"
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/phi/backends/onednn/onednn_reuse.h"
 #include "paddle/phi/core/kernel_registry.h"