Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[HACKATHON 6th] Refactor phi module structure #64541

Open
wants to merge 17 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmake/generic.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,7 @@ function(paddle_test_build TARGET_NAME)
add_dependencies(${TARGET_NAME} onednn)
endif()
if(WITH_SHARED_PHI)
target_link_libraries(${TARGET_NAME} $<TARGET_LINKER_FILE:phi>)
target_link_libraries(${TARGET_NAME} phi)
add_dependencies(${TARGET_NAME} phi)
endif()
if(WITH_SHARED_IR)
Expand Down
4 changes: 2 additions & 2 deletions cmake/inference_lib.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -280,10 +280,10 @@ else()
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include
${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib)
if(WITH_SHARED_PHI)
set(paddle_phi_lib ${PADDLE_BINARY_DIR}/paddle/phi/libphi.*)
set(paddle_phi_libs ${PADDLE_BINARY_DIR}/paddle/phi/libphi*)
copy(
inference_lib_dist
SRCS ${paddle_phi_lib}
SRCS ${paddle_phi_libs}
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib)
endif()
endif()
Expand Down
5 changes: 5 additions & 0 deletions paddle/fluid/eager/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
set(eager_deps
phi
phi_core
common
hook_utils
utils
Expand All @@ -13,6 +14,10 @@ set(eager_deps
grad_tensor_holder
custom_operator_node)

# GPU and ROCm builds also depend on the separate phi_gpu library.
if(WITH_GPU OR WITH_ROCM)
  list(APPEND eager_deps phi_gpu)
endif()

if(NOT (NOT WITH_PYTHON AND ON_INFER))
set(eager_deps ${eager_deps} accumulation_node prim_utils)
endif()
Expand Down
10 changes: 10 additions & 0 deletions paddle/fluid/eager/auto_code_generator/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,16 @@ if(WIN32)
OUTPUT ${eager_generator_path}/phi.dll
COMMAND ${CMAKE_COMMAND} -E copy ${PHI_LIB} ${eager_generator_path}
DEPENDS phi)
# Copy the split phi DLLs (phi_core, and phi_gpu on GPU/ROCm builds) into the
# eager generator directory, alongside phi.dll.
#
# An add_custom_command(OUTPUT ...) rule is only executed when something
# depends on its OUTPUT, so every copied DLL must be appended to
# EAGER_CODEGEN_DEPS. Previously only phi.dll was listed, which left the
# phi_core.dll / phi_gpu.dll copy rules unreachable.
add_custom_command(
  OUTPUT ${eager_generator_path}/phi_core.dll
  COMMAND ${CMAKE_COMMAND} -E copy ${PHI_CORE_LIB} ${eager_generator_path}
  DEPENDS phi)
list(APPEND EAGER_CODEGEN_DEPS ${eager_generator_path}/phi_core.dll)
if(WITH_GPU OR WITH_ROCM)
  add_custom_command(
    OUTPUT ${eager_generator_path}/phi_gpu.dll
    COMMAND ${CMAKE_COMMAND} -E copy ${PHI_GPU_LIB} ${eager_generator_path}
    DEPENDS phi)
  list(APPEND EAGER_CODEGEN_DEPS ${eager_generator_path}/phi_gpu.dll)
endif()
list(APPEND EAGER_CODEGEN_DEPS ${eager_generator_path}/phi.dll)
endif()

Expand Down
7 changes: 5 additions & 2 deletions paddle/fluid/inference/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,11 @@ list(REMOVE_ITEM fluid_modules cinn_op_dialect)
# shared library to prune library size.
# list(REMOVE_ITEM fluid_modules ${not_infer_modules})

set(SHARED_INFERENCE_DEPS phi common ${fluid_modules} analysis_predictor
${utils_modules})
set(SHARED_INFERENCE_DEPS phi phi_core common ${fluid_modules}
analysis_predictor ${utils_modules})
if(WITH_GPU OR WITH_ROCM)
list(APPEND SHARED_INFERENCE_DEPS phi_gpu)
endif()
if(NOT WIN32)
list(APPEND SHARED_INFERENCE_DEPS ${ir_targets})
endif()
Expand Down
6 changes: 6 additions & 0 deletions paddle/fluid/inference/api/demo_ci/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,13 @@ if(NOT WIN32)
# With a shared phi build, link the demo against the installed phi, phi_core
# and common shared libraries, plus phi_gpu on GPU/ROCm builds.
if(WITH_SHARED_PHI)
  list(
    APPEND
    DEPS
    ${PADDLE_LIB}/paddle/lib/libphi${CMAKE_SHARED_LIBRARY_SUFFIX}
    ${PADDLE_LIB}/paddle/lib/libphi_core${CMAKE_SHARED_LIBRARY_SUFFIX}
    ${PADDLE_LIB}/paddle/lib/libcommon${CMAKE_SHARED_LIBRARY_SUFFIX})
  if(WITH_GPU OR WITH_ROCM)
    list(APPEND DEPS
         ${PADDLE_LIB}/paddle/lib/libphi_gpu${CMAKE_SHARED_LIBRARY_SUFFIX})
  endif()
endif()
else()
set(DEPS
Expand Down
10 changes: 10 additions & 0 deletions paddle/fluid/pybind/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,16 @@ if(WITH_PYTHON)
OUTPUT ${op_impl_path}/phi.dll
COMMAND ${CMAKE_COMMAND} -E copy ${PHI_LIB} ${op_impl_path}
DEPENDS phi)
# Copy the split phi DLLs (phi_core, and phi_gpu on GPU/ROCm builds) into
# op_impl_path, alongside phi.dll.
#
# An add_custom_command(OUTPUT ...) rule is only executed when something
# depends on its OUTPUT, so every copied DLL must be appended to
# EAGER_OP_IMPL_DEPS. Previously only phi.dll was listed, which left the
# phi_core.dll / phi_gpu.dll copy rules unreachable.
add_custom_command(
  OUTPUT ${op_impl_path}/phi_core.dll
  COMMAND ${CMAKE_COMMAND} -E copy ${PHI_CORE_LIB} ${op_impl_path}
  DEPENDS phi)
list(APPEND EAGER_OP_IMPL_DEPS ${op_impl_path}/phi_core.dll)
if(WITH_GPU OR WITH_ROCM)
  add_custom_command(
    OUTPUT ${op_impl_path}/phi_gpu.dll
    COMMAND ${CMAKE_COMMAND} -E copy ${PHI_GPU_LIB} ${op_impl_path}
    DEPENDS phi)
  list(APPEND EAGER_OP_IMPL_DEPS ${op_impl_path}/phi_gpu.dll)
endif()
list(APPEND EAGER_OP_IMPL_DEPS ${op_impl_path}/phi.dll)
endif()

Expand Down
82 changes: 66 additions & 16 deletions paddle/phi/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ if(WITH_DGC)
list(APPEND PHI_DEPS dgc)
endif()

set(PHI_SRCS
set(PHI_CORE_SRCS
${common_srcs}
${api_srcs}
${core_srcs}
Expand All @@ -112,6 +112,8 @@ set(PHI_SRCS
${infermeta_srcs}
${capi_srcs})

set(PHI_GPU_SRCS ${kernels_gpu_srcs})

if(WITH_SHARED_PHI)
set(PHI_BUILD_TYPE
SHARED
Expand Down Expand Up @@ -140,37 +142,67 @@ if(WITH_GPU)
PROPERTIES COMPILE_FLAGS
"-DCUDA_REAL_ARCHS=\"${NVCC_FLAGS_EXTRA_real_archs}\"")
nv_library(
phi ${PHI_BUILD_TYPE}
SRCS ${PHI_SRCS}
phi_core ${PHI_BUILD_TYPE}
SRCS ${PHI_CORE_SRCS}
DEPS ${PHI_DEPS})

nv_library(
phi_gpu ${PHI_BUILD_TYPE}
SRCS ${PHI_GPU_SRCS}
DEPS ${PHI_DEPS} phi_core)
elseif(WITH_ROCM)
hip_library(
phi ${PHI_BUILD_TYPE}
SRCS ${PHI_SRCS}
phi_core ${PHI_BUILD_TYPE}
SRCS ${PHI_CORE_SRCS}
DEPS ${PHI_DEPS})

hip_library(
phi_gpu ${PHI_BUILD_TYPE}
SRCS ${PHI_GPU_SRCS}
DEPS ${PHI_DEPS} phi_core)
elseif(WITH_XPU_KP)
xpu_library(
phi ${PHI_BUILD_TYPE}
SRCS ${PHI_SRCS}
phi_core ${PHI_BUILD_TYPE}
SRCS ${PHI_CORE_SRCS}
DEPS ${PHI_DEPS})
else()
cc_library(
phi ${PHI_BUILD_TYPE}
SRCS ${PHI_SRCS}
phi_core ${PHI_BUILD_TYPE}
SRCS ${PHI_CORE_SRCS}
DEPS ${PHI_DEPS})
endif()

target_compile_definitions(phi PUBLIC PHI_INNER)
# `phi` is built from a single generated placeholder source and links
# phi_core (plus phi_gpu on GPU/ROCm builds); it has no other sources here.
set(PHI_DUMMY_FILE ${CMAKE_CURRENT_BINARY_DIR}/phi_dummy.cpp)
if(MSVC)
# Under MSVC the placeholder exports one function.
# NOTE(review): presumably so the DLL has at least one exported symbol and an
# import library is produced -- confirm.
set(PHI_DUMMY_FILE_CONTENT
"__declspec(dllexport) int phi_dummy_placeholder_function(){ return 0\\; }"
)
else()
# Other compilers get an empty translation unit.
set(PHI_DUMMY_FILE_CONTENT "")
endif()
# file(WRITE) rewrites the placeholder on every configure run.
file(WRITE ${PHI_DUMMY_FILE} ${PHI_DUMMY_FILE_CONTENT})

add_library(phi ${PHI_BUILD_TYPE} ${PHI_DUMMY_FILE})
target_link_libraries(phi phi_core)
if(WITH_GPU OR WITH_ROCM)
target_link_libraries(phi phi_gpu)
endif()

# Note(silverling): some functions in phi_core depend on phi_gpu. When phi is
# built as a shared library this is fine, but when phi is built as a static
# library, phi_gpu must be linked into phi_core as well. A cyclic dependency
# between static libraries is allowed.
# NOTE(review): the note above is worded for every static build, yet the
# condition below only applies to ROCm -- confirm whether static WITH_GPU
# builds need the same link.
if(WITH_ROCM AND NOT WITH_SHARED_PHI)
target_link_libraries(phi_core phi_gpu)
endif()

target_compile_definitions(phi_core PUBLIC PHI_INNER)

if(WIN32)
target_link_libraries(phi shlwapi.lib)
target_link_libraries(phi_core shlwapi.lib)
endif()

if(WIN32)
if(WITH_SHARED_PHI)
set_property(TARGET phi PROPERTY WINDOWS_EXPORT_ALL_SYMBOLS ON)
set_property(TARGET phi_core PROPERTY WINDOWS_EXPORT_ALL_SYMBOLS ON)
set(PHI_NAME
phi.dll
CACHE INTERNAL "" FORCE)
Expand Down Expand Up @@ -203,10 +235,28 @@ endif()

set(PHI_LIB
"${CMAKE_CURRENT_BINARY_DIR}/${PHI_NAME}"
CACHE FILEPATH "PHI Library" FORCE)
CACHE FILEPATH "PHI Dummy Library" FORCE)

string(REPLACE "phi" "phi_core" PHI_CORE_NAME ${PHI_NAME})
set(PHI_CORE_NAME
${PHI_CORE_NAME}
CACHE INTERNAL "" FORCE)
set(PHI_CORE_LIB
"${CMAKE_CURRENT_BINARY_DIR}/${PHI_CORE_NAME}"
CACHE FILEPATH "PHI CPU Library" FORCE)

if(WITH_GPU OR WITH_ROCM)
string(REPLACE "phi" "phi_gpu" PHI_GPU_NAME ${PHI_NAME})
set(PHI_GPU_NAME
${PHI_GPU_NAME}
CACHE INTERNAL "" FORCE)
set(PHI_GPU_LIB
"${CMAKE_CURRENT_BINARY_DIR}/${PHI_GPU_NAME}"
CACHE FILEPATH "PHI GPU Library" FORCE)
endif()

if(MKL_FOUND AND WITH_ONEMKL)
target_include_directories(phi PRIVATE ${MKL_INCLUDE})
target_include_directories(phi_core PRIVATE ${MKL_INCLUDE})
endif()

add_dependencies(phi extern_lapack)
Expand Down
30 changes: 20 additions & 10 deletions paddle/phi/kernels/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@ file(GLOB kernel_primitive_h "primitive/*.h")
file(
GLOB kernel_cu
RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
"gpu/*.cu"
"gpu/*.cu.cc"
"gpudnn/*.cu"
"kps/*.cu"
"legacy/kps/*.cu"
Expand All @@ -40,18 +38,25 @@ file(
"strings/gpu/*.cu"
"fusion/gpu/*.cu")

file(
GLOB kernel_gpu
RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
"gpu/*.cu" "gpu/*.cu.cc")

if(APPLE OR WIN32)
list(REMOVE_ITEM kernel_cu "fusion/gpu/fusion_group_kernel.cu")
list(REMOVE_ITEM kernel_cu "sparse/gpu/conv_kernel_igemm.cu")
endif()

if(NOT WITH_DGC)
list(REMOVE_ITEM kernel_cu "gpu/dgc_kernel.cu")
list(REMOVE_ITEM kernel_gpu "gpu/dgc_kernel.cu")
endif()

if(DEFINED REDUCE_INFERENCE_LIB_SIZE)
list(FILTER kernel_cu EXCLUDE REGEX ".*_grad_kernel\\.cc$")
list(FILTER kernel_cu EXCLUDE REGEX ".*_grad_kernel\\.cu$")
list(FILTER kernel_gpu EXCLUDE REGEX ".*_grad_kernel\\.cc$")
list(FILTER kernel_gpu EXCLUDE REGEX ".*_grad_kernel\\.cu$")
endif()

if(WITH_CUTLASS)
Expand Down Expand Up @@ -201,6 +206,15 @@ if(WITH_ROCM)
list(
REMOVE_ITEM
kernel_cu
"gpudnn/mha_cudnn_frontend.cu"
"fusion/gpu/blha_get_max_len.cu"
"fusion/gpu/block_multi_head_attention_kernel.cu"
"fusion/gpu/fused_bn_add_activation_grad_kernel.cu"
"fusion/gpu/fused_bn_add_activation_kernel.cu"
"fusion/gpu/fusion_transpose_flatten_concat_kernel.cu")
list(
REMOVE_ITEM
kernel_gpu
"gpu/affine_grid_grad_kernel.cu"
"gpu/apply_per_channel_scale_kernel.cu"
"gpu/cholesky_solve_kernel.cu"
Expand All @@ -213,13 +227,7 @@ if(WITH_ROCM)
"gpu/put_along_axis_grad_kernel.cu"
"gpu/put_along_axis_kernel.cu"
"gpu/qr_kernel.cu"
"gpu/svd_kernel.cu"
"gpudnn/mha_cudnn_frontend.cu"
"fusion/gpu/blha_get_max_len.cu"
"fusion/gpu/block_multi_head_attention_kernel.cu"
"fusion/gpu/fused_bn_add_activation_grad_kernel.cu"
"fusion/gpu/fused_bn_add_activation_kernel.cu"
"fusion/gpu/fusion_transpose_flatten_concat_kernel.cu")
"gpu/svd_kernel.cu")
endif()

set(cc_search_pattern
Expand Down Expand Up @@ -276,6 +284,8 @@ file(
if(WITH_GPU OR WITH_ROCM)
collect_srcs(kernels_srcs SRCS ${kernel_cu})
kernel_declare("${kernel_cu}")
collect_srcs(kernels_gpu_srcs SRCS ${kernel_gpu})
kernel_declare("${kernel_gpu}")
endif()

if(WITH_XPU)
Expand Down
4 changes: 4 additions & 0 deletions python/env_dict.py.in
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ env_dict={
'FLUID_CORE_NAME':'@FLUID_CORE_NAME@',
'PHI_LIB':'@PHI_LIB@',
'PHI_NAME':'@PHI_NAME@',
'PHI_CORE_LIB':'@PHI_CORE_LIB@',
'PHI_CORE_NAME':'@PHI_CORE_NAME@',
'PHI_GPU_LIB':'@PHI_GPU_LIB@',
'PHI_GPU_NAME':'@PHI_GPU_NAME@',
'WITH_SHARED_PHI':'@WITH_SHARED_PHI@',
'IR_LIB':'@IR_LIB@',
'IR_NAME':'@IR_NAME@',
Expand Down
12 changes: 12 additions & 0 deletions python/setup.py.in
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,11 @@ package_data['paddle.libs']= []
if('${WITH_SHARED_PHI}' == 'ON'):
    # Ship the phi shared libraries inside paddle.libs: the phi library itself
    # and phi_core, plus phi_gpu on GPU/ROCm builds.
    package_data['paddle.libs'] += [('libphi' if os.name != 'nt' else 'phi') + ext_name]
    shutil.copy('${PHI_LIB}', libs_path)
    package_data['paddle.libs'] += [('libphi_core' if os.name != 'nt' else 'phi_core') + ext_name]
    shutil.copy('${PHI_CORE_LIB}', libs_path)
    # The placeholders must be spelled WITH_GPU / WITH_ROCM, as in the rest of
    # this template. The previous 'WTIH_*' spelling never compared equal to
    # 'ON', so phi_gpu was never packaged.
    if('${WITH_GPU}' == 'ON' or '${WITH_ROCM}' == 'ON'):
        package_data['paddle.libs'] += [('libphi_gpu' if os.name != 'nt' else 'phi_gpu') + ext_name]
        shutil.copy('${PHI_GPU_LIB}', libs_path)

if('${WITH_SHARED_IR}' == 'ON'):
package_data['paddle.libs'] += [('libpir' if os.name != 'nt' else 'pir') + ext_name]
Expand Down Expand Up @@ -840,6 +845,9 @@ if '${CMAKE_BUILD_TYPE}' == 'Release':
if('${WITH_SHARED_PHI}' == 'ON'):
# change rpath of phi.ext for loading 3rd party lib
commands.append("install_name_tool -add_rpath '@loader_path' ${PADDLE_BINARY_DIR}/python/paddle/libs/${PHI_NAME}")
commands.append("install_name_tool -add_rpath '@loader_path' ${PADDLE_BINARY_DIR}/python/paddle/libs/${PHI_CORE_NAME}")
if('${WITH_GPU}' == 'ON' or '${WITH_ROCM}' == 'ON'):
commands.append("install_name_tool -add_rpath '@loader_path' ${PADDLE_BINARY_DIR}/python/paddle/libs/${PHI_GPU_NAME}")
if('${WITH_SHARED_IR}' == 'ON'):
# change rpath of pir.ext for loading 3rd party lib
commands.append("install_name_tool -add_rpath '@loader_path' ${PADDLE_BINARY_DIR}/python/paddle/libs/${IR_NAME}")
Expand All @@ -848,6 +856,10 @@ if '${CMAKE_BUILD_TYPE}' == 'Release':
if('${WITH_SHARED_PHI}' == 'ON'):
# change rpath of phi.ext for loading 3rd party lib
commands.append("patchelf --set-rpath '$ORIGIN/../../nvidia/cuda_runtime/lib:$ORIGIN:$ORIGIN/../libs' ${PADDLE_BINARY_DIR}/python/paddle/libs/${PHI_NAME}")
commands.append("patchelf --set-rpath '$ORIGIN/../../nvidia/cuda_runtime/lib:$ORIGIN:$ORIGIN/../libs' ${PADDLE_BINARY_DIR}/python/paddle/libs/${PHI_CORE_NAME}")
if('${WITH_GPU}' == 'ON' or '${WITH_ROCM}' == 'ON'):
commands.append("patchelf --set-rpath '$ORIGIN/../../nvidia/cuda_runtime/lib:$ORIGIN:$ORIGIN/../libs' ${PADDLE_BINARY_DIR}/python/paddle/libs/${PHI_GPU_NAME}")

if('${WITH_SHARED_IR}' == 'ON'):
# change rpath of pir.ext for loading 3rd party lib
commands.append("patchelf --set-rpath '$ORIGIN:$ORIGIN/../libs' ${PADDLE_BINARY_DIR}/python/paddle/libs/${IR_NAME}")
Expand Down
3 changes: 3 additions & 0 deletions python/setup_cinn.py.in
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,9 @@ if '${WITH_ONEDNN}' == 'ON':
cinnlibs.append('${ONEDNN_SHARED_LIB}')

cinnlibs.append('${PHI_LIB}')
cinnlibs.append('${PHI_CORE_LIB}')
if '${WITH_GPU}' == 'ON' or '${WITH_ROCM}' == 'ON':
cinnlibs.append('${PHI_GPU_LIB}')
cinnlibs.append('${IR_LIB}')
cinnlibs.append('${COMMON_LIB}')

Expand Down
Loading