Skip to content

Commit 915b06b

Browse files
authored
[hip] Re-land #19082 and #19074 (#19101)
Fixes an issue with async allocation/deallocation where no execution was issued to kick off the worker thread. Depending on workload ordering, this could sometimes cause a deadlock. --------- Signed-off-by: Andrew Woloszyn <andrew.woloszyn@gmail.com>
1 parent c0dff68 commit 915b06b

11 files changed

+610
-102
lines changed

runtime/src/iree/hal/drivers/cuda/cuda_device.c

+2
Original file line numberDiff line numberDiff line change
@@ -1171,6 +1171,8 @@ static const iree_hal_deferred_work_queue_device_interface_vtable_t
11711171
iree_hal_cuda_deferred_work_queue_device_interface_create_stream_command_buffer,
11721172
.submit_command_buffer =
11731173
iree_hal_cuda_deferred_work_queue_device_interface_submit_command_buffer,
1174+
.async_alloc = NULL,
1175+
.async_dealloc = NULL,
11741176
};
11751177

11761178
static const iree_hal_stream_tracing_device_interface_vtable_t

runtime/src/iree/hal/drivers/hip/dynamic_symbol_tables.h

+1
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ IREE_HAL_HIP_REQUIRED_PFN_DECL(hipMallocFromPoolAsync, void **, size_t,
7373
hipMemPool_t, hipStream_t)
7474
IREE_HAL_HIP_REQUIRED_PFN_DECL(hipMallocManaged, hipDeviceptr_t *, size_t,
7575
unsigned int)
76+
IREE_HAL_HIP_REQUIRED_PFN_DECL(hipMallocAsync, void **, size_t, hipStream_t)
7677
IREE_HAL_HIP_REQUIRED_PFN_DECL(hipMemcpy, void *, const void *, size_t,
7778
hipMemcpyKind)
7879
IREE_HAL_HIP_REQUIRED_PFN_DECL(hipMemcpyAsync, void *, const void *, size_t,

runtime/src/iree/hal/drivers/hip/hip_allocator.c

+51
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,10 @@ static void iree_hal_hip_allocator_destroy(
111111
IREE_TRACE_ZONE_END(z0);
112112
}
113113

114+
// Reports whether |base_value| is a HIP allocator by checking its resource
// vtable against iree_hal_hip_allocator_vtable.
bool iree_hal_hip_allocator_isa(iree_hal_allocator_t* base_value) {
  const bool is_hip_allocator =
      iree_hal_resource_is(base_value, &iree_hal_hip_allocator_vtable);
  return is_hip_allocator;
}
117+
114118
static iree_allocator_t iree_hal_hip_allocator_host_allocator(
115119
const iree_hal_allocator_t* IREE_RESTRICT base_allocator) {
116120
iree_hal_hip_allocator_t* allocator =
@@ -590,6 +594,53 @@ static iree_status_t iree_hal_hip_allocator_export_buffer(
590594
}
591595
}
592596

597+
iree_status_t iree_hal_hip_allocator_alloc_async(
598+
iree_hal_allocator_t* base_allocator, hipStream_t stream,
599+
iree_hal_buffer_t* buffer) {
600+
iree_hal_hip_allocator_t* allocator =
601+
iree_hal_hip_allocator_cast(base_allocator);
602+
603+
hipDeviceptr_t ptr = NULL;
604+
iree_status_t status = IREE_HIP_RESULT_TO_STATUS(
605+
allocator->symbols,
606+
hipMallocAsync(&ptr, (size_t)iree_hal_buffer_allocation_size(buffer),
607+
stream),
608+
"hipMallocAsync");
609+
if (iree_status_is_ok(status)) {
610+
iree_hal_hip_buffer_set_device_pointer(buffer, ptr);
611+
IREE_TRACE_ALLOC_NAMED(IREE_HAL_HIP_ALLOCATOR_ID, (void*)ptr,
612+
iree_hal_buffer_allocation_size(buffer));
613+
IREE_STATISTICS(iree_hal_allocator_statistics_record_alloc(
614+
&allocator->statistics, iree_hal_buffer_memory_type(buffer),
615+
iree_hal_buffer_allocation_size(buffer)));
616+
} else {
617+
iree_hal_hip_buffer_set_allocation_empty(buffer);
618+
}
619+
620+
return status;
621+
}
622+
623+
iree_status_t iree_hal_hip_allocator_free_async(
624+
iree_hal_allocator_t* base_allocator, hipStream_t stream,
625+
iree_hal_buffer_t* buffer) {
626+
iree_hal_hip_allocator_t* allocator =
627+
iree_hal_hip_allocator_cast(base_allocator);
628+
hipDeviceptr_t device_ptr = iree_hal_hip_buffer_device_pointer(buffer);
629+
if (!device_ptr) {
630+
return iree_ok_status();
631+
}
632+
633+
IREE_RETURN_IF_ERROR(IREE_HIP_RESULT_TO_STATUS(
634+
allocator->symbols, hipFreeAsync(device_ptr, stream), "hipFreeAsync"));
635+
iree_hal_hip_buffer_set_allocation_empty(buffer);
636+
637+
IREE_TRACE_FREE_NAMED(IREE_HAL_HIP_ALLOCATOR_ID, (void*)device_ptr);
638+
IREE_STATISTICS(iree_hal_allocator_statistics_record_free(
639+
&allocator->statistics, iree_hal_buffer_memory_type(buffer),
640+
iree_hal_buffer_allocation_size(buffer)));
641+
return iree_ok_status();
642+
}
643+
593644
static const iree_hal_allocator_vtable_t iree_hal_hip_allocator_vtable = {
594645
.destroy = iree_hal_hip_allocator_destroy,
595646
.host_allocator = iree_hal_hip_allocator_host_allocator,

runtime/src/iree/hal/drivers/hip/hip_allocator.h

+10
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,16 @@ iree_status_t iree_hal_hip_allocator_create(
2525
hipStream_t stream, iree_hal_hip_memory_pools_t* pools,
2626
iree_allocator_t host_allocator, iree_hal_allocator_t** out_allocator);
2727

28+
// Returns true if |base_value| is a HIP allocator (its resource vtable is the
// HIP allocator vtable).
bool iree_hal_hip_allocator_isa(iree_hal_allocator_t* base_value);
29+
30+
// Issues a hipMallocAsync on |stream| for the allocation size of |buffer|.
// On success the resulting device pointer is set on |buffer|; on failure
// |buffer| is marked as an empty allocation and the error is returned.
iree_status_t iree_hal_hip_allocator_alloc_async(
31+
iree_hal_allocator_t* base_allocator, hipStream_t stream,
32+
iree_hal_buffer_t* buffer);
33+
34+
// Issues a hipFreeAsync on |stream| for the device pointer of |buffer| and
// then marks |buffer| as an empty allocation. Returns OK without issuing a
// free when |buffer| has no device pointer.
iree_status_t iree_hal_hip_allocator_free_async(iree_hal_allocator_t* allocator,
35+
hipStream_t stream,
36+
iree_hal_buffer_t* buffer);
37+
2838
#ifdef __cplusplus
2939
} // extern "C"
3040
#endif // __cplusplus

runtime/src/iree/hal/drivers/hip/hip_buffer.c

+42-3
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <string.h>
1212

1313
#include "iree/base/api.h"
14+
#include "iree/base/internal/synchronization.h"
1415
#include "iree/base/tracing.h"
1516

1617
typedef struct iree_hal_hip_buffer_t {
@@ -19,6 +20,9 @@ typedef struct iree_hal_hip_buffer_t {
1920
void* host_ptr;
2021
hipDeviceptr_t device_ptr;
2122
iree_hal_buffer_release_callback_t release_callback;
23+
iree_slim_mutex_t device_ptr_lock;
24+
iree_notification_t device_ptr_notification;
25+
bool empty;
2226
} iree_hal_hip_buffer_t;
2327

2428
static const iree_hal_buffer_vtable_t iree_hal_hip_buffer_vtable;
@@ -65,13 +69,36 @@ iree_status_t iree_hal_hip_buffer_wrap(
6569
buffer->host_ptr = host_ptr;
6670
buffer->device_ptr = device_ptr;
6771
buffer->release_callback = release_callback;
72+
buffer->empty = false;
73+
iree_slim_mutex_initialize(&buffer->device_ptr_lock);
74+
iree_notification_initialize(&buffer->device_ptr_notification);
6875
*out_buffer = &buffer->base;
6976
}
7077

7178
IREE_TRACE_ZONE_END(z0);
7279
return status;
7380
}
7481

82+
void iree_hal_hip_buffer_set_device_pointer(iree_hal_buffer_t* base_buffer,
83+
hipDeviceptr_t pointer) {
84+
iree_hal_hip_buffer_t* buffer = iree_hal_hip_buffer_cast(base_buffer);
85+
IREE_ASSERT(buffer->device_ptr == NULL,
86+
"Cannot set a device_ptr to a buffer that already has one");
87+
iree_slim_mutex_lock(&buffer->device_ptr_lock);
88+
buffer->device_ptr = pointer;
89+
iree_slim_mutex_unlock(&buffer->device_ptr_lock);
90+
iree_notification_post(&buffer->device_ptr_notification, IREE_ALL_WAITERS);
91+
}
92+
93+
// Flags |base_buffer| as having no backing device allocation: under
// device_ptr_lock the device pointer is cleared and the empty flag raised,
// after which all waiters on the device pointer notification are woken.
void iree_hal_hip_buffer_set_allocation_empty(iree_hal_buffer_t* base_buffer) {
  iree_hal_hip_buffer_t* hip_buffer = iree_hal_hip_buffer_cast(base_buffer);

  iree_slim_mutex_lock(&hip_buffer->device_ptr_lock);
  hip_buffer->device_ptr = NULL;
  hip_buffer->empty = true;
  iree_slim_mutex_unlock(&hip_buffer->device_ptr_lock);

  iree_notification_post(&hip_buffer->device_ptr_notification,
                         IREE_ALL_WAITERS);
}
101+
75102
static void iree_hal_hip_buffer_destroy(iree_hal_buffer_t* base_buffer) {
76103
iree_hal_hip_buffer_t* buffer = iree_hal_hip_buffer_cast(base_buffer);
77104
iree_allocator_t host_allocator = base_buffer->host_allocator;
@@ -80,6 +107,8 @@ static void iree_hal_hip_buffer_destroy(iree_hal_buffer_t* base_buffer) {
80107
buffer->release_callback.fn(buffer->release_callback.user_data,
81108
base_buffer);
82109
}
110+
iree_slim_mutex_deinitialize(&buffer->device_ptr_lock);
111+
iree_notification_deinitialize(&buffer->device_ptr_notification);
83112
iree_allocator_free(host_allocator, buffer);
84113
IREE_TRACE_ZONE_END(z0);
85114
}
@@ -143,10 +172,20 @@ iree_hal_hip_buffer_type_t iree_hal_hip_buffer_type(
143172
return buffer->type;
144173
}
145174

175+
static bool iree_hal_hip_buffer_has_device_ptr(void* arg) {
176+
iree_hal_hip_buffer_t* buffer = (iree_hal_hip_buffer_t*)arg;
177+
iree_slim_mutex_lock(&buffer->device_ptr_lock);
178+
bool has_ptr_or_error = buffer->device_ptr || buffer->empty;
179+
iree_slim_mutex_unlock(&buffer->device_ptr_lock);
180+
return has_ptr_or_error;
181+
}
182+
146183
// Returns the device pointer of |base_buffer|, waiting for it if necessary.
// The new version (added lines) awaits the buffer's device pointer
// notification with an infinite timeout until
// iree_hal_hip_buffer_has_device_ptr reports that a pointer has been set or
// the allocation has been marked empty, then returns the stored pointer.
// The parameter is no longer const in the new version.
// NOTE(review): the final read of device_ptr happens without holding
// device_ptr_lock while the setters write it under that lock — confirm this
// cannot race with a concurrent iree_hal_hip_buffer_set_allocation_empty.
hipDeviceptr_t iree_hal_hip_buffer_device_pointer(
147-
const iree_hal_buffer_t* base_buffer) {
148-
const iree_hal_hip_buffer_t* buffer =
149-
iree_hal_hip_buffer_const_cast(base_buffer);
184+
iree_hal_buffer_t* base_buffer) {
185+
iree_hal_hip_buffer_t* buffer = iree_hal_hip_buffer_cast(base_buffer);
186+
iree_notification_await(&buffer->device_ptr_notification,
187+
iree_hal_hip_buffer_has_device_ptr, buffer,
188+
iree_infinite_timeout());
150189
return buffer->device_ptr;
151190
}
152191

runtime/src/iree/hal/drivers/hip/hip_buffer.h

+10-2
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,16 @@ iree_hal_hip_buffer_type_t iree_hal_hip_buffer_type(
4949
// Returns the HIP base pointer for the given |buffer|.
5050
// This is the base of the entire allocated_buffer; apply the buffer's
5151
// byte_offset (and respect its byte_length) when using it.
// The new version blocks until a device pointer has been set on |buffer| or
// |buffer| has been marked as an empty allocation, in which case the returned
// pointer may be NULL.
52-
hipDeviceptr_t iree_hal_hip_buffer_device_pointer(
53-
const iree_hal_buffer_t* buffer);
52+
hipDeviceptr_t iree_hal_hip_buffer_device_pointer(iree_hal_buffer_t* buffer);
53+
54+
// Sets the HIP base pointer for the given |buffer|.
55+
// |buffer| must not already have a device pointer (asserted). Setting the
56+
// pointer wakes all threads waiting in iree_hal_hip_buffer_device_pointer.
57+
void iree_hal_hip_buffer_set_device_pointer(iree_hal_buffer_t* buffer,
58+
hipDeviceptr_t pointer);
59+
60+
// Marks the buffer as having an intentionally empty allocation.
// Clears the buffer's device pointer and wakes all threads waiting in
// iree_hal_hip_buffer_device_pointer.
61+
void iree_hal_hip_buffer_set_allocation_empty(iree_hal_buffer_t* buffer);
5462

5563
// Returns the HIP host pointer for the given |buffer|, if available.
5664
void* iree_hal_hip_buffer_host_pointer(const iree_hal_buffer_t* buffer);

0 commit comments

Comments
 (0)