Skip to content

Commit 239759d

Browse files
authored
Bring back the minimal implementations of runtime API. (#45)
* [Fix] Handle stream correctly. * WIP * Fix fatbin. * Revert. * wip * Remove redundant functions.
1 parent 0b2cac4 commit 239759d

File tree

8 files changed

+6232
-32
lines changed

8 files changed

+6232
-32
lines changed

Cargo.lock

+10
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ members = [
4141
"zluda_redirect",
4242
"zluda_rt",
4343
"zluda_rtc",
44+
"zluda_runtime",
4445
"zluda_sparse",
4546
]
4647

hip_runtime-sys/src/hip_runtime_api_v6.rs

+1-20
Original file line numberDiff line numberDiff line change
@@ -7495,17 +7495,9 @@ extern "C" {
74957495
extern "C" {
74967496
#[must_use]
74977497
pub fn __hipRegisterFatBinary(
7498-
data: *mut ::std::os::raw::c_void,
7498+
data: *const ::std::os::raw::c_void,
74997499
) -> *mut *mut ::std::os::raw::c_void;
75007500
}
7501-
/*
7502-
extern "C" {
7503-
#[must_use]
7504-
pub fn __hipRegisterFatBinaryEnd(
7505-
fatCubinHandle: *mut *mut ::std::os::raw::c_void,
7506-
) -> ::std::os::raw::c_void;
7507-
}
7508-
*/
75097501
extern "C" {
75107502
#[must_use]
75117503
pub fn __hipRegisterFunction(
@@ -7521,17 +7513,6 @@ extern "C" {
75217513
wSize: *mut ::std::os::raw::c_int,
75227514
) -> ::std::os::raw::c_void;
75237515
}
7524-
/*
7525-
extern "C" {
7526-
#[must_use]
7527-
pub fn __hipRegisterHostVar(
7528-
fatCubinHandle: *mut *mut ::std::os::raw::c_void,
7529-
deviceName: *const ::std::os::raw::c_char,
7530-
hostVar: *mut ::std::os::raw::c_char,
7531-
size: usize,
7532-
) -> ::std::os::raw::c_void;
7533-
}
7534-
*/
75357516
extern "C" {
75367517
#[must_use]
75377518
pub fn __hipRegisterManagedVar(

zluda_inject/src/bin.rs

+10-12
Original file line numberDiff line numberDiff line change
@@ -81,16 +81,15 @@ struct ProgramArguments {
8181
pub fn main_impl() -> Result<(), Box<dyn Error>> {
8282
for argument in env::args_os() {
8383
match argument.to_str() {
84-
Some(argument) =>
85-
match argument {
86-
"--version" => {
87-
println!("ZLUDA 3.8.3");
88-
process::exit(0);
89-
},
90-
"--" => break,
91-
_ => {},
92-
},
93-
None => {},
84+
Some(argument) => match argument {
85+
"--version" => {
86+
println!("ZLUDA 3.8.3");
87+
process::exit(0);
88+
}
89+
"--" => break,
90+
_ => {}
91+
},
92+
None => {}
9493
}
9594
}
9695

@@ -202,8 +201,7 @@ struct NormalizedArguments {
202201
impl NormalizedArguments {
203202
fn new(prog_args: ProgramArguments) -> Result<Self, Box<dyn Error>> {
204203
let current_exe = env::current_exe()?;
205-
let nccl_path =
206-
Self::get_absolute_path_or_default(&current_exe, prog_args.nccl, NCCL_DLL)?;
204+
let nccl_path = Self::get_absolute_path_or_default(&current_exe, prog_args.nccl, NCCL_DLL)?;
207205
let nvrtc_path = prog_args.nvrtc.map(Self::get_absolute_path).transpose()?;
208206
let nvcuda_path =
209207
Self::get_absolute_path_or_default(&current_exe, prog_args.nvcuda, NVCUDA_DLL)?;

zluda_runtime/Cargo.toml

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
[package]
2+
name = "zluda_runtime"
3+
version = "0.0.0"
4+
authors = ["Seunghoon Lee <op@lsh.sh>"]
5+
edition = "2018"
6+
7+
[lib]
8+
name = "cudart"
9+
crate-type = ["cdylib"]
10+
11+
[features]
12+
rocm5 = ["hip_common/rocm5", "hip_runtime-sys/rocm5", "zluda_dark_api/rocm5"]
13+
14+
[dependencies]
15+
cuda_types = { path = "../cuda_types" }
16+
hip_common = { path = "../hip_common" }
17+
hip_runtime-sys = { path = "../hip_runtime-sys" }
18+
zluda_dark_api = { path = "../zluda_dark_api" }
19+
20+
[package.metadata.zluda]
21+
linux_names = ["libcudart.so.10", "libcudart.so.11"]
22+
dump_names = ["libcudart.so"]

zluda_runtime/README

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
bindgen include/cuda_runtime.h -o src/cudart.rs --allowlist-function="^cuda.*" --default-enum-style=newtype --no-layout-tests --no-derive-debug -- -I""
2+
sed -i -e 's/extern "C" {//g' -e 's/-> cudaError_t;/-> cudaError_t { crate::unsupported()/g' -e 's/pub fn /#[no_mangle] pub extern "system" fn /g' src/cudart.rs
3+
bindgen include/cuda_profiler_api.h -o src/profiler.rs --allowlist-function="^cuda.*" --default-enum-style=newtype --no-layout-tests --no-derive-debug -- -I""
4+
sed -i -e 's/extern "C" {//g' -e 's/-> cudaError_t;/-> cudaError_t { crate::unsupported()/g' -e 's/pub fn /#[no_mangle] pub extern "system" fn /g' src/profiler.rs

zluda_runtime/src/cudart.rs

+5,976
Large diffs are not rendered by default.

zluda_runtime/src/lib.rs

+208
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
#![allow(warnings)]
2+
mod cudart;
3+
pub use cudart::*;
4+
5+
use hip_runtime_sys::*;
6+
use std::mem;
7+
8+
#[cfg(debug_assertions)]
9+
fn unsupported() -> cudaError_t {
10+
unimplemented!()
11+
}
12+
13+
#[cfg(not(debug_assertions))]
14+
fn unsupported() -> cudaError_t {
15+
cudaError_t::cudaErrorNotSupported
16+
}
17+
18+
fn to_cuda(status: hipError_t) -> cudaError_t {
19+
match status {
20+
hipError_t::hipSuccess => cudaError_t::cudaSuccess,
21+
hipError_t::hipErrorInvalidValue => cudaError_t::cudaErrorInvalidValue,
22+
hipError_t::hipErrorOutOfMemory => cudaError_t::cudaErrorMemoryAllocation,
23+
hipError_t::hipErrorInvalidContext => cudaError_t::cudaErrorDeviceUninitialized,
24+
hipError_t::hipErrorInvalidResourceHandle => cudaError_t::cudaErrorInvalidResourceHandle,
25+
hipError_t::hipErrorNotSupported => cudaError_t::cudaErrorNotSupported,
26+
err => panic!("[ZLUDA] HIP Runtime failed: {}", err.0),
27+
}
28+
}
29+
30+
unsafe fn to_stream(stream: cudaStream_t) -> hipStream_t {
31+
let lib = hip_common::zluda_ext::get_cuda_library().unwrap();
32+
let cu_get_export_table = lib
33+
.get::<unsafe extern "C" fn(
34+
ppExportTable: *mut *const ::std::os::raw::c_void,
35+
pExportTableId: *const cuda_types::CUuuid,
36+
) -> cuda_types::CUresult>(b"cuGetExportTable\0")
37+
.unwrap();
38+
let mut export_table = std::ptr::null();
39+
let error = (cu_get_export_table)(&mut export_table, &zluda_dark_api::ZludaExt::GUID);
40+
assert_eq!(error, cuda_types::CUresult::CUDA_SUCCESS);
41+
let zluda_ext = zluda_dark_api::ZludaExt::new(export_table);
42+
let maybe_hip_stream: Result<_, _> = zluda_ext.get_hip_stream(stream as _).into();
43+
maybe_hip_stream.unwrap() as _
44+
}
45+
46+
fn memcpy_kind(kind: cudaMemcpyKind) -> hipMemcpyKind {
47+
match kind {
48+
cudaMemcpyKind::cudaMemcpyHostToHost => hipMemcpyKind::hipMemcpyHostToHost,
49+
cudaMemcpyKind::cudaMemcpyHostToDevice => hipMemcpyKind::hipMemcpyHostToDevice,
50+
cudaMemcpyKind::cudaMemcpyDeviceToHost => hipMemcpyKind::hipMemcpyDeviceToHost,
51+
cudaMemcpyKind::cudaMemcpyDeviceToDevice => hipMemcpyKind::hipMemcpyDeviceToDevice,
52+
cudaMemcpyKind::cudaMemcpyDefault => hipMemcpyKind::hipMemcpyDefault,
53+
_ => panic!(),
54+
}
55+
}
56+
57+
fn to_cuda_stream_capture_status(status: hipStreamCaptureStatus) -> cudaStreamCaptureStatus {
58+
match status {
59+
hipStreamCaptureStatus::hipStreamCaptureStatusNone => {
60+
cudaStreamCaptureStatus::cudaStreamCaptureStatusNone
61+
}
62+
hipStreamCaptureStatus::hipStreamCaptureStatusActive => {
63+
cudaStreamCaptureStatus::cudaStreamCaptureStatusActive
64+
}
65+
hipStreamCaptureStatus::hipStreamCaptureStatusInvalidated => {
66+
cudaStreamCaptureStatus::cudaStreamCaptureStatusInvalidated
67+
}
68+
_ => panic!(),
69+
}
70+
}
71+
72+
unsafe fn register_fat_binary(
73+
fat_cubin: *mut ::std::os::raw::c_void,
74+
) -> *mut *mut ::std::os::raw::c_void {
75+
__hipRegisterFatBinary(fat_cubin)
76+
}
77+
78+
unsafe fn register_function(
79+
fat_cubin_handle: *mut *mut ::std::os::raw::c_void,
80+
host_fun: *const ::std::os::raw::c_char,
81+
device_fun: *mut ::std::os::raw::c_char,
82+
device_name: *const ::std::os::raw::c_char,
83+
thread_limit: i32,
84+
tid: *mut uint3,
85+
bid: *mut uint3,
86+
b_dim: *mut cudart::dim3,
87+
g_dim: *mut cudart::dim3,
88+
w_size: *mut i32,
89+
) -> ::std::os::raw::c_void {
90+
__hipRegisterFunction(
91+
fat_cubin_handle,
92+
host_fun.cast(),
93+
device_fun,
94+
device_name,
95+
thread_limit as _,
96+
tid.cast(),
97+
bid.cast(),
98+
b_dim.cast(),
99+
g_dim.cast(),
100+
w_size,
101+
)
102+
}
103+
104+
unsafe fn register_var(
105+
fat_cubin_handle: *mut *mut ::std::os::raw::c_void,
106+
host_var: *mut ::std::os::raw::c_char,
107+
device_address: *mut ::std::os::raw::c_char,
108+
device_name: *const ::std::os::raw::c_char,
109+
ext: i32,
110+
size: usize,
111+
constant: i32,
112+
global: i32,
113+
) -> ::std::os::raw::c_void {
114+
__hipRegisterVar(
115+
fat_cubin_handle,
116+
device_address.cast(),
117+
host_var,
118+
device_name.cast_mut(),
119+
ext,
120+
size,
121+
constant,
122+
global,
123+
)
124+
}
125+
126+
unsafe fn unregister_fat_binary(
127+
fat_cubin_handle: *mut *mut ::std::os::raw::c_void,
128+
) -> ::std::os::raw::c_void {
129+
__hipUnregisterFatBinary(fat_cubin_handle)
130+
}
131+
132+
unsafe fn device_get_stream_priority_range(
133+
least_priority: *mut i32,
134+
greatest_priority: *mut i32,
135+
) -> cudaError_t {
136+
to_cuda(hipDeviceGetStreamPriorityRange(
137+
least_priority,
138+
greatest_priority,
139+
))
140+
}
141+
142+
unsafe fn get_last_error() -> cudaError_t {
143+
to_cuda(hipGetLastError())
144+
}
145+
146+
unsafe fn get_device_count(count: *mut i32) -> cudaError_t {
147+
to_cuda(hipGetDeviceCount(count))
148+
}
149+
150+
unsafe fn get_device(device: *mut i32) -> cudaError_t {
151+
to_cuda(hipGetDevice(device))
152+
}
153+
154+
unsafe fn stream_create_with_priority(
155+
p_stream: *mut cudaStream_t,
156+
flags: u32,
157+
priority: i32,
158+
) -> cudaError_t {
159+
let lib = hip_common::zluda_ext::get_cuda_library().unwrap();
160+
let cu_stream_create_with_priority = lib
161+
.get::<unsafe extern "C" fn(
162+
phStream: *mut cuda_types::CUstream,
163+
flags: ::std::os::raw::c_uint,
164+
priority: ::std::os::raw::c_int,
165+
) -> cuda_types::CUresult>(b"cuStreamCreateWithPriority\0")
166+
.unwrap();
167+
cudaError_t((cu_stream_create_with_priority)(p_stream.cast(), flags, priority).0)
168+
}
169+
170+
unsafe fn stream_synchronize(stream: cudaStream_t) -> cudaError_t {
171+
let stream = to_stream(stream);
172+
to_cuda(hipStreamSynchronize(stream))
173+
}
174+
175+
unsafe fn stream_is_capturing(
176+
stream: cudaStream_t,
177+
p_capture_status: *mut cudaStreamCaptureStatus,
178+
) -> cudaError_t {
179+
let stream = to_stream(stream);
180+
let mut capture_status = mem::zeroed();
181+
let status = to_cuda(hipStreamIsCapturing(stream, &mut capture_status));
182+
*p_capture_status = to_cuda_stream_capture_status(capture_status);
183+
status
184+
}
185+
186+
unsafe fn malloc(dev_ptr: *mut *mut ::std::os::raw::c_void, size: usize) -> cudaError_t {
187+
to_cuda(hipMalloc(dev_ptr, size))
188+
}
189+
190+
unsafe fn free(dev_ptr: *mut ::std::os::raw::c_void) -> cudaError_t {
191+
to_cuda(hipFree(dev_ptr))
192+
}
193+
194+
unsafe fn mem_get_info(free: *mut usize, total: *mut usize) -> cudaError_t {
195+
to_cuda(hipMemGetInfo(free, total))
196+
}
197+
198+
unsafe fn memcpy_async(
199+
dst: *mut ::std::os::raw::c_void,
200+
src: *const ::std::os::raw::c_void,
201+
count: usize,
202+
kind: cudaMemcpyKind,
203+
stream: cudaStream_t,
204+
) -> cudaError_t {
205+
let kind = memcpy_kind(kind);
206+
let stream = to_stream(stream);
207+
to_cuda(hipMemcpyAsync(dst, src, count, kind, stream))
208+
}

0 commit comments

Comments
 (0)