//==---- test_contexts.cpp --- PI unit tests -------------------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "gtest/gtest.h"

#include <condition_variable>
#include <mutex>
#include <thread>

#include <cuda.h>

#include "TestGetPlugin.hpp"
#include <CL/sycl.hpp>
#include <CL/sycl/detail/pi.hpp>
#include <detail/plugin.hpp>
#include <pi_cuda.hpp>

using namespace cl::sycl;

struct CudaContextsTest : public ::testing::Test {

protected:
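  // handle to the CUDA PI plugin; remains null if the plugin fails to
  // initialize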
  detail::plugin *plugin = pi::initializeAndGet(backend::cuda);

  pi_platform platform_;
  pi_device device_;

  void SetUp() override {
    // skip the tests if the CUDA backend is not available
    if (!plugin) {
      GTEST_SKIP();
    }

    pi_uint32 numPlatforms = 0;
    ASSERT_EQ(plugin->getBackend(), backend::cuda);

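    // query how many platforms the plugin reports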
    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
                  0, nullptr, &numPlatforms)),
              PI_SUCCESS)
        << "piPlatformsGet failed.\n";

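    // retrieve the platform handle (the CUDA plugin is expected to expose a
    // single platform)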
    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
                  numPlatforms, &platform_, nullptr)),
              PI_SUCCESS)
        << "piPlatformsGet failed.\n";

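    // get a single GPU device from the platform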
    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDevicesGet>(
                  platform_, PI_DEVICE_TYPE_GPU, 1, &device_, nullptr)),
              PI_SUCCESS);
  }

  void TearDown() override {}

  CudaContextsTest() = default;

  ~CudaContextsTest() = default;
};

TEST_F(CudaContextsTest, ContextLifetime) {
  // start with no active context
  cuCtxSetCurrent(nullptr);

  // create a context
  pi_context context;
  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
                nullptr, 1, &device_, nullptr, nullptr, &context)),
            PI_SUCCESS);
  ASSERT_NE(context, nullptr);

  // create a queue from the context; this should use the ScopedContext
  pi_queue queue;
  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piQueueCreate>(
                context, device_, 0, &queue)),
            PI_SUCCESS);
  ASSERT_NE(queue, nullptr);

  // ensure the queue has the correct context
  ASSERT_EQ(context, queue->get_context());

  // check that the context is now the active CUDA context
  CUcontext cudaCtxt = nullptr;
  cuCtxGetCurrent(&cudaCtxt);
  ASSERT_EQ(cudaCtxt, context->get());

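  // release the queue and the context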
  plugin->call<detail::PiApiKind::piQueueRelease>(queue);
  plugin->call<detail::PiApiKind::piContextRelease>(context);

  // check that the context was cleaned up properly by the destructor
  cuCtxGetCurrent(&cudaCtxt);
  ASSERT_EQ(cudaCtxt, nullptr);
}

TEST_F(CudaContextsTest, ContextLifetimeExisting) {
  // start by setting up a CUDA context on the thread
  CUcontext original;
  cuCtxCreate(&original, CU_CTX_MAP_HOST, device_->get());

  // ensure the CUDA context is active
  CUcontext current = nullptr;
  cuCtxGetCurrent(&current);
  ASSERT_EQ(original, current);

  // create a PI context
  pi_context context;
  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
                nullptr, 1, &device_, nullptr, nullptr, &context)),
            PI_SUCCESS);
  ASSERT_NE(context, nullptr);

  // create a queue from the context; this should use the ScopedContext
  pi_queue queue;
  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piQueueCreate>(
                context, device_, 0, &queue)),
            PI_SUCCESS);
  ASSERT_NE(queue, nullptr);

  // ensure the queue has the correct context
  ASSERT_EQ(context, queue->get_context());

  // check that the context is now the active CUDA context
  cuCtxGetCurrent(&current);
  ASSERT_EQ(current, context->get());

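  // release the queue and the context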
  plugin->call<detail::PiApiKind::piQueueRelease>(queue);
  plugin->call<detail::PiApiKind::piContextRelease>(context);

  // check that the context was cleaned up; the old context is restored
  // automatically by cuCtxDestroy in piContextRelease, since it was pushed on
  // the stack by cuCtxCreate
  cuCtxGetCurrent(&current);
  ASSERT_EQ(current, original);

  // release original context
  cuCtxDestroy(original);
}

// In some cases (for host_task), the SYCL runtime may call PI API functions
// from threads of its thread pool. This can cause issues with the CUDA plugin,
// because these functions set an active CUDA context on those threads but
// never clean it up: the context only gets cleaned up on the main thread.
//
// The following test reproduces the scenario where a separate thread is left
// with a dangling, already-deleted context, and checks that PI calls still
// work correctly on that thread.
TEST_F(CudaContextsTest, ContextThread) {
  // start with no active context
  cuCtxSetCurrent(nullptr);

  // create two PI contexts
  pi_context context1;
  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
                nullptr, 1, &device_, nullptr, nullptr, &context1)),
            PI_SUCCESS);
  ASSERT_NE(context1, nullptr);

  pi_context context2;
  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
                nullptr, 1, &device_, nullptr, nullptr, &context2)),
            PI_SUCCESS);
  ASSERT_NE(context2, nullptr);

  // set up synchronization variables between the main thread and the test
  // thread
  std::mutex m;
  std::condition_variable cv;
  bool released = false;
  bool thread_done = false;

  // create a test thread that creates and releases a queue with the first
  // context, waits for the main thread to release that context, and then
  // creates and releases another queue, this time with the second context
  auto test_thread = std::thread([&] {
    CUcontext current = nullptr;

    // create a queue with the first context
    pi_queue queue;
    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piQueueCreate>(
                  context1, device_, 0, &queue)),
              PI_SUCCESS);
    ASSERT_NE(queue, nullptr);

    // ensure the queue has the correct context
    ASSERT_EQ(context1, queue->get_context());

    // check that the first context is now the active CUDA context
    cuCtxGetCurrent(&current);
    ASSERT_EQ(current, context1->get());

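    // release the queue created from the first context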
    plugin->call<detail::PiApiKind::piQueueRelease>(queue);

    // mark the first set of processing as done and notify the main thread
    std::unique_lock<std::mutex> lock(m);
    thread_done = true;
    lock.unlock();
    cv.notify_one();

    // wait for the main thread to release the first context
    lock.lock();
    cv.wait(lock, [&] { return released; });

    // check that the first context is still active on this thread; deleting a
    // context only cleans up the thread on which the deletion happened
    cuCtxGetCurrent(&current);
    ASSERT_EQ(current, context1->get());

    // create a queue with the second context
    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piQueueCreate>(
                  context2, device_, 0, &queue)),
              PI_SUCCESS);
    ASSERT_NE(queue, nullptr);

    // ensure the queue has the correct context
    ASSERT_EQ(context2, queue->get_context());

    // check that the second context is now the active CUDA context
    cuCtxGetCurrent(&current);
    ASSERT_EQ(current, context2->get());

    plugin->call<detail::PiApiKind::piQueueRelease>(queue);
  });

  // wait for the test thread to be done with the first queue, then release the
  // first context
  std::unique_lock<std::mutex> lock(m);
  cv.wait(lock, [&] { return thread_done; });
  plugin->call<detail::PiApiKind::piContextRelease>(context1);

  // notify the other thread that the context was released
  released = true;
  lock.unlock();
  cv.notify_one();

  // wait for the thread to finish
  test_thread.join();

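  // the test thread has joined, so the second context can now be released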
  plugin->call<detail::PiApiKind::piContextRelease>(context2);

  // check that there is no context set on the main thread
  CUcontext current = nullptr;
  cuCtxGetCurrent(&current);
  ASSERT_EQ(current, nullptr);
}