diff --git a/source/nvidia/cuda/nvidia-cuda-context.cpp b/source/nvidia/cuda/nvidia-cuda-context.cpp index 2b886b37..a4621d7f 100644 --- a/source/nvidia/cuda/nvidia-cuda-context.cpp +++ b/source/nvidia/cuda/nvidia-cuda-context.cpp @@ -70,6 +70,9 @@ nvidia::cuda::context::context(std::shared_ptr<::nvidia::cuda::cuda> cuda, ID3D1 if (cu_result res = _cuda->cuDevicePrimaryCtxRetain(&_ctx, _device); res != cu_result::SUCCESS) { throw std::runtime_error("Failed to acquire primary device context."); } + + _cuda->cuDevicePrimaryCtxSetFlags(_device, cu_context_flags::SCHEDULER_YIELD); + _has_device = true; } #endif diff --git a/source/nvidia/cuda/nvidia-cuda-gs-texture.cpp b/source/nvidia/cuda/nvidia-cuda-gs-texture.cpp index a94f5e97..ceea4c32 100644 --- a/source/nvidia/cuda/nvidia-cuda-gs-texture.cpp +++ b/source/nvidia/cuda/nvidia-cuda-gs-texture.cpp @@ -28,8 +28,8 @@ nvidia::cuda::gstexture::gstexture(std::shared_ptr cuda, std if (!cuda) throw std::invalid_argument("cuda"); - auto gtc = gs::context{}; - int dev_type = gs_get_device_type(); + gs::context gctx; + int dev_type = gs_get_device_type(); if (dev_type == GS_DEVICE_OPENGL) { // ToDo diff --git a/source/nvidia/cuda/nvidia-cuda-stream.cpp b/source/nvidia/cuda/nvidia-cuda-stream.cpp index 8aac943a..377a1818 100644 --- a/source/nvidia/cuda/nvidia-cuda-stream.cpp +++ b/source/nvidia/cuda/nvidia-cuda-stream.cpp @@ -20,9 +20,16 @@ #include "nvidia-cuda-stream.hpp" #include -nvidia::cuda::stream::stream(std::shared_ptr<::nvidia::cuda::cuda> cuda) : _cuda(cuda) +nvidia::cuda::stream::stream(std::shared_ptr<::nvidia::cuda::cuda> cuda, ::nvidia::cuda::cu_stream_flags flags, + std::int32_t priority) + : _cuda(cuda) { - nvidia::cuda::cu_result res = _cuda->cuStreamCreate(&_stream, 0); + nvidia::cuda::cu_result res; + if (priority == 0) { + res = _cuda->cuStreamCreate(&_stream, flags); + } else { + res = _cuda->cuStreamCreateWithPriority(&_stream, flags, priority); + } switch (res) { case nvidia::cuda::cu_result::SUCCESS: break; diff --git a/source/nvidia/cuda/nvidia-cuda-stream.hpp b/source/nvidia/cuda/nvidia-cuda-stream.hpp index 5d5e2674..5dd97db9 100644 --- a/source/nvidia/cuda/nvidia-cuda-stream.hpp +++ b/source/nvidia/cuda/nvidia-cuda-stream.hpp @@ -27,7 +27,9 @@ namespace nvidia::cuda { ::nvidia::cuda::cu_stream_t _stream; public: - stream(std::shared_ptr<::nvidia::cuda::cuda> cuda); + stream(std::shared_ptr<::nvidia::cuda::cuda> cuda, + ::nvidia::cuda::cu_stream_flags flags = ::nvidia::cuda::cu_stream_flags::DEFAULT, + std::int32_t priority = 0); ~stream(); ::nvidia::cuda::cu_stream_t get(); diff --git a/source/nvidia/cuda/nvidia-cuda.cpp b/source/nvidia/cuda/nvidia-cuda.cpp index 3187a819..1b3be614 100644 --- a/source/nvidia/cuda/nvidia-cuda.cpp +++ b/source/nvidia/cuda/nvidia-cuda.cpp @@ -61,10 +61,12 @@ nvidia::cuda::cuda::cuda() // Primary Context Management CUDA_LOAD_SYMBOL(cuDevicePrimaryCtxRetain); CUDA_LOAD_SYMBOL_V2(cuDevicePrimaryCtxRelease); + CUDA_LOAD_SYMBOL_V2(cuDevicePrimaryCtxSetFlags); // Context Management CUDA_LOAD_SYMBOL_V2(cuCtxDestroy); CUDA_LOAD_SYMBOL(cuCtxGetCurrent); + CUDA_LOAD_SYMBOL(cuCtxGetStreamPriorityRange); CUDA_LOAD_SYMBOL_V2(cuCtxPopCurrent); CUDA_LOAD_SYMBOL_V2(cuCtxPushCurrent); CUDA_LOAD_SYMBOL(cuCtxSetCurrent); @@ -93,6 +95,7 @@ nvidia::cuda::cuda::cuda() // Stream Managment CUDA_LOAD_SYMBOL(cuStreamCreate); + CUDA_LOAD_SYMBOL(cuStreamCreateWithPriority); CUDA_LOAD_SYMBOL_V2(cuStreamDestroy); CUDA_LOAD_SYMBOL(cuStreamSynchronize); diff --git a/source/nvidia/cuda/nvidia-cuda.hpp b/source/nvidia/cuda/nvidia-cuda.hpp index 1e3b2f6e..eb17dbc9 100644 --- a/source/nvidia/cuda/nvidia-cuda.hpp +++ b/source/nvidia/cuda/nvidia-cuda.hpp @@ -21,6 +21,7 @@ #include #include #include +#include "utility.hpp" #ifdef WIN32 #pragma warning(push) @@ -75,11 +76,26 @@ namespace nvidia::cuda { FLOAT = 0b00100000, }; + enum class cu_context_flags : std::uint32_t { + SCHEDULER_AUTO = 0x0, + SCHEDULER_SPIN = 0x1, + SCHEDULER_YIELD = 0x2, + SCHEDULER_BLOCKING_SYNC = 0x4, + MAP_HOST = 0x8, + LOCAL_MEMORY_RESIZE_TO_MAXIMUM = 0x10, + }; + + enum class cu_stream_flags : std::uint32_t { + DEFAULT = 0x0, + NON_BLOCKING = 0x1, + }; + typedef void* cu_array_t; typedef void* cu_context_t; typedef std::uint64_t cu_device_ptr_t; typedef void* cu_graphics_resource_t; typedef void* cu_stream_t; + typedef std::int32_t cu_device_t; struct cu_memcpy2d_t { std::size_t src_x_in_bytes; @@ -138,10 +154,10 @@ namespace nvidia::cuda { // Primary Context Management // cuDevicePrimaryCtxGetState - CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxRelease, std::int32_t device); + CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxRelease, cu_device_t device); // cuDevicePrimaryCtxReset_v2 - CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxRetain, cu_context_t* ctx, std::int32_t device); - // cuDevicePrimaryCtxSetFlags_v2 + CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxRetain, cu_context_t* ctx, cu_device_t device); + CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxSetFlags, cu_device_t device, cu_context_flags flags); // Context Management // cuCtxCreate_v2 @@ -153,7 +169,7 @@ namespace nvidia::cuda { // cuCtxGetFlags // cuCtxGetLimit // cuCtxGetSharedMemConfig - // cuCtxGetStreamPriorityRange + CUDA_DEFINE_FUNCTION(cuCtxGetStreamPriorityRange, std::int32_t* lowestPriority, std::int32_t* highestPriority); CUDA_DEFINE_FUNCTION(cuCtxPopCurrent, cu_context_t* ctx); CUDA_DEFINE_FUNCTION(cuCtxPushCurrent, cu_context_t ctx); // cuCtxSetCacheConfig @@ -278,8 +294,9 @@ namespace nvidia::cuda { // cuStreamAddCallback // cuStreamAttachMemAsync // cuStreamBeginCapture_v2 - CUDA_DEFINE_FUNCTION(cuStreamCreate, cu_stream_t* stream, std::uint32_t flags); - // cuStreamCreateWithPriority + CUDA_DEFINE_FUNCTION(cuStreamCreate, cu_stream_t* stream, cu_stream_flags flags); + CUDA_DEFINE_FUNCTION(cuStreamCreateWithPriority, cu_stream_t* stream, cu_stream_flags flags, + std::int32_t priority); CUDA_DEFINE_FUNCTION(cuStreamDestroy, cu_stream_t stream); // cuStreamEndCapture // cuStreamGetCaptureInfo @@ -385,3 +402,6 @@ namespace nvidia::cuda { #endif }; } // namespace nvidia::cuda + +P_ENABLE_BITMASK_OPERATORS(::nvidia::cuda::cu_context_flags) +P_ENABLE_BITMASK_OPERATORS(::nvidia::cuda::cu_stream_flags)