Mirror of https://github.com/Xaymar/obs-StreamFX (synced 2024-11-27 22:03:01 +00:00)
nvidia/cuda: Remove 'cu_' prefix from types
commit 9658c1ee0f (parent 6ae69a5c71)
11 changed files with 112 additions and 117 deletions
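The commit is a mechanical rename inside the plugin's nvidia::cuda wrapper: every type and enum loses its cu_ prefix (cu_result becomes result, cu_stream_flags becomes stream_flags, cu_memcpy2d_t becomes memcpy2d_t, cu_device_ptr_t becomes device_ptr_t, and so on), since the enclosing namespace already carries the CUDA qualification. As a minimal sketch of the effect at a call site, assuming only the two renamed enums reproduced from the header changes below (the surrounding main() is illustrative and not part of the commit):

#include <cstddef>
#include <cstdint>

namespace nvidia::cuda {
	// Renamed in this commit from cu_result / cu_stream_flags; values as in nvidia-cuda.hpp.
	enum class result : std::size_t { SUCCESS = 0, INVALID_VALUE = 1, OUT_OF_MEMORY = 2 };
	enum class stream_flags : std::uint32_t { DEFAULT = 0x0, NON_BLOCKING = 0x1 };
} // namespace nvidia::cuda

int main()
{
	// Before this commit the same lines read:
	//   auto flags = ::nvidia::cuda::cu_stream_flags::NON_BLOCKING;
	//   auto res   = ::nvidia::cuda::cu_result::SUCCESS;
	auto flags = ::nvidia::cuda::stream_flags::NON_BLOCKING;
	auto res   = ::nvidia::cuda::result::SUCCESS;
	return (res == ::nvidia::cuda::result::SUCCESS
			&& flags == ::nvidia::cuda::stream_flags::NON_BLOCKING)
			   ? 0
			   : 1;
}

The rest of the diff applies the same rename throughout the wrapper headers and their call sites; removed lines carry the old cu_ names, added lines the new ones.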
@@ -79,7 +79,7 @@ face_tracking_instance::face_tracking_instance(obs_data_t* settings, obs_source_
 _geometry = std::make_shared<gs::vertex_buffer>(4, 1);
 auto cctx = std::make_shared<::nvidia::cuda::context_stack>(_cuda, _cuda_ctx);
 _cuda_stream =
-std::make_shared<::nvidia::cuda::stream>(_cuda, ::nvidia::cuda::cu_stream_flags::NON_BLOCKING, 0);
+std::make_shared<::nvidia::cuda::stream>(_cuda, ::nvidia::cuda::stream_flags::NON_BLOCKING, 0);
 }
 
 { // Asynchronously load Face Tracking.
@@ -311,26 +311,26 @@ void face_tracking_instance::async_track(std::shared_ptr<void> ptr)
 #ifdef ENABLE_PROFILING
 auto prof = _profile_ar_copy->track();
 #endif
-::nvidia::cuda::cu_memcpy2d_t mc;
+::nvidia::cuda::memcpy2d_t mc;
 mc.src_x_in_bytes = 0;
 mc.src_y = 0;
-mc.src_memory_type = ::nvidia::cuda::cu_memory_type::ARRAY;
+mc.src_memory_type = ::nvidia::cuda::memory_type::ARRAY;
 mc.src_host = nullptr;
 mc.src_device = 0;
 mc.src_array = _ar_texture_cuda->map(_cuda_stream);
 mc.src_pitch = static_cast<size_t>(_ar_image.pitch);
 mc.dst_x_in_bytes = 0;
 mc.dst_y = 0;
-mc.dst_memory_type = ::nvidia::cuda::cu_memory_type::DEVICE;
+mc.dst_memory_type = ::nvidia::cuda::memory_type::DEVICE;
 mc.dst_host = 0;
-mc.dst_device = reinterpret_cast<::nvidia::cuda::cu_device_ptr_t>(_ar_image.pixels);
+mc.dst_device = reinterpret_cast<::nvidia::cuda::device_ptr_t>(_ar_image.pixels);
 mc.dst_array = 0;
 mc.dst_pitch = static_cast<size_t>(_ar_image.pitch);
 mc.width_in_bytes = static_cast<size_t>(_ar_image.pitch);
 mc.height = _ar_image.height;
 
-if (::nvidia::cuda::cu_result res = _cuda->cuMemcpy2DAsync(&mc, _cuda_stream->get());
-res != ::nvidia::cuda::cu_result::SUCCESS) {
+if (::nvidia::cuda::result res = _cuda->cuMemcpy2DAsync(&mc, _cuda_stream->get());
+res != ::nvidia::cuda::result::SUCCESS) {
 LOG_ERROR("<%s> Failed to prepare buffers for tracking.", obs_source_get_name(_self));
 return;
 }

@@ -31,7 +31,7 @@ nvidia::cuda::context_stack::context_stack(std::shared_ptr<::nvidia::cuda::cuda>
 if (!context)
 throw std::invalid_argument("context");
 
-if (cu_result res = _cuda->cuCtxPushCurrent(_ctx->get()); res != cu_result::SUCCESS) {
+if (result res = _cuda->cuCtxPushCurrent(_ctx->get()); res != result::SUCCESS) {
 throw std::runtime_error("Failed to push context.");
 }
 }
@@ -40,7 +40,7 @@ nvidia::cuda::context_stack::~context_stack()
 {
 using namespace ::nvidia::cuda;
 
-cu_context_t ctx;
+context_t ctx;
 _cuda->cuCtxGetCurrent(&ctx);
 if (ctx == _ctx->get()) {
 _cuda->cuCtxPopCurrent(&ctx);

@@ -62,12 +62,12 @@ nvidia::cuda::context::context(std::shared_ptr<::nvidia::cuda::cuda> cuda, ID3D1
 dxgi_device->GetAdapter(&dxgi_adapter);
 
 // Get Device Index
-if (cu_result res = _cuda->cuD3D11GetDevice(&_device, dxgi_adapter); res != cu_result::SUCCESS) {
+if (result res = _cuda->cuD3D11GetDevice(&_device, dxgi_adapter); res != result::SUCCESS) {
 throw std::runtime_error("Failed to get device index for device.");
 }
 
 // Acquire Context
-if (cu_result res = _cuda->cuDevicePrimaryCtxRetain(&_ctx, _device); res != cu_result::SUCCESS) {
+if (result res = _cuda->cuDevicePrimaryCtxRetain(&_ctx, _device); res != result::SUCCESS) {
 throw std::runtime_error("Failed to acquire primary device context.");
 }
 
@@ -75,7 +75,7 @@ nvidia::cuda::context::context(std::shared_ptr<::nvidia::cuda::cuda> cuda, ID3D1
 }
 #endif
 
-::nvidia::cuda::cu_context_t nvidia::cuda::context::get()
+::nvidia::cuda::context_t nvidia::cuda::context::get()
 {
 return _ctx;
 }

@@ -24,11 +24,11 @@
 namespace nvidia::cuda {
 class context {
 std::shared_ptr<::nvidia::cuda::cuda> _cuda;
-::nvidia::cuda::cu_context_t _ctx;
+::nvidia::cuda::context_t _ctx;
 
 // Primary Device Context
 bool _has_device;
-::nvidia::cuda::cu_device_t _device;
+::nvidia::cuda::device_t _device;
 
 private:
 context(std::shared_ptr<::nvidia::cuda::cuda> cuda);
@@ -40,6 +40,6 @@ namespace nvidia::cuda {
 context(std::shared_ptr<::nvidia::cuda::cuda> cuda, ID3D11Device* device);
 #endif
 
-::nvidia::cuda::cu_context_t get();
+::nvidia::cuda::context_t get();
 };
 } // namespace nvidia::cuda

@@ -54,7 +54,7 @@ nvidia::cuda::gstexture::gstexture(std::shared_ptr<nvidia::cuda::cuda> cuda, std
 }
 
 switch (_cuda->cuGraphicsD3D11RegisterResource(&_resource, resource, 0)) {
-case nvidia::cuda::cu_result::SUCCESS:
+case nvidia::cuda::result::SUCCESS:
 break;
 default:
 throw std::runtime_error("nvidia::cuda::gstexture: Failed to register resource.");
@@ -69,15 +69,15 @@ nvidia::cuda::gstexture::~gstexture()
 _cuda->cuGraphicsUnregisterResource(_resource);
 }
 
-nvidia::cuda::cu_array_t nvidia::cuda::gstexture::map(std::shared_ptr<nvidia::cuda::stream> stream)
+nvidia::cuda::array_t nvidia::cuda::gstexture::map(std::shared_ptr<nvidia::cuda::stream> stream)
 {
 if (_is_mapped) {
 return _pointer;
 }
 
-cu_graphics_resource_t resources[] = {_resource};
+graphics_resource_t resources[] = {_resource};
 switch (_cuda->cuGraphicsMapResources(1, resources, stream->get())) {
-case nvidia::cuda::cu_result::SUCCESS:
+case nvidia::cuda::result::SUCCESS:
 break;
 default:
 throw std::runtime_error("nvidia::cuda::gstexture: Mapping failed.");
@@ -87,7 +87,7 @@ nvidia::cuda::cu_array_t nvidia::cuda::gstexture::map(std::shared_ptr<nvidia::cu
 _is_mapped = true;
 
 switch (_cuda->cuGraphicsSubResourceGetMappedArray(&_pointer, _resource, 0, 0)) {
-case nvidia::cuda::cu_result::SUCCESS:
+case nvidia::cuda::result::SUCCESS:
 break;
 default:
 unmap();
@@ -102,9 +102,9 @@ void nvidia::cuda::gstexture::unmap()
 if (!_is_mapped)
 return;
 
-cu_graphics_resource_t resources[] = {_resource};
+graphics_resource_t resources[] = {_resource};
 switch (_cuda->cuGraphicsUnmapResources(1, resources, _stream->get())) {
-case nvidia::cuda::cu_result::SUCCESS:
+case nvidia::cuda::result::SUCCESS:
 break;
 default:
 throw std::runtime_error("nvidia::cuda::gstexture: Unmapping failed.");

@@ -28,17 +28,17 @@ namespace nvidia::cuda {
 class gstexture {
 std::shared_ptr<::nvidia::cuda::cuda> _cuda;
 std::shared_ptr<gs::texture> _texture;
-cu_graphics_resource_t _resource;
+graphics_resource_t _resource;
 
 bool _is_mapped;
-cu_array_t _pointer;
+array_t _pointer;
 std::shared_ptr<nvidia::cuda::stream> _stream;
 
 public:
 gstexture(std::shared_ptr<nvidia::cuda::cuda> cuda, std::shared_ptr<gs::texture> texture);
 ~gstexture();
 
-cu_array_t map(std::shared_ptr<nvidia::cuda::stream> stream);
+array_t map(std::shared_ptr<nvidia::cuda::stream> stream);
 void unmap();
 };
 } // namespace nvidia::cuda

@@ -23,9 +23,9 @@
 nvidia::cuda::memory::memory(std::shared_ptr<::nvidia::cuda::cuda> cuda, std::size_t size)
 : _cuda(cuda), _pointer(), _size(size)
 {
-::nvidia::cuda::cu_result res = _cuda->cuMemAlloc(&_pointer, size);
+::nvidia::cuda::result res = _cuda->cuMemAlloc(&_pointer, size);
 switch (res) {
-case ::nvidia::cuda::cu_result::SUCCESS:
+case ::nvidia::cuda::result::SUCCESS:
 break;
 default:
 throw std::runtime_error("nvidia::cuda::memory: cuMemAlloc failed.");
@@ -37,7 +37,7 @@ nvidia::cuda::memory::~memory()
 _cuda->cuMemFree(_pointer);
 }
 
-nvidia::cuda::cu_device_ptr_t nvidia::cuda::memory::get()
+nvidia::cuda::device_ptr_t nvidia::cuda::memory::get()
 {
 return _pointer;
 }

@@ -25,14 +25,14 @@
 namespace nvidia::cuda {
 class memory {
 std::shared_ptr<::nvidia::cuda::cuda> _cuda;
-cu_device_ptr_t _pointer;
+device_ptr_t _pointer;
 std::size_t _size;
 
 public:
 memory(std::shared_ptr<::nvidia::cuda::cuda> cuda, std::size_t size);
 ~memory();
 
-cu_device_ptr_t get();
+device_ptr_t get();
 
 std::size_t size();
 };

@@ -20,18 +20,18 @@
 #include "nvidia-cuda-stream.hpp"
 #include <stdexcept>
 
-nvidia::cuda::stream::stream(std::shared_ptr<::nvidia::cuda::cuda> cuda, ::nvidia::cuda::cu_stream_flags flags,
+nvidia::cuda::stream::stream(std::shared_ptr<::nvidia::cuda::cuda> cuda, ::nvidia::cuda::stream_flags flags,
 std::int32_t priority)
 : _cuda(cuda)
 {
-nvidia::cuda::cu_result res;
+nvidia::cuda::result res;
 if (priority == 0) {
 res = _cuda->cuStreamCreate(&_stream, flags);
 } else {
 res = _cuda->cuStreamCreateWithPriority(&_stream, flags, priority);
 }
 switch (res) {
-case nvidia::cuda::cu_result::SUCCESS:
+case nvidia::cuda::result::SUCCESS:
 break;
 default:
 throw std::runtime_error("Failed to create CUstream object.");
@@ -43,7 +43,7 @@ nvidia::cuda::stream::~stream()
 _cuda->cuStreamDestroy(_stream);
 }
 
-::nvidia::cuda::cu_stream_t nvidia::cuda::stream::get()
+::nvidia::cuda::stream_t nvidia::cuda::stream::get()
 {
 return _stream;
 }

@@ -24,14 +24,14 @@
 namespace nvidia::cuda {
 class stream {
 std::shared_ptr<::nvidia::cuda::cuda> _cuda;
-::nvidia::cuda::cu_stream_t _stream;
+::nvidia::cuda::stream_t _stream;
 
 public:
 stream(std::shared_ptr<::nvidia::cuda::cuda> cuda,
-::nvidia::cuda::cu_stream_flags flags = ::nvidia::cuda::cu_stream_flags::DEFAULT,
+::nvidia::cuda::stream_flags flags = ::nvidia::cuda::stream_flags::DEFAULT,
 std::int32_t priority = 0);
 ~stream();
 
-::nvidia::cuda::cu_stream_t get();
+::nvidia::cuda::stream_t get();
 };
 } // namespace nvidia::cuda

@@ -32,15 +32,15 @@
 #pragma warning(pop)
 #endif
 
-#define CUDA_DEFINE_FUNCTION(name, ...) \
-private: \
-typedef ::nvidia::cuda::cu_result (*t##name)(__VA_ARGS__); \
-\
-public: \
+#define CUDA_DEFINE_FUNCTION(name, ...) \
+private: \
+typedef ::nvidia::cuda::result (*t##name)(__VA_ARGS__); \
+\
+public: \
 t##name name;
 
 namespace nvidia::cuda {
-enum class cu_result : std::size_t {
+enum class result : std::size_t {
 SUCCESS = 0,
 INVALID_VALUE = 1,
 OUT_OF_MEMORY = 2,
@@ -58,14 +58,14 @@ namespace nvidia::cuda {
 // Still missing some.
 };
 
-enum class cu_memory_type : std::uint32_t {
+enum class memory_type : std::uint32_t {
 HOST = 1,
 DEVICE = 2,
 ARRAY = 3,
 UNIFIED = 4,
 };
 
-enum class cu_array_format : std::uint32_t {
+enum class array_format : std::uint32_t {
 UNSIGNED_INT8 = 0b00000001,
 UNSIGNED_INT16 = 0b00000010,
 UNSIGNED_INT32 = 0b00000011,
@@ -76,7 +76,7 @@ namespace nvidia::cuda {
 FLOAT = 0b00100000,
 };
 
-enum class cu_context_flags : std::uint32_t {
+enum class context_flags : std::uint32_t {
 SCHEDULER_AUTO = 0x0,
 SCHEDULER_SPIN = 0x1,
 SCHEDULER_YIELD = 0x2,
@@ -85,46 +85,46 @@ namespace nvidia::cuda {
 LOCAL_MEMORY_RESIZE_TO_MAXIMUM = 0x10,
 };
 
-enum class cu_stream_flags : std::uint32_t {
+enum class stream_flags : std::uint32_t {
 DEFAULT = 0x0,
 NON_BLOCKING = 0x1,
 };
 
-typedef void* cu_array_t;
-typedef void* cu_context_t;
-typedef std::uint64_t cu_device_ptr_t;
-typedef void* cu_graphics_resource_t;
-typedef void* cu_stream_t;
-typedef std::int32_t cu_device_t;
+typedef void* array_t;
+typedef void* context_t;
+typedef std::uint64_t device_ptr_t;
+typedef void* graphics_resource_t;
+typedef void* stream_t;
+typedef std::int32_t device_t;
 
-struct cu_memcpy2d_t {
+struct memcpy2d_t {
 std::size_t src_x_in_bytes;
 std::size_t src_y;
 
-cu_memory_type src_memory_type;
-const void* src_host;
-cu_device_ptr_t src_device;
-cu_array_t src_array;
-std::size_t src_pitch;
+memory_type src_memory_type;
+const void* src_host;
+device_ptr_t src_device;
+array_t src_array;
+std::size_t src_pitch;
 
 std::size_t dst_x_in_bytes;
 std::size_t dst_y;
 
-cu_memory_type dst_memory_type;
-const void* dst_host;
-cu_device_ptr_t dst_device;
-cu_array_t dst_array;
-std::size_t dst_pitch;
+memory_type dst_memory_type;
+const void* dst_host;
+device_ptr_t dst_device;
+array_t dst_array;
+std::size_t dst_pitch;
 
 std::size_t width_in_bytes;
 std::size_t height;
 };
 
-struct cu_array_descriptor_t {
-std::size_t width;
-std::size_t height;
-std::uint32_t num_channels;
-cu_array_format format;
+struct array_descriptor_t {
+std::size_t width;
+std::size_t height;
+std::uint32_t num_channels;
+array_format format;
 };
 
 class cuda {
@@ -154,26 +154,26 @@ namespace nvidia::cuda {
 
 // Primary Context Management
 // cuDevicePrimaryCtxGetState
-CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxRelease, cu_device_t device);
+CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxRelease, device_t device);
 // cuDevicePrimaryCtxReset_v2
-CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxRetain, cu_context_t* ctx, cu_device_t device);
-CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxSetFlags, cu_device_t device, cu_context_flags flags);
+CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxRetain, context_t* ctx, device_t device);
+CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxSetFlags, device_t device, context_flags flags);
 
 // Context Management
-CUDA_DEFINE_FUNCTION(cuCtxCreate, cu_context_t* ctx, cu_context_flags flags, cu_device_t device);
-CUDA_DEFINE_FUNCTION(cuCtxDestroy, cu_context_t ctx);
+CUDA_DEFINE_FUNCTION(cuCtxCreate, context_t* ctx, context_flags flags, device_t device);
+CUDA_DEFINE_FUNCTION(cuCtxDestroy, context_t ctx);
 // cuCtxGetApiVersion
 // cuCtxGetCacheConfig
-CUDA_DEFINE_FUNCTION(cuCtxGetCurrent, cu_context_t* ctx);
+CUDA_DEFINE_FUNCTION(cuCtxGetCurrent, context_t* ctx);
 // cuCtxGetDevice
 // cuCtxGetFlags
 // cuCtxGetLimit
 // cuCtxGetSharedMemConfig
 CUDA_DEFINE_FUNCTION(cuCtxGetStreamPriorityRange, std::int32_t* lowestPriority, std::int32_t* highestPriority);
-CUDA_DEFINE_FUNCTION(cuCtxPopCurrent, cu_context_t* ctx);
-CUDA_DEFINE_FUNCTION(cuCtxPushCurrent, cu_context_t ctx);
+CUDA_DEFINE_FUNCTION(cuCtxPopCurrent, context_t* ctx);
+CUDA_DEFINE_FUNCTION(cuCtxPushCurrent, context_t ctx);
 // cuCtxSetCacheConfig
-CUDA_DEFINE_FUNCTION(cuCtxSetCurrent, cu_context_t ctx);
+CUDA_DEFINE_FUNCTION(cuCtxSetCurrent, context_t ctx);
 // cuCtxSetLimit
 // cuCtxSetSharedMemConfig
 // cuCtxSynchronize
@@ -201,7 +201,7 @@ namespace nvidia::cuda {
 // cuArray3DGetDescripter_v2
 // cuArrayCreate_v2
 // cuArrayDestroy
-CUDA_DEFINE_FUNCTION(cuArrayGetDescriptor, cu_array_descriptor_t* pArrayDescripter, cu_array_t array);
+CUDA_DEFINE_FUNCTION(cuArrayGetDescriptor, array_descriptor_t* pArrayDescripter, array_t array);
 // cuArrayGetDescriptor_v2
 // cuDeviceGetByPCIBusId
 // cuDeviceGetPCIBusId
@@ -210,46 +210,42 @@ namespace nvidia::cuda {
 // cuIpcGetMemHandle
 // cuIpcOpenEventHandle
 // cuIpcOpenMemHandle
-CUDA_DEFINE_FUNCTION(cuMemAlloc, cu_device_ptr_t* ptr, std::size_t bytes);
+CUDA_DEFINE_FUNCTION(cuMemAlloc, device_ptr_t* ptr, std::size_t bytes);
 // cuMemAllocHost_v2
 // cuMemAllocManaged
-CUDA_DEFINE_FUNCTION(cuMemAllocPitch, cu_device_ptr_t* ptr, std::size_t* pitch, std::size_t width_in_bytes,
+CUDA_DEFINE_FUNCTION(cuMemAllocPitch, device_ptr_t* ptr, std::size_t* pitch, std::size_t width_in_bytes,
 std::size_t height, std::uint32_t element_size_bytes);
-CUDA_DEFINE_FUNCTION(cuMemFree, cu_device_ptr_t ptr);
+CUDA_DEFINE_FUNCTION(cuMemFree, device_ptr_t ptr);
 // cuMemFreeHost
 // cuMemGetAddressRange_v2
 // cuMemGetInfo_v2
 // cuMemHostAlloc
-CUDA_DEFINE_FUNCTION(cuMemHostGetDevicePointer, cu_device_ptr_t* devptr, void* ptr, std::uint32_t flags);
+CUDA_DEFINE_FUNCTION(cuMemHostGetDevicePointer, device_ptr_t* devptr, void* ptr, std::uint32_t flags);
 // cuMemHostGetFlags
 // cuMemHostRegister_v2
 // cuMemHostUnregister
-CUDA_DEFINE_FUNCTION(cuMemcpy, cu_device_ptr_t dst, cu_device_ptr_t src, std::size_t bytes);
-CUDA_DEFINE_FUNCTION(cuMemcpy2D, const cu_memcpy2d_t* copy);
-CUDA_DEFINE_FUNCTION(cuMemcpy2DAsync, const cu_memcpy2d_t* copy, cu_stream_t stream);
+CUDA_DEFINE_FUNCTION(cuMemcpy, device_ptr_t dst, device_ptr_t src, std::size_t bytes);
+CUDA_DEFINE_FUNCTION(cuMemcpy2D, const memcpy2d_t* copy);
+CUDA_DEFINE_FUNCTION(cuMemcpy2DAsync, const memcpy2d_t* copy, stream_t stream);
 // cuMemcpy2DUnaligned_v2 / _v2_ptds
 // cuMemcpy3D_v2 / _v2_ptds
 // cuMemcpy3DAsync_v2 / _v2_ptsz
 // cuMemcpy3DPeer / _ptds
 // cuMemcpy3DPeerAsync_v2 / _v2_ptsz
 // cuMemcpyAsync / _ptsz
-CUDA_DEFINE_FUNCTION(cuMemcpyAtoA, cu_array_t dst, std::size_t dstOffset, cu_array_t src, std::size_t srcOffset,
+CUDA_DEFINE_FUNCTION(cuMemcpyAtoA, array_t dst, std::size_t dstOffset, array_t src, std::size_t srcOffset,
 std::size_t byteCount);
-CUDA_DEFINE_FUNCTION(cuMemcpyAtoD, cu_device_ptr_t dst, cu_array_t src, std::size_t srcOffset,
-std::size_t byteCount);
-CUDA_DEFINE_FUNCTION(cuMemcpyAtoH, void* dst, cu_array_t src, std::size_t srcOffset, std::size_t byteCount);
-CUDA_DEFINE_FUNCTION(cuMemcpyAtoHAsync, void* dst, cu_array_t src, std::size_t srcOffset,
-std::size_t byteCount);
-CUDA_DEFINE_FUNCTION(cuMemcpyDtoA, cu_array_t dst, std::size_t dstOffset, cu_device_ptr_t src,
-std::size_t byteCount);
-CUDA_DEFINE_FUNCTION(cuMemcpyDtoD, cu_device_ptr_t dst, cu_array_t srcArray, std::size_t byteCount);
-CUDA_DEFINE_FUNCTION(cuMemcpyDtoH, void* dst, cu_array_t src, std::size_t byteCount);
-CUDA_DEFINE_FUNCTION(cuMemcpyDtoHAsync, void* dst, cu_array_t src, std::size_t byteCount);
-CUDA_DEFINE_FUNCTION(cuMemcpyHtoA, cu_array_t dst, std::size_t dstOffset, void* src, std::size_t byteCount);
-CUDA_DEFINE_FUNCTION(cuMemcpyHtoAAsync, cu_array_t dst, std::size_t dstOffset, void* src,
-std::size_t byteCount);
-CUDA_DEFINE_FUNCTION(cuMemcpyHtoD, cu_device_ptr_t dst, void* src, std::size_t byteCount);
-CUDA_DEFINE_FUNCTION(cuMemcpyHtoDAsync, cu_device_ptr_t dst, void* src, std::size_t byteCount);
+CUDA_DEFINE_FUNCTION(cuMemcpyAtoD, device_ptr_t dst, array_t src, std::size_t srcOffset, std::size_t byteCount);
+CUDA_DEFINE_FUNCTION(cuMemcpyAtoH, void* dst, array_t src, std::size_t srcOffset, std::size_t byteCount);
+CUDA_DEFINE_FUNCTION(cuMemcpyAtoHAsync, void* dst, array_t src, std::size_t srcOffset, std::size_t byteCount);
+CUDA_DEFINE_FUNCTION(cuMemcpyDtoA, array_t dst, std::size_t dstOffset, device_ptr_t src, std::size_t byteCount);
+CUDA_DEFINE_FUNCTION(cuMemcpyDtoD, device_ptr_t dst, array_t srcArray, std::size_t byteCount);
+CUDA_DEFINE_FUNCTION(cuMemcpyDtoH, void* dst, array_t src, std::size_t byteCount);
+CUDA_DEFINE_FUNCTION(cuMemcpyDtoHAsync, void* dst, array_t src, std::size_t byteCount);
+CUDA_DEFINE_FUNCTION(cuMemcpyHtoA, array_t dst, std::size_t dstOffset, void* src, std::size_t byteCount);
+CUDA_DEFINE_FUNCTION(cuMemcpyHtoAAsync, array_t dst, std::size_t dstOffset, void* src, std::size_t byteCount);
+CUDA_DEFINE_FUNCTION(cuMemcpyHtoD, device_ptr_t dst, void* src, std::size_t byteCount);
+CUDA_DEFINE_FUNCTION(cuMemcpyHtoDAsync, device_ptr_t dst, void* src, std::size_t byteCount);
 // cuMemcpyPeer / _ptds
 // cuMemcpyPeerAsync / _ptsz
 // cuMemsetD16
@@ -295,10 +291,9 @@ namespace nvidia::cuda {
 // cuStreamAddCallback
 // cuStreamAttachMemAsync
 // cuStreamBeginCapture_v2
-CUDA_DEFINE_FUNCTION(cuStreamCreate, cu_stream_t* stream, cu_stream_flags flags);
-CUDA_DEFINE_FUNCTION(cuStreamCreateWithPriority, cu_stream_t* stream, cu_stream_flags flags,
-std::int32_t priority);
-CUDA_DEFINE_FUNCTION(cuStreamDestroy, cu_stream_t stream);
+CUDA_DEFINE_FUNCTION(cuStreamCreate, stream_t* stream, stream_flags flags);
+CUDA_DEFINE_FUNCTION(cuStreamCreateWithPriority, stream_t* stream, stream_flags flags, std::int32_t priority);
+CUDA_DEFINE_FUNCTION(cuStreamDestroy, stream_t stream);
 // cuStreamEndCapture
 // cuStreamGetCaptureInfo
 // cuStreamGetCtx
@@ -306,7 +301,7 @@ namespace nvidia::cuda {
 // cuStreamGetPriority
 // cuStreamIsCapturing
 // cuStreamQuery
-CUDA_DEFINE_FUNCTION(cuStreamSynchronize, cu_stream_t stream);
+CUDA_DEFINE_FUNCTION(cuStreamSynchronize, stream_t stream);
 // cuStreamWaitEvent
 // cuThreadExchangeStreamCaptureMode
 
@@ -361,16 +356,16 @@ namespace nvidia::cuda {
 // Todo
 
 // Graphics Interoperability
-CUDA_DEFINE_FUNCTION(cuGraphicsMapResources, std::uint32_t count, cu_graphics_resource_t* resources,
-cu_stream_t stream);
+CUDA_DEFINE_FUNCTION(cuGraphicsMapResources, std::uint32_t count, graphics_resource_t* resources,
+stream_t stream);
 // cuGraphicsResourcesGetMappedMipmappedArray
 // cuGraphicsResourcesGetMappedPointer_v2
 // cuGraphicsResourcesSetMapFlags_v2
-CUDA_DEFINE_FUNCTION(cuGraphicsSubResourceGetMappedArray, cu_array_t* array, cu_graphics_resource_t resource,
+CUDA_DEFINE_FUNCTION(cuGraphicsSubResourceGetMappedArray, array_t* array, graphics_resource_t resource,
 std::uint32_t index, std::uint32_t level);
-CUDA_DEFINE_FUNCTION(cuGraphicsUnmapResources, std::uint32_t count, cu_graphics_resource_t* resources,
-cu_stream_t stream);
-CUDA_DEFINE_FUNCTION(cuGraphicsUnregisterResource, cu_graphics_resource_t resource);
+CUDA_DEFINE_FUNCTION(cuGraphicsUnmapResources, std::uint32_t count, graphics_resource_t* resources,
+stream_t stream);
+CUDA_DEFINE_FUNCTION(cuGraphicsUnregisterResource, graphics_resource_t resource);
 
 // Profile Control
 // Todo
@@ -396,13 +391,13 @@ namespace nvidia::cuda {
 // cuGraphicsD3D10RegisterResource
 
 // Direct3D11 Interopability
-CUDA_DEFINE_FUNCTION(cuD3D11GetDevice, cu_device_t* device, IDXGIAdapter* adapter);
+CUDA_DEFINE_FUNCTION(cuD3D11GetDevice, device_t* device, IDXGIAdapter* adapter);
 // cuD3D11GetDevices
-CUDA_DEFINE_FUNCTION(cuGraphicsD3D11RegisterResource, cu_graphics_resource_t* resource,
+CUDA_DEFINE_FUNCTION(cuGraphicsD3D11RegisterResource, graphics_resource_t* resource,
 ID3D11Resource* d3dresource, std::uint32_t flags);
 #endif
 };
 } // namespace nvidia::cuda
 
-P_ENABLE_BITMASK_OPERATORS(::nvidia::cuda::cu_context_flags)
-P_ENABLE_BITMASK_OPERATORS(::nvidia::cuda::cu_stream_flags)
+P_ENABLE_BITMASK_OPERATORS(::nvidia::cuda::context_flags)
+P_ENABLE_BITMASK_OPERATORS(::nvidia::cuda::stream_flags)
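The CUDA_DEFINE_FUNCTION macro touched in the last file above declares, for each driver entry point, a typedef of the function-pointer type (now returning result instead of cu_result) and a public member of that type which the loader fills in at runtime. The following is a rough, self-contained sketch of what one invocation expands to; the cuda_sketch class and the main() harness are hypothetical stand-ins for the real cuda loader class, which is not shown in this diff:

#include <cstddef>

namespace nvidia::cuda {
	enum class result : std::size_t { SUCCESS = 0 };
	typedef void* stream_t;

	// Post-commit form of the macro from nvidia-cuda.hpp: a private typedef of the
	// driver function's signature plus a public member holding the loaded pointer.
#define CUDA_DEFINE_FUNCTION(name, ...)                          \
private:                                                         \
	typedef ::nvidia::cuda::result (*t##name)(__VA_ARGS__);     \
                                                                 \
public:                                                          \
	t##name name;

	// Hypothetical stand-in for the real `cuda` loader class (not part of the commit).
	class cuda_sketch {
		CUDA_DEFINE_FUNCTION(cuStreamDestroy, stream_t stream);
	};
#undef CUDA_DEFINE_FUNCTION
} // namespace nvidia::cuda

int main()
{
	nvidia::cuda::cuda_sketch c{};
	// The macro produced a public member named after the entry point; it starts out null
	// until the loader assigns the address resolved from the CUDA driver library.
	return c.cuStreamDestroy == nullptr ? 0 : 1;
}

Keeping the typedef and the member declaration in one macro is what keeps the long function table in nvidia-cuda.hpp readable; this commit only changes the return type named inside that macro.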