obs-StreamFX/source/nvidia/cuda/nvidia-cuda.hpp
Michael Fabian 'Xaymar' Dirks 5a3954ae0e project: Fix License, License headers and Copyright information
Fixes several files that incorrectly stated a different license from the actual project, as well as the copyright headers included in all files. This change has no effect on the licensing terms; it should clear up a bit of confusion among contributors. Plus, the files get a bit smaller, and we have less duplicated information across the entire project.

Overall the project is GPLv2 if not built with Qt, and GPLv3 if it is built with Qt. There are no parts licensed under a different license, all have been adapted from other compatible licenses into GPLv2 or GPLv3.
2023-04-05 18:59:08 +02:00

347 lines
11 KiB
C++

// AUTOGENERATED COPYRIGHT HEADER START
// Copyright (C) 2020-2023 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
// AUTOGENERATED COPYRIGHT HEADER END
#pragma once
#include "util/util-bitmask.hpp"
#include "util/util-library.hpp"
#include "warning-disable.hpp"
#include <cstddef>
#include <tuple>
#include "warning-enable.hpp"
#ifdef WIN32
#include "warning-disable.hpp"
#include <d3d11.h>
#include <dxgi.h>
#include "warning-enable.hpp"
#endif
// Declares one CUDA driver entry point inside a class body.
//
// For `P_CUDA_DEFINE_FUNCTION(cuFoo, int a, int b)` this expands to:
//   - a private alias `tcuFoo` for "pointer to function taking (int a, int b)
//     and returning ::streamfx::nvidia::cuda::result", and
//   - a public data member `cuFoo` of that type, initialised to nullptr.
//
// The expansion ends in `public:` access, so declarations that follow the
// macro are public. The macro already terminates the member with `;`.
#define P_CUDA_DEFINE_FUNCTION(name, ...)                              \
private:                                                               \
	using t##name = ::streamfx::nvidia::cuda::result (*)(__VA_ARGS__); \
                                                                       \
public:                                                                \
	t##name name = nullptr;
namespace streamfx::nvidia::cuda {
// Status code returned by the CUDA driver entry points bound in this header.
//
// The driver's CUresult is a plain C enum, i.e. a 32-bit value. The underlying
// type here must have the same width, because this enum is used directly as
// the return type of the driver function pointers: with a wider type the
// caller would read return-register bits the callee never defined.
// uint32_t also matches the other enums in this header.
enum class result : uint32_t {
	SUCCESS         = 0,
	INVALID_VALUE   = 1,
	OUT_OF_MEMORY   = 2,
	NOT_INITIALIZED = 3,
	DEINITIALIZED   = 4,

	NO_DEVICE      = 100,
	INVALID_DEVICE = 101,

	INVALID_CONTEXT          = 201,
	MAP_FAILED               = 205,
	UNMAP_FAILED             = 206,
	ARRAY_IS_MAPPED          = 207,
	ALREADY_MAPPED           = 208,
	NOT_MAPPED               = 211,
	INVALID_GRAPHICS_CONTEXT = 219,
	// Still missing some.
};
// Kind of memory an endpoint of a copy refers to (used by memcpy2d_v2_t).
// Values presumably mirror the driver's CUmemorytype - confirm against cuda.h.
enum class memory_type : uint32_t {
	HOST    = 0x01,
	DEVICE  = 0x02,
	ARRAY   = 0x03,
	UNIFIED = 0x04,
};
// Element format of a CUDA array (see array_descriptor_v2_t::format).
// Values presumably mirror the driver's CUarray_format - confirm against cuda.h.
enum class array_format : uint32_t {
	UNSIGNED_INT8  = 0x01,
	UNSIGNED_INT16 = 0x02,
	UNSIGNED_INT32 = 0x03,
	SIGNED_INT8    = 0x08,
	SIGNED_INT16   = 0x09,
	SIGNED_INT32   = 0x0A,
	HALF           = 0x10,
	FLOAT          = 0x20,
};
// Flags accepted when creating or configuring a context (cuCtxCreate,
// cuDevicePrimaryCtxSetFlags). Apart from SCHEDULER_AUTO (zero) each value is
// a distinct single bit.
enum class context_flags : uint32_t {
	SCHEDULER_AUTO                 = 0u,
	SCHEDULER_SPIN                 = 1u << 0,
	SCHEDULER_YIELD                = 1u << 1,
	SCHEDULER_BLOCKING_SYNC        = 1u << 2,
	MAP_HOST                       = 1u << 3,
	LOCAL_MEMORY_RESIZE_TO_MAXIMUM = 1u << 4,
};
// Discriminator for the handle union in external_memory_handle_info_v1_t:
// tells which kind of OS / graphics-API handle is being described.
enum class external_memory_handle_type : uint32_t {
	INVALID                      = 0x0,
	FILE_DESCRIPTOR              = 0x1,
	WIN32_SHARED_HANDLE          = 0x2,
	WIN32_GLOBAL_SHARED_HANDLE   = 0x3,
	D3D12_HEAP                   = 0x4,
	D3D12_RESOURCE               = 0x5,
	D3D11_SHARED_RESOURCE        = 0x6,
	D3D11_GLOBAL_SHARED_RESOURCE = 0x7,
	NVSCIBUF                     = 0x8,
};
// Flags accepted by cuStreamCreate / cuStreamCreateWithPriority.
enum class stream_flags : uint32_t {
	DEFAULT      = 0u,
	NON_BLOCKING = 1u,
};
// Handle aliases used by the driver bindings below. Pointer-typed handles are
// opaque here (plain void*); device_ptr_t is a 64-bit integer address and
// device_t is a 32-bit integer.
using array_t             = void*;
using context_t           = void*;
using device_ptr_t        = uint64_t;
using external_memory_t   = void*;
using graphics_resource_t = void*;
using stream_t            = void*;
using device_t            = int32_t;
// Parameter block for cuMemcpy2D / cuMemcpy2DAsync. It is passed to the driver
// by pointer, so member order and types define the binary layout and must not
// be rearranged.
//
// For each side (src/dst) the *_memory_type member presumably selects which of
// the *_host / *_device / *_array members is read - confirm against the CUDA
// documentation for CUDA_MEMCPY2D.
struct memcpy2d_v2_t {
	// Source.
	std::size_t  src_x_in_bytes;
	std::size_t  src_y;
	memory_type  src_memory_type;
	const void*  src_host;
	device_ptr_t src_device;
	array_t      src_array;
	std::size_t  src_pitch;

	// Destination.
	std::size_t  dst_x_in_bytes;
	std::size_t  dst_y;
	memory_type  dst_memory_type;
	// NOTE(review): const-qualified although this is the destination side;
	// check whether `void*` was intended.
	const void*  dst_host;
	device_ptr_t dst_device;
	array_t      dst_array;
	std::size_t  dst_pitch;

	// Extent of the copied region.
	std::size_t width_in_bytes;
	std::size_t height;
};
// Description of a CUDA array as filled in by cuArrayGetDescriptor.
// Written by the driver through a pointer, so the member order is fixed.
struct array_descriptor_v2_t {
	std::size_t  width;
	std::size_t  height;
	uint32_t     num_channels;
	array_format format;
};
// Describes a buffer (offset + size) inside an imported external memory
// object. Presumably mirrors the driver's CUDA_EXTERNAL_MEMORY_BUFFER_DESC -
// confirm against cuda.h. Member order defines the binary layout.
struct external_memory_buffer_info_v1_t {
	uint64_t offset;
	uint64_t size;
	uint32_t flags;
	uint32_t reserved[16]; // Padding reserved by the API.
};
// Describes an external memory handle to import. `type` tells which member of
// the anonymous union is meaningful. Member order defines the binary layout.
struct external_memory_handle_info_v1_t {
	external_memory_handle_type type;

	union {
		// File-descriptor based handle.
		int32_t file;

		// Win32 handle, given either directly or by name.
		struct {
			void*       handle;
			const void* name;
		};

		// NvSciBuf object.
		const void* nvscibuf;
	};

	uint64_t size;
	uint32_t flags;
	uint32_t reserved[16]; // Padding reserved by the API.
};
// 16-byte device UUID as written by cuDeviceGetUuid. The same storage can be
// viewed either as raw bytes or as six integer fields (4+2+2+2+2+4 bytes).
struct uuid_t {
	union {
		char bytes[16];

		struct {
			uint32_t a;
			uint16_t b;
			uint16_t c;
			uint16_t d;
			uint16_t e;
			uint32_t f;
		} uuid;
	};
};
// 8-byte locally unique identifier as written by cuDeviceGetLuid. The same
// storage can be viewed as raw bytes, as a low/high pair, or as one 64-bit
// integer.
struct luid_t {
	union {
		char bytes[8];

		struct {
			uint32_t low;
			int32_t  high;
		} parts;

		uint64_t luid;
	};
};
// Exception that carries the status code of a failed CUDA driver call.
class cuda_error : public std::exception {
	::streamfx::nvidia::cuda::result _code;

public:
	~cuda_error() override = default;

	// Wraps the given status code.
	cuda_error(::streamfx::nvidia::cuda::result code) : _code(code) {}

	// Returns the status code this exception was constructed with.
	// Const-qualified so it can be called through the usual
	// `catch (const cuda_error& e)` handler.
	::streamfx::nvidia::cuda::result code() const noexcept
	{
		return _code;
	}
};
// Binding to the CUDA Driver API.
//
// Every P_CUDA_DEFINE_FUNCTION line declares a public function-pointer member
// named after the driver entry point, initialised to nullptr. How and when the
// pointers are resolved is up to the implementation file (not visible here);
// `_library` presumably keeps the driver library loaded while this object
// lives. Parameter lists must match the driver's C prototypes exactly, because
// calls go straight through these pointers with no further checking.
class cuda {
	std::shared_ptr<streamfx::util::library> _library;

public:
	~cuda();
	cuda();

	int32_t version();

public:
	// Initialization
	P_CUDA_DEFINE_FUNCTION(cuInit, int32_t flags);

	// Version Management
	P_CUDA_DEFINE_FUNCTION(cuDriverGetVersion, int32_t* driverVersion);

	// Device Management
	P_CUDA_DEFINE_FUNCTION(cuDeviceGetName, char* name, int32_t length, device_t device);
	P_CUDA_DEFINE_FUNCTION(cuDeviceGetLuid, luid_t* luid, uint32_t* device_node_mask, device_t device);
	P_CUDA_DEFINE_FUNCTION(cuDeviceGetUuid, uuid_t* uuid, device_t device);
	// - Remaining device management functions are not yet needed.

	// Primary Context Management
	P_CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxRelease, device_t device);
	P_CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxRetain, context_t* ctx, device_t device);
	P_CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxSetFlags, device_t device, context_flags flags);

	// Context Management
	P_CUDA_DEFINE_FUNCTION(cuCtxCreate, context_t* ctx, context_flags flags, device_t device);
	P_CUDA_DEFINE_FUNCTION(cuCtxDestroy, context_t ctx);
	P_CUDA_DEFINE_FUNCTION(cuCtxGetCurrent, context_t* ctx);
	P_CUDA_DEFINE_FUNCTION(cuCtxGetStreamPriorityRange, int32_t* lowestPriority, int32_t* highestPriority);
	P_CUDA_DEFINE_FUNCTION(cuCtxPopCurrent, context_t* ctx);
	P_CUDA_DEFINE_FUNCTION(cuCtxPushCurrent, context_t ctx);
	P_CUDA_DEFINE_FUNCTION(cuCtxSetCurrent, context_t ctx);
	P_CUDA_DEFINE_FUNCTION(cuCtxSynchronize);

	// Module Management
	// - Not yet needed.

	// Memory Management
	P_CUDA_DEFINE_FUNCTION(cuArrayGetDescriptor, array_descriptor_v2_t* pArrayDescriptor, array_t array);
	P_CUDA_DEFINE_FUNCTION(cuMemAlloc, device_ptr_t* ptr, std::size_t bytes);
	P_CUDA_DEFINE_FUNCTION(cuMemAllocPitch, device_ptr_t* ptr, std::size_t* pitch, std::size_t width_in_bytes,
						   std::size_t height, uint32_t element_size_bytes);
	P_CUDA_DEFINE_FUNCTION(cuMemFree, device_ptr_t ptr);
	P_CUDA_DEFINE_FUNCTION(cuMemHostGetDevicePointer, device_ptr_t* devptr, void* ptr, uint32_t flags);
	P_CUDA_DEFINE_FUNCTION(cuMemcpy, device_ptr_t dst, device_ptr_t src, std::size_t bytes);
	P_CUDA_DEFINE_FUNCTION(cuMemcpy2D, const memcpy2d_v2_t* copy);
	P_CUDA_DEFINE_FUNCTION(cuMemcpy2DAsync, const memcpy2d_v2_t* copy, stream_t stream);
	P_CUDA_DEFINE_FUNCTION(cuMemcpyAtoA, array_t dst, std::size_t dstOffset, array_t src, std::size_t srcOffset,
						   std::size_t byteCount);
	P_CUDA_DEFINE_FUNCTION(cuMemcpyAtoD, device_ptr_t dst, array_t src, std::size_t srcOffset,
						   std::size_t byteCount);
	P_CUDA_DEFINE_FUNCTION(cuMemcpyAtoH, void* dst, array_t src, std::size_t srcOffset, std::size_t byteCount);
	// Every *Async copy takes the stream to enqueue on as its last argument,
	// exactly like the driver prototype.
	P_CUDA_DEFINE_FUNCTION(cuMemcpyAtoHAsync, void* dst, array_t src, std::size_t srcOffset, std::size_t byteCount,
						   stream_t stream);
	P_CUDA_DEFINE_FUNCTION(cuMemcpyDtoA, array_t dst, std::size_t dstOffset, device_ptr_t src,
						   std::size_t byteCount);
	// NOTE(review): the driver declares the source of cuMemcpyDtoD and
	// cuMemcpyDtoH as a device pointer (CUdeviceptr), not an array handle.
	// `array_t` is kept for these two so existing call sites keep compiling
	// (both are 64 bits wide on 64-bit targets) - confirm and migrate the
	// parameter to `device_ptr_t`.
	P_CUDA_DEFINE_FUNCTION(cuMemcpyDtoD, device_ptr_t dst, array_t srcArray, std::size_t byteCount);
	P_CUDA_DEFINE_FUNCTION(cuMemcpyDtoH, void* dst, array_t src, std::size_t byteCount);
	P_CUDA_DEFINE_FUNCTION(cuMemcpyDtoHAsync, void* dst, device_ptr_t src, std::size_t byteCount, stream_t stream);
	// Host sources are only read by the driver, hence const.
	P_CUDA_DEFINE_FUNCTION(cuMemcpyHtoA, array_t dst, std::size_t dstOffset, const void* src,
						   std::size_t byteCount);
	P_CUDA_DEFINE_FUNCTION(cuMemcpyHtoAAsync, array_t dst, std::size_t dstOffset, const void* src,
						   std::size_t byteCount, stream_t stream);
	P_CUDA_DEFINE_FUNCTION(cuMemcpyHtoD, device_ptr_t dst, const void* src, std::size_t byteCount);
	P_CUDA_DEFINE_FUNCTION(cuMemcpyHtoDAsync, device_ptr_t dst, const void* src, std::size_t byteCount,
						   stream_t stream);
	// For the memset family the last size argument is the number of elements
	// of the given width (for D8 that equals the number of bytes).
	P_CUDA_DEFINE_FUNCTION(cuMemsetD8, device_ptr_t dst, uint8_t d, std::size_t byteCount);
	P_CUDA_DEFINE_FUNCTION(cuMemsetD8Async, device_ptr_t dst, uint8_t d, std::size_t byteCount, stream_t stream);
	P_CUDA_DEFINE_FUNCTION(cuMemsetD16, device_ptr_t dst, uint16_t d, std::size_t count);
	P_CUDA_DEFINE_FUNCTION(cuMemsetD16Async, device_ptr_t dst, uint16_t d, std::size_t count, stream_t stream);
	P_CUDA_DEFINE_FUNCTION(cuMemsetD32, device_ptr_t dst, uint32_t d, std::size_t count);
	P_CUDA_DEFINE_FUNCTION(cuMemsetD32Async, device_ptr_t dst, uint32_t d, std::size_t count, stream_t stream);

	// Virtual Memory Management
	// - Not yet needed.

	// Stream Ordered Memory Allocator
	// - Not yet needed.

	// Unified Addressing
	// - Not yet needed.

	// Stream Management
	P_CUDA_DEFINE_FUNCTION(cuStreamCreate, stream_t* stream, stream_flags flags);
	P_CUDA_DEFINE_FUNCTION(cuStreamCreateWithPriority, stream_t* stream, stream_flags flags, int32_t priority);
	P_CUDA_DEFINE_FUNCTION(cuStreamDestroy, stream_t stream);
	P_CUDA_DEFINE_FUNCTION(cuStreamSynchronize, stream_t stream);
	P_CUDA_DEFINE_FUNCTION(cuStreamGetPriority, stream_t stream, int32_t* priority);

	// Event Management
	// - Not yet needed.

	// External Resource Interoperability (CUDA 11.1+)
	// - Not yet needed.

	// Stream Memory Operations
	// - Not yet needed.

	// Execution Control
	// - Not yet needed.

	// Graph Management
	// - Not yet needed.

	// Occupancy
	// - Not yet needed.

	// Texture Object Management
	// - Not yet needed.

	// Surface Object Management
	// - Not yet needed.

	// Peer Context Memory Access
	// - Not yet needed.

	// Graphics Interoperability
	P_CUDA_DEFINE_FUNCTION(cuGraphicsMapResources, uint32_t count, graphics_resource_t* resources, stream_t stream);
	P_CUDA_DEFINE_FUNCTION(cuGraphicsSubResourceGetMappedArray, array_t* array, graphics_resource_t resource,
						   uint32_t index, uint32_t level);
	P_CUDA_DEFINE_FUNCTION(cuGraphicsUnmapResources, uint32_t count, graphics_resource_t* resources,
						   stream_t stream);
	P_CUDA_DEFINE_FUNCTION(cuGraphicsUnregisterResource, graphics_resource_t resource);

	// Driver Entry Point Access
	// - Not yet needed.

	// Profiler Control
	// - Not yet needed.

	// OpenGL Interoperability
	// - Not yet needed.

	// VDPAU Interoperability
	// - Not yet needed.

	// EGL Interoperability
	// - Not yet needed.

#ifdef WIN32
	// Direct3D9 Interoperability
	// - Not yet needed.

	// Direct3D10 Interoperability
	P_CUDA_DEFINE_FUNCTION(cuD3D10GetDevice, device_t* device, IDXGIAdapter* adapter);
	P_CUDA_DEFINE_FUNCTION(cuGraphicsD3D10RegisterResource, graphics_resource_t* resource,
						   ID3D10Resource* d3dresource, uint32_t flags);

	// Direct3D11 Interoperability
	P_CUDA_DEFINE_FUNCTION(cuD3D11GetDevice, device_t* device, IDXGIAdapter* adapter);
	P_CUDA_DEFINE_FUNCTION(cuGraphicsD3D11RegisterResource, graphics_resource_t* resource,
						   ID3D11Resource* d3dresource, uint32_t flags);
#endif

public:
	// Returns a shared instance of the binding (defined in the implementation
	// file; creation and caching policy are not visible from this header).
	static std::shared_ptr<::streamfx::nvidia::cuda::cuda> get();
};
} // namespace streamfx::nvidia::cuda
// Opt the two flag enums into bitmask-style operators. The macro is not defined
// in this file - presumably it comes from util/util-bitmask.hpp and enables
// operator|, operator& and friends for the named scoped enum; verify there.
// Invoked at global scope, outside the streamfx::nvidia::cuda namespace, with
// fully qualified enum names.
P_ENABLE_BITMASK_OPERATORS(::streamfx::nvidia::cuda::context_flags)
P_ENABLE_BITMASK_OPERATORS(::streamfx::nvidia::cuda::stream_flags)