/*
 * Modern effects for a modern Streamer
 * Copyright (C) 2020 Michael Fabian Dirks
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
 */

#include "nvidia-cuda.hpp"
#include <mutex>
#include "util/util-logging.hpp"

// File-local logging helpers that forward to the P_LOG_* macros.
//
// - With _DEBUG defined, every message is prefixed with "<%s> ", and the "%s"
//   is filled with __FUNCTION_SIG__. The format string `x` must be a string
//   literal (it is joined to the prefix by adjacent-literal concatenation), and
//   at least one variadic argument is required; call sites without format
//   arguments pass a dummy 0.
// - Otherwise a fixed "<nvidia::cuda::cuda> " prefix is prepended.
//
// The prefix is joined with plain literal concatenation (`ST_PREFIX x`) rather
// than the `##` operator: `##` does not expand ST_PREFIX and pasting it onto a
// string literal does not form a valid preprocessing token.
#ifdef _DEBUG
#define ST_PREFIX "<%s> "
#define D_LOG_ERROR(x, ...) P_LOG_ERROR(ST_PREFIX x, __FUNCTION_SIG__, __VA_ARGS__)
#define D_LOG_WARNING(x, ...) P_LOG_WARN(ST_PREFIX x, __FUNCTION_SIG__, __VA_ARGS__)
#define D_LOG_INFO(x, ...) P_LOG_INFO(ST_PREFIX x, __FUNCTION_SIG__, __VA_ARGS__)
#define D_LOG_DEBUG(x, ...) P_LOG_DEBUG(ST_PREFIX x, __FUNCTION_SIG__, __VA_ARGS__)
#else
#define ST_PREFIX "<nvidia::cuda::cuda> "
#define D_LOG_ERROR(...) P_LOG_ERROR(ST_PREFIX __VA_ARGS__)
#define D_LOG_WARNING(...) P_LOG_WARN(ST_PREFIX __VA_ARGS__)
#define D_LOG_INFO(...) P_LOG_INFO(ST_PREFIX __VA_ARGS__)
#define D_LOG_DEBUG(...) P_LOG_DEBUG(ST_PREFIX __VA_ARGS__)
#endif

// File name of the CUDA driver library handed to the library loader:
// "nvcuda.dll" when _WIN32 or _WIN64 is defined, "libcuda.so.1" otherwise.
#if !defined(_WIN32) && !defined(_WIN64)
#define ST_CUDA_NAME "libcuda.so.1"
#else
#define ST_CUDA_NAME "nvcuda.dll"
#endif

// Symbol loading helpers.
//
// Each macro resolves one entry point through `_library->load_symbol(...)` and
// stores it in the function pointer named NAME (cast to NAME's own type).
//   - Plain variants look up the symbol called NAME.
//   - `_EX` variants look up the symbol called OVERRIDE instead.
//   - `_V2` variants look up NAME with a "_v2" suffix.
//   - Non-`OPT` variants throw std::runtime_error when the lookup yields null;
//     `OPT` variants only log a warning and leave NAME null.
//
// All bodies are wrapped in `do { ... } while (0)` so that `MACRO(x);` is a
// single statement and stays well-formed inside un-braced if/else. Diagnostics
// name the symbol that was actually requested from the library.

// Required symbol, looked up under the name NAME.
#define P_CUDA_LOAD_SYMBOL(NAME)                                                             \
	do {                                                                                     \
		NAME = reinterpret_cast<decltype(NAME)>(_library->load_symbol(#NAME));               \
		if (!NAME)                                                                           \
			throw std::runtime_error("Failed to load '" #NAME "' from '" ST_CUDA_NAME "'."); \
	} while (0)

// Optional symbol, looked up under the name NAME.
#define P_CUDA_LOAD_SYMBOL_OPT(NAME)                                            \
	do {                                                                        \
		NAME = reinterpret_cast<decltype(NAME)>(_library->load_symbol(#NAME));  \
		if (!NAME)                                                              \
			D_LOG_WARNING("Loading of optional symbol '" #NAME "' failed.", 0); \
	} while (0)

// Required symbol, stored in NAME but looked up under the name OVERRIDE.
#define P_CUDA_LOAD_SYMBOL_EX(NAME, OVERRIDE)                                                    \
	do {                                                                                         \
		NAME = reinterpret_cast<decltype(NAME)>(_library->load_symbol(#OVERRIDE));               \
		if (!NAME)                                                                               \
			throw std::runtime_error("Failed to load '" #OVERRIDE "' from '" ST_CUDA_NAME "'."); \
	} while (0)

// Optional symbol, stored in NAME but looked up under the name OVERRIDE.
#define P_CUDA_LOAD_SYMBOL_OPT_EX(NAME, OVERRIDE)                                   \
	do {                                                                            \
		NAME = reinterpret_cast<decltype(NAME)>(_library->load_symbol(#OVERRIDE));  \
		if (!NAME)                                                                  \
			D_LOG_WARNING("Loading of optional symbol '" #OVERRIDE "' failed.", 0); \
	} while (0)

// Required symbol, stored in NAME but looked up as "<NAME>_v2".
#define P_CUDA_LOAD_SYMBOL_V2(NAME)                                                             \
	do {                                                                                        \
		NAME = reinterpret_cast<decltype(NAME)>(_library->load_symbol(#NAME "_v2"));            \
		if (!NAME)                                                                              \
			throw std::runtime_error("Failed to load '" #NAME "_v2' from '" ST_CUDA_NAME "'."); \
	} while (0)

// Optional symbol, stored in NAME but looked up as "<NAME>_v2".
#define P_CUDA_LOAD_SYMBOL_OPT_V2(NAME)                                              \
	do {                                                                             \
		NAME = reinterpret_cast<decltype(NAME)>(_library->load_symbol(#NAME "_v2")); \
		if (!NAME)                                                                   \
			D_LOG_WARNING("Loading of optional symbol '" #NAME "_v2' failed.", 0);   \
	} while (0)

/** Log the finalization of this instance.
 *
 * The address is logged in hexadecimal (PRIxPTR) so that it matches the "0x"
 * prefix, and `this` is converted to uintptr_t so the argument type matches
 * the format specifier.
 */
nvidia::cuda::cuda::~cuda()
{
	D_LOG_DEBUG("Finalizing... (Addr: 0x%" PRIxPTR ")", reinterpret_cast<uintptr_t>(this));
}

/** Load the CUDA driver library and resolve the driver entry points used here.
 *
 * Sequence:
 *  1. Load the library named by ST_CUDA_NAME.
 *  2. Resolve cuInit and cuDriverGetVersion.
 *  3. Query and log the driver version (a failed query only logs a warning).
 *  4. Resolve all remaining entry points.
 *  5. Call cuInit(0); a failure is logged but does not abort construction.
 *
 * @throws std::runtime_error if any required symbol (loaded through a
 *         non-OPT P_CUDA_LOAD_SYMBOL* macro) can not be resolved. Optional
 *         symbols that are missing only produce a warning and stay null.
 */
nvidia::cuda::cuda::cuda() : _library()
{
	// Log the address in hexadecimal so it matches the "0x" prefix; the cast
	// makes the argument type match the PRIxPTR format specifier.
	D_LOG_DEBUG("Initialization... (Addr: 0x%" PRIxPTR ")", reinterpret_cast<uintptr_t>(this));

	_library = streamfx::util::library::load(std::string_view(ST_CUDA_NAME));

	{ // 1. Load critical initialization functions.
		// Initialization
		P_CUDA_LOAD_SYMBOL(cuInit);

		// Version Management
		P_CUDA_LOAD_SYMBOL(cuDriverGetVersion);
	}

	{ // 2. Get the CUDA Driver version and log it.
		int32_t cuda_version = 0;
		if (cuDriverGetVersion(&cuda_version) == result::SUCCESS) {
			// Split the single integer into thousands (major), tens (minor)
			// and the remaining last digit (logged as patch).
			int32_t major = cuda_version / 1000;
			int32_t minor = (cuda_version % 1000) / 10;
			int32_t patch = (cuda_version % 10);
			D_LOG_INFO("Driver reported CUDA version: %" PRId32 ".%" PRId32 ".%" PRId32, major, minor, patch);
		} else {
			D_LOG_WARNING("Failed to query NVIDIA CUDA Driver for version.", 0);
		}
	}

	{ // 3. Load remaining functions.
		// Device Management
		// - Not yet needed.

		// Primary Context Management
		P_CUDA_LOAD_SYMBOL(cuDevicePrimaryCtxRetain);
		P_CUDA_LOAD_SYMBOL_V2(cuDevicePrimaryCtxRelease);
		P_CUDA_LOAD_SYMBOL_OPT_V2(cuDevicePrimaryCtxSetFlags);

		// Context Management
		P_CUDA_LOAD_SYMBOL_V2(cuCtxCreate);
		P_CUDA_LOAD_SYMBOL_V2(cuCtxDestroy);
		P_CUDA_LOAD_SYMBOL_V2(cuCtxPushCurrent);
		P_CUDA_LOAD_SYMBOL_V2(cuCtxPopCurrent);
		P_CUDA_LOAD_SYMBOL_OPT(cuCtxGetCurrent);
		P_CUDA_LOAD_SYMBOL_OPT(cuCtxSetCurrent);
		P_CUDA_LOAD_SYMBOL(cuCtxGetStreamPriorityRange);
		P_CUDA_LOAD_SYMBOL(cuCtxSynchronize);

		// Module Management
		// - Not yet needed.

		// Memory Management
		P_CUDA_LOAD_SYMBOL_V2(cuMemAlloc);
		P_CUDA_LOAD_SYMBOL_V2(cuMemAllocPitch);
		P_CUDA_LOAD_SYMBOL_V2(cuMemFree);
		P_CUDA_LOAD_SYMBOL(cuMemcpy);
		P_CUDA_LOAD_SYMBOL_V2(cuMemcpy2D);
		P_CUDA_LOAD_SYMBOL_V2(cuMemcpy2DAsync);
		P_CUDA_LOAD_SYMBOL_OPT_V2(cuArrayGetDescriptor);
		P_CUDA_LOAD_SYMBOL_OPT_V2(cuMemcpyAtoA);
		P_CUDA_LOAD_SYMBOL_OPT_V2(cuMemcpyAtoD);
		P_CUDA_LOAD_SYMBOL_OPT_V2(cuMemcpyAtoH);
		P_CUDA_LOAD_SYMBOL_OPT_V2(cuMemcpyAtoHAsync);
		P_CUDA_LOAD_SYMBOL_OPT_V2(cuMemcpyDtoA);
		P_CUDA_LOAD_SYMBOL_OPT_V2(cuMemcpyDtoD);
		P_CUDA_LOAD_SYMBOL_OPT_V2(cuMemcpyDtoH);
		P_CUDA_LOAD_SYMBOL_OPT_V2(cuMemcpyDtoHAsync);
		P_CUDA_LOAD_SYMBOL_OPT_V2(cuMemcpyHtoA);
		P_CUDA_LOAD_SYMBOL_OPT_V2(cuMemcpyHtoAAsync);
		P_CUDA_LOAD_SYMBOL_OPT_V2(cuMemcpyHtoD);
		P_CUDA_LOAD_SYMBOL_OPT_V2(cuMemcpyHtoDAsync);
		P_CUDA_LOAD_SYMBOL_OPT_V2(cuMemHostGetDevicePointer);

		// Virtual Memory Management
		// - Not yet needed.

		// Stream Ordered Memory Allocator
		// - Not yet needed.

		// Unified Addressing
		// - Not yet needed.

		// Stream Management
		P_CUDA_LOAD_SYMBOL(cuStreamCreate);
		P_CUDA_LOAD_SYMBOL_V2(cuStreamDestroy);
		P_CUDA_LOAD_SYMBOL(cuStreamSynchronize);
		P_CUDA_LOAD_SYMBOL_OPT(cuStreamCreateWithPriority);
		P_CUDA_LOAD_SYMBOL_OPT(cuStreamGetPriority);

		// Event Management
		// - Not yet needed.

		// External Resource Interoperability (CUDA 11.1+)
		// - Not yet needed.

		// Stream Memory Operations
		// - Not yet needed.

		// Execution Control
		// - Not yet needed.

		// Graph Management
		// - Not yet needed.

		// Occupancy
		// - Not yet needed.

		// Texture Object Management
		// - Not yet needed.

		// Surface Object Management
		// - Not yet needed.

		// Peer Context Memory Access
		// - Not yet needed.

		// Graphics Interoperability
		P_CUDA_LOAD_SYMBOL(cuGraphicsMapResources);
		P_CUDA_LOAD_SYMBOL(cuGraphicsSubResourceGetMappedArray);
		P_CUDA_LOAD_SYMBOL(cuGraphicsUnmapResources);
		P_CUDA_LOAD_SYMBOL(cuGraphicsUnregisterResource);

		// Driver Entry Point Access
		// - Not yet needed.

		// Profiler Control
		// - Not yet needed.

		// OpenGL Interoperability
		// - Not yet needed.

		// VDPAU Interoperability
		// - Not yet needed.

		// EGL Interoperability
		// - Not yet needed.

		// NOTE(review): this guard tests WIN32, while ST_CUDA_NAME is selected
		// with _WIN32/_WIN64. Presumably it mirrors the guard around the
		// matching member declarations in the header - confirm before changing.
#ifdef WIN32
		// Direct3D9 Interoperability
		// - Not yet needed.

		// Direct3D10 Interoperability
		P_CUDA_LOAD_SYMBOL(cuD3D10GetDevice);
		P_CUDA_LOAD_SYMBOL_OPT(cuGraphicsD3D10RegisterResource);

		// Direct3D11 Interoperability
		P_CUDA_LOAD_SYMBOL(cuD3D11GetDevice);
		P_CUDA_LOAD_SYMBOL_OPT(cuGraphicsD3D11RegisterResource);
#endif
	}

	// Initialize CUDA. The result used to be discarded silently; report a
	// failure so later errors can be traced back to it. Construction still
	// completes, exactly as before.
	if (cuInit(0) != result::SUCCESS) {
		D_LOG_ERROR("Failed to initialize the NVIDIA CUDA Driver API (cuInit).", 0);
	}
}

int32_t nvidia::cuda::cuda::version()
{
	int32_t v = 0;
	cuDriverGetVersion(&v);
	return v;
}

/** Retrieve the shared CUDA wrapper instance, creating it if none is alive.
 *
 * Only a weak reference is kept here, so the instance is destroyed once the
 * last shared_ptr handed out is released, and re-created on the next call.
 * Calls to this function are serialized by a function-local mutex.
 *
 * @return A non-empty shared_ptr to the instance.
 * @throws Whatever the nvidia::cuda::cuda constructor throws when a new
 *         instance has to be created.
 */
std::shared_ptr<nvidia::cuda::cuda> nvidia::cuda::cuda::get()
{
	static std::weak_ptr<nvidia::cuda::cuda> instance;
	static std::mutex                        lock;

	std::lock_guard<std::mutex> guard(lock);

	// Promote the weak reference in a single step. Checking expired() first
	// and calling lock() afterwards is racy: the last owner may release the
	// object (without holding this mutex) between the two calls, which would
	// make this function return an empty pointer.
	auto hard_instance = instance.lock();
	if (!hard_instance) {
		hard_instance = std::make_shared<nvidia::cuda::cuda>();
		instance      = hard_instance;
	}
	return hard_instance;
}