gs-vertexbuffer: Avoid relying on std for aligned types

Changes the GS::VertexBuffer storage to be one contiguous buffer that is properly aligned and is now also used by GS::Vertex. This halves the necessary memory, removes the reallocation cost, and removes the copy that was needed to get the data onto the GPU.

Related: #9
This commit is contained in:
Michael Fabian 'Xaymar' Dirks 2018-01-18 05:01:54 +01:00
parent 4983e0ca06
commit 1ebb0beac4
6 changed files with 217 additions and 238 deletions

View file

@ -273,12 +273,12 @@ void Filter::Shape::Instance::update(obs_data_t *data) {
uint32_t points = (uint32_t)obs_data_get_int(data, P_SHAPE_POINTS);
m_vertexHelper->resize(points);
for (uint32_t point = 0; point < points; point++) {
GS::Vertex& v = m_vertexHelper->at(point);
GS::Vertex v = m_vertexHelper->at(point);
{
auto strings = cache.find(std::make_pair(point,
P_SHAPE_POINT_X));
if (strings != cache.end()) {
v.position.x = (float)(obs_data_get_double(data,
v.position->x = (float)(obs_data_get_double(data,
strings->second.first.c_str()) / 100.0);
}
}
@ -286,7 +286,7 @@ void Filter::Shape::Instance::update(obs_data_t *data) {
auto strings = cache.find(std::make_pair(point,
P_SHAPE_POINT_Y));
if (strings != cache.end()) {
v.position.y = (float)(obs_data_get_double(data,
v.position->y = (float)(obs_data_get_double(data,
strings->second.first.c_str()) / 100.0);
}
}
@ -294,7 +294,7 @@ void Filter::Shape::Instance::update(obs_data_t *data) {
auto strings = cache.find(std::make_pair(point,
P_SHAPE_POINT_U));
if (strings != cache.end()) {
v.uv[0].x = (float)(obs_data_get_double(data,
v.uv[0]->x = (float)(obs_data_get_double(data,
strings->second.first.c_str()) / 100.0);
}
}
@ -302,12 +302,12 @@ void Filter::Shape::Instance::update(obs_data_t *data) {
auto strings = cache.find(std::make_pair(point,
P_SHAPE_POINT_V));
if (strings != cache.end()) {
v.uv[0].y = (float)(obs_data_get_double(data,
v.uv[0]->y = (float)(obs_data_get_double(data,
strings->second.first.c_str()) / 100.0);
}
}
v.color = 0xFFFFFFFF;
v.position.z = 0.0f;
*v.color = 0xFFFFFFFF;
v.position->z = 0.0f;
}
drawmode = (gs_draw_mode)obs_data_get_int(data, P_SHAPE_MODE);
obs_enter_graphics();

View file

@ -465,41 +465,42 @@ void Filter::Transform::Instance::video_render(gs_effect_t *paramEffect) {
/// Generate mesh
{
GS::Vertex& v = m_vertexHelper->at(0);
v.uv[0].x = 0; v.uv[0].y = 0;
v.color = 0xFFFFFFFF;
v.position.x = -p_x + m_shear->x;
v.position.y = -p_y - m_shear->y;
v.position.z = 0.0f;
vec3_transform(&v.position, &v.position, &ident);
GS::Vertex vtx = m_vertexHelper->at(0);
*vtx.color = 0xFFFFFFFF;
vec4_set(vtx.uv[0], 0, 0, 0, 0);
vec3_set(vtx.position,
-p_x + m_shear->x,
-p_y - m_shear->y, 0);
vec3_transform(vtx.position, vtx.position, &ident);
}
{
GS::Vertex& v = m_vertexHelper->at(1);
v.uv[0].x = 1; v.uv[0].y = 0;
v.color = 0xFFFFFFFF;
v.position.x = p_x + m_shear->x;
v.position.y = -p_y + m_shear->y;
v.position.z = 0.0f;
vec3_transform(&v.position, &v.position, &ident);
GS::Vertex vtx = m_vertexHelper->at(1);
*vtx.color = 0xFFFFFFFF;
vec4_set(vtx.uv[0], 1, 0, 0, 0);
vec3_set(vtx.position,
p_x + m_shear->x,
-p_y + m_shear->y, 0);
vec3_transform(vtx.position, vtx.position, &ident);
}
{
GS::Vertex& v = m_vertexHelper->at(2);
v.uv[0].x = 0; v.uv[0].y = 1;
v.color = 0xFFFFFFFF;
v.position.x = -p_x - m_shear->x;
v.position.y = p_y - m_shear->y;
v.position.z = 0.0f;
vec3_transform(&v.position, &v.position, &ident);
GS::Vertex vtx = m_vertexHelper->at(2);
*vtx.color = 0xFFFFFFFF;
vec4_set(vtx.uv[0], 0, 1, 0, 0);
vec3_set(vtx.position,
-p_x - m_shear->x,
p_y - m_shear->y, 0);
vec3_transform(vtx.position, vtx.position, &ident);
}
{
GS::Vertex& v = m_vertexHelper->at(3);
v.uv[0].x = 1; v.uv[0].y = 1;
v.color = 0xFFFFFFFF;
v.position.x = p_x - m_shear->x;
v.position.y = p_y + m_shear->y;
v.position.z = 0.0f;
vec3_transform(&v.position, &v.position, &ident);
GS::Vertex vtx = m_vertexHelper->at(3);
*vtx.color = 0xFFFFFFFF;
vec4_set(vtx.uv[0], 1, 1, 0, 0);
vec3_set(vtx.position,
p_x - m_shear->x,
p_y + m_shear->y, 0);
vec3_transform(vtx.position, vtx.position, &ident);
}
m_vertexBuffer = m_vertexHelper->get();
if (!m_vertexBuffer) {
obs_source_skip_video_filter(m_sourceContext);

View file

@ -17,50 +17,4 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include "gs-vertexbuffer.h"
#include "util-memory.h"
#include <malloc.h>
/*!
 * \brief Copy-assign the contents of another vertex.
 *
 * Copies position, normal, tangent, every UV layer and the color.
 *
 * \param r Vertex to copy from.
 * \return Reference to this vertex.
 */
GS::Vertex& GS::Vertex::operator=(const Vertex& r) {
	vec3_copy(&this->position, &r.position);
	vec3_copy(&this->normal, &r.normal);
	vec3_copy(&this->tangent, &r.tangent);
	for (size_t n = 0; n < MAXIMUM_UVW_LAYERS; n++) {
		vec4_copy(&this->uv[n], &r.uv[n]);
	}
	// The color is a vertex attribute as well; without this line an assigned
	// vertex silently kept its previous color.
	this->color = r.color;
	return *this;
}
/*!
 * \brief Copy-assign the contents of a vertex given by pointer.
 *
 * Copies position, normal, tangent, every UV layer and the color.
 *
 * \param r Pointer to the vertex to copy from; it is dereferenced without a null check.
 * \return Pointer to this vertex.
 */
GS::Vertex* GS::Vertex::operator=(const Vertex* r) {
	vec3_copy(&this->position, &r->position);
	vec3_copy(&this->normal, &r->normal);
	vec3_copy(&this->tangent, &r->tangent);
	for (size_t n = 0; n < MAXIMUM_UVW_LAYERS; n++) {
		vec4_copy(&this->uv[n], &r->uv[n]);
	}
	// The color is a vertex attribute as well; without this line an assigned
	// vertex silently kept its previous color.
	this->color = r->color;
	return this;
}
/*!
 * \brief Allocate storage for a Vertex on a 16-byte boundary.
 *
 * \param count Number of bytes requested.
 * \return Whatever _aligned_malloc returns for that request.
 */
void* GS::Vertex::operator new(size_t count) {
	void* block = _aligned_malloc(count, 16);
	return block;
}
/*!
 * \brief Placement form: construct a Vertex in caller-provided storage.
 *
 * \param count Number of bytes requested (unused).
 * \param d Storage supplied by the caller.
 * \return \p d, unchanged.
 */
void* GS::Vertex::operator new(size_t count, void* d) {
	(void)count; // Nothing is allocated, so the size is irrelevant.
	return d;
}
/*!
 * \brief Allocate storage for an array of Vertex on a 16-byte boundary.
 *
 * \param count Number of bytes requested.
 * \return Whatever _aligned_malloc returns for that request.
 */
void* GS::Vertex::operator new[](size_t count) {
	void* block = _aligned_malloc(count, 16);
	return block;
}
/*!
 * \brief Placement array form: construct Vertex objects in caller-provided storage.
 *
 * \param count Number of bytes requested (unused).
 * \param d Storage supplied by the caller.
 * \return \p d, unchanged.
 */
void* GS::Vertex::operator new[](size_t count, void* d) {
	(void)count; // Nothing is allocated, so the size is irrelevant.
	return d;
}
/*!
 * \brief Release storage obtained from the aligned operator new.
 *
 * \param p Pointer to hand to _aligned_free.
 */
void GS::Vertex::operator delete(void* p) {
	_aligned_free(p);
}
/*!
 * \brief Release array storage obtained from the aligned operator new[].
 *
 * \param p Pointer to hand to _aligned_free.
 */
void GS::Vertex::operator delete[](void* p) {
	_aligned_free(p);
}
#include "gs-vertex.h"

View file

@ -18,6 +18,7 @@
*/
#pragma once
#include "gs-limits.h"
#include <inttypes.h>
#include <xmmintrin.h>
extern "C" {
@ -28,41 +29,11 @@ extern "C" {
}
namespace GS {
const uint32_t MAXIMUM_UVW_LAYERS = 8u;
// ToDo: Optimize for use with GS::VertexBuffer so that it doesn't require in-memory copy.
__declspec(align(16)) struct Vertex {
union {
__m128 _positionM;
vec3 position;
};
union {
__m128 _normalM;
vec3 normal;
};
union {
__m128 _tangentM;
vec3 tangent;
};
union {
__m128 _uvM[MAXIMUM_UVW_LAYERS];
vec4 uv[MAXIMUM_UVW_LAYERS];
};
uint32_t color;
// Operators
static void* Vertex::operator new(size_t count);
static void* Vertex::operator new[](size_t count);
static void* Vertex::operator new(size_t count, void* d);
static void* Vertex::operator new[](size_t count, void* d);
static void Vertex::operator delete(void* p);
static void Vertex::operator delete[](void* p);
//Vertex& Vertex::operator =(Vertex r);
Vertex& Vertex::operator =(const Vertex& r);
Vertex* Vertex::operator =(const Vertex* r);
private:
uint32_t padding[3];
struct Vertex {
vec3* position;
vec3* normal;
vec3* tangent;
uint32_t* color;
vec4* uv[MAXIMUM_UVW_LAYERS];
};
}

View file

@ -18,6 +18,7 @@
*/
#include "gs-vertexbuffer.h"
#include "util-memory.h"
#include <stdexcept>
extern "C" {
#pragma warning( push )
@ -26,133 +27,171 @@ extern "C" {
#pragma warning( pop )
}
const uint32_t defaultMaximumVertices = 65535u;
#pragma region Constructor & Destructor
GS::VertexBuffer::VertexBuffer(uint32_t maximumVertices) {
m_maximumVertices = maximumVertices;
m_uvwLayers = MAXIMUM_UVW_LAYERS;
// Reserve Space
m_vertexbufferdata = gs_vbdata_create();
m_vertexbufferdata->num = m_maximumVertices;
m_data.positions.resize(m_maximumVertices);
m_vertexbufferdata->points = m_data.positions.data();
m_data.normals.resize(m_maximumVertices);
m_vertexbufferdata->normals = m_data.normals.data();
m_data.tangents.resize(m_maximumVertices);
m_vertexbufferdata->tangents = m_data.tangents.data();
m_data.colors.resize(m_maximumVertices);
m_vertexbufferdata->colors = m_data.colors.data();
m_vertexbufferdata->num_tex = m_uvwLayers;
m_data.uvws.resize(m_uvwLayers);
m_data.uvwdata.resize(m_uvwLayers);
for (uint32_t n = 0; n < m_uvwLayers; n++) {
m_data.uvws[n].resize(m_maximumVertices);
m_data.uvwdata[n].width = 4;
m_data.uvwdata[n].array = m_data.uvws[n].data();
if (maximumVertices > MAXIMUM_VERTICES) {
throw std::out_of_range("maximumVertices out of range");
}
// Assign limits.
m_capacity = maximumVertices;
m_layers = MAXIMUM_UVW_LAYERS;
// Allocate memory for data.
m_vertexbufferdata = gs_vbdata_create();
m_vertexbufferdata->num = m_capacity;
m_vertexbufferdata->points = m_positions = (vec3*)util::malloc_aligned(16, sizeof(vec3) * m_capacity);
m_vertexbufferdata->normals = m_normals = (vec3*)util::malloc_aligned(16, sizeof(vec3) * m_capacity);
m_vertexbufferdata->tangents = m_tangents = (vec3*)util::malloc_aligned(16, sizeof(vec3) * m_capacity);
m_vertexbufferdata->colors = m_colors = (uint32_t*)util::malloc_aligned(16, sizeof(uint32_t) * m_capacity);
m_vertexbufferdata->num_tex = m_layers;
m_vertexbufferdata->tvarray = m_layerdata = (gs_tvertarray*)util::malloc_aligned(16, sizeof(gs_tvertarray)* m_layers);
for (size_t n = 0; n < MAXIMUM_UVW_LAYERS; n++) {
m_layerdata[n].array = m_uvs[n] = (vec4*)util::malloc_aligned(16, sizeof(vec4) * m_capacity);
m_layerdata[n].width = 4;
}
m_vertexbufferdata->tvarray = m_data.uvwdata.data();
// Allocate GPU
obs_enter_graphics();
m_vertexbuffer = gs_vertexbuffer_create(m_vertexbufferdata, GS_DYNAMIC);
std::memset(m_vertexbufferdata, 0, sizeof(gs_vb_data));
m_vertexbufferdata->num = m_capacity;
m_vertexbufferdata->num_tex = m_layers;
obs_leave_graphics();
if (!m_vertexbuffer) {
throw std::runtime_error("Failed to create vertex buffer.");
}
}
GS::VertexBuffer::VertexBuffer(gs_vertbuffer_t* vb) {
m_vertexbuffer = vb;
}
GS::VertexBuffer::VertexBuffer() : VertexBuffer(defaultMaximumVertices) {}
GS::VertexBuffer::VertexBuffer(std::vector<Vertex*>& other) : VertexBuffer((uint32_t)other.capacity()) {
std::copy(other.begin(), other.end(), this->end());
}
GS::VertexBuffer::VertexBuffer(VertexBuffer& other) : VertexBuffer(other.m_maximumVertices) {
std::copy(other.begin(), other.end(), this->end());
}
GS::VertexBuffer::VertexBuffer() : VertexBuffer(MAXIMUM_VERTICES) {}
GS::VertexBuffer::~VertexBuffer() {
if (m_vertexbuffer) {
if (m_positions) {
util::free_aligned(m_positions);
m_positions = nullptr;
}
if (m_normals) {
util::free_aligned(m_normals);
m_normals = nullptr;
}
if (m_tangents) {
util::free_aligned(m_tangents);
m_tangents = nullptr;
}
if (m_colors) {
util::free_aligned(m_colors);
m_colors = nullptr;
}
for (size_t n = 0; n < MAXIMUM_UVW_LAYERS; n++) {
if (m_uvs[n]) {
util::free_aligned(m_uvs[n]);
m_uvs[n] = nullptr;
}
}
if (m_vertexbufferdata) {
std::memset(m_vertexbufferdata, 0, sizeof(gs_vb_data));
if (!m_vertexbuffer) {
gs_vbdata_destroy(m_vertexbufferdata);
m_vertexbufferdata = nullptr;
}
}
if (m_vertexbuffer) {
obs_enter_graphics();
gs_vertexbuffer_destroy(m_vertexbuffer);
obs_leave_graphics();
m_vertexbuffer = nullptr;
}
m_vertexbuffer = nullptr;
}
#pragma endregion Constructor & Destructor
#pragma region Copy & Move Constructor
/*!
 * \brief Create a Vertex Buffer with the same capacity as another one.
 *
 * Delegates to the capacity constructor using the other buffer's capacity.
 * NOTE(review): the body is empty, so no vertex data is copied from
 * \p other here - confirm whether that is intended.
 *
 * \param other The Vertex Buffer whose capacity is used.
 */
GS::VertexBuffer::VertexBuffer(VertexBuffer& other)
	: VertexBuffer(other.m_capacity)
{
}
/*!
 * \brief Wrap an existing graphics-subsystem vertex buffer.
 *
 * No CPU-side storage is allocated by this constructor.
 *
 * \param vb The vertex buffer handle to store.
 */
GS::VertexBuffer::VertexBuffer(gs_vertbuffer_t* vb) {
	// Clear every storage member: the destructor tests these pointers before
	// freeing them, so leaving them uninitialized would free garbage.
	m_size = 0;
	m_capacity = 0;
	m_layers = 0;
	m_positions = nullptr;
	m_normals = nullptr;
	m_tangents = nullptr;
	m_colors = nullptr;
	for (size_t n = 0; n < MAXIMUM_UVW_LAYERS; n++) {
		m_uvs[n] = nullptr;
	}
	m_layerdata = nullptr;
	m_vertexbufferdata = nullptr;

	m_vertexbuffer = vb;
}
#pragma endregion Copy & Move Constructor
/*!
 * \brief Set the number of vertices that are considered in use.
 *
 * Only the stored size changes; no memory is allocated or released.
 *
 * \param new_size New vertex count; must not exceed the capacity.
 * \throws std::out_of_range if \p new_size is larger than the capacity.
 */
void GS::VertexBuffer::resize(size_t new_size) {
	const bool fits = (new_size <= m_capacity);
	if (!fits) {
		throw std::out_of_range("new_size out of range");
	}
	m_size = static_cast<uint32_t>(new_size);
}
/*!
 * \brief Number of vertices currently in use.
 *
 * \return The stored size.
 */
size_t GS::VertexBuffer::size() {
	return static_cast<size_t>(m_size);
}
/*!
 * \brief Check whether the buffer holds no vertices.
 *
 * \return true if the stored size is zero, false otherwise.
 */
bool GS::VertexBuffer::empty() {
	return 0 == m_size;
}
/*!
 * \brief Access the vertex at a given index.
 *
 * The returned GS::Vertex holds pointers into this buffer's per-attribute
 * arrays, so writing through them modifies the buffer's data directly.
 *
 * \param idx Index of the vertex; must be smaller than size().
 * \return A GS::Vertex whose members point at the attributes of vertex \p idx.
 * \throws std::out_of_range if \p idx is not smaller than the current size.
 */
const GS::Vertex GS::VertexBuffer::at(size_t idx) {
	// idx is unsigned, so only the upper bound can be violated.
	if (idx >= m_size) {
		throw std::out_of_range("idx out of range");
	}

	GS::Vertex vtx;
	vtx.position = &m_positions[idx];
	vtx.normal = &m_normals[idx];
	vtx.tangent = &m_tangents[idx];
	vtx.color = &m_colors[idx];
	for (size_t n = 0; n < MAXIMUM_UVW_LAYERS; n++) {
		vtx.uv[n] = &m_uvs[n][idx];
	}
	return vtx;
}
/*!
 * \brief Subscript access; forwards to at().
 *
 * \param pos Index of the vertex.
 * \return The result of at(pos).
 */
const GS::Vertex GS::VertexBuffer::operator[](const size_t pos) {
	return this->at(pos);
}
void GS::VertexBuffer::set_uv_layers(uint32_t layers) {
m_uvwLayers = layers;
m_layers = layers;
}
uint32_t GS::VertexBuffer::uv_layers() {
return m_uvwLayers;
return m_layers;
}
gs_vertbuffer_t* GS::VertexBuffer::get(bool refreshGPU) {
if (refreshGPU) {
if (size() > m_maximumVertices)
throw std::runtime_error("Too many vertices in Vertex Buffer.");
if (!refreshGPU)
return m_vertexbuffer;
// Update data pointer from Graphics Subsystem.
m_vertexbufferdata = gs_vertexbuffer_get_data(m_vertexbuffer);
std::memset(m_vertexbufferdata, 0, sizeof(gs_vb_data));
// Resize buffers.
m_data.positions.resize(m_maximumVertices);
m_data.normals.resize(m_maximumVertices);
m_data.tangents.resize(m_maximumVertices);
m_data.colors.resize(m_maximumVertices);
m_data.uvws.resize(m_uvwLayers);
m_data.uvwdata.resize(m_uvwLayers);
// Assign new data.
m_vertexbufferdata->num = m_maximumVertices;
m_vertexbufferdata->points = m_data.positions.data();
m_vertexbufferdata->normals = m_data.normals.data();
m_vertexbufferdata->tangents = m_data.tangents.data();
m_vertexbufferdata->colors = m_data.colors.data();
m_vertexbufferdata->num_tex = m_uvwLayers;
for (uint32_t n = 0; n < m_uvwLayers; n++) {
m_data.uvws[n].resize(m_maximumVertices);
m_data.uvwdata[n].width = 4;
m_data.uvwdata[n].array = m_data.uvws[n].data();
}
m_vertexbufferdata->tvarray = m_data.uvwdata.data();
// Copy Data
for (size_t vertexIdx = 0; vertexIdx < size(); vertexIdx++) {
GS::Vertex& v = this->at(vertexIdx);
vec3_copy(&m_data.positions[vertexIdx], &(v.position));
vec3_copy(&m_data.normals[vertexIdx], &(v.normal));
vec3_copy(&m_data.tangents[vertexIdx], &(v.tangent));
for (size_t texcoordIdx = 0; texcoordIdx < m_uvwLayers; texcoordIdx++) {
vec4_copy(&m_data.uvws[texcoordIdx][vertexIdx], &(v.uv[texcoordIdx]));
}
m_data.colors[vertexIdx] = v.color;
}
// Update GPU
obs_enter_graphics();
gs_vertexbuffer_flush(m_vertexbuffer);
obs_leave_graphics();
// WORKAROUND: OBS Studio 20.x and below incorrectly deletes data that it doesn't own.
std::memset(m_vertexbufferdata, 0, sizeof(gs_vb_data));
m_vertexbufferdata->num = m_maximumVertices;
m_vertexbufferdata->num_tex = m_uvwLayers;
for (uint32_t n = 0; n < m_uvwLayers; n++) {
m_data.uvwdata[n].width = 4;
}
if (m_size > m_capacity)
throw std::out_of_range("size is larger than capacity");
// Update VertexBuffer data.
m_vertexbufferdata = gs_vertexbuffer_get_data(m_vertexbuffer);
std::memset(m_vertexbufferdata, 0, sizeof(gs_vb_data));
m_vertexbufferdata->num = m_capacity;
m_vertexbufferdata->points = m_positions;
m_vertexbufferdata->normals = m_normals;
m_vertexbufferdata->tangents = m_tangents;
m_vertexbufferdata->colors = m_colors;
m_vertexbufferdata->num_tex = m_layers;
m_vertexbufferdata->tvarray = m_layerdata;
for (size_t n = 0; n < MAXIMUM_UVW_LAYERS; n++) {
m_layerdata[n].array = m_uvs[n];
m_layerdata[n].width = 4;
}
// Update GPU
obs_enter_graphics();
gs_vertexbuffer_flush(m_vertexbuffer);
obs_leave_graphics();
// WORKAROUND: OBS Studio 20.x and below incorrectly deletes data that it doesn't own.
std::memset(m_vertexbufferdata, 0, sizeof(gs_vb_data));
m_vertexbufferdata->num = m_capacity;
m_vertexbufferdata->num_tex = m_layers;
for (uint32_t n = 0; n < m_layers; n++) {
m_layerdata[n].width = 4;
}
return m_vertexbuffer;
}

View file

@ -18,11 +18,11 @@
*/
#pragma once
#include "gs-limits.h"
#include "gs-vertex.h"
#include "util-math.h"
#include "util-memory.h"
#include <inttypes.h>
#include <vector>
extern "C" {
#pragma warning( push )
#pragma warning( disable: 4201 )
@ -31,8 +31,9 @@ extern "C" {
}
namespace GS {
class VertexBuffer : public std::vector<Vertex, util::AlignmentAllocator<Vertex, 16>> {
class VertexBuffer {
public:
#pragma region Constructor & Destructor
/*!
* \brief Create a Vertex Buffer with specific size
*
@ -47,26 +48,36 @@ namespace GS {
*/
VertexBuffer();
virtual ~VertexBuffer();
#pragma endregion Constructor & Destructor
#pragma region Copy & Move Constructor
/*!
* \brief Create a copy of a Vertex Buffer
* Full Description below
*
* \param other The Vertex Buffer to copy
*/
* \brief Create a copy of a Vertex Buffer
* Full Description below
*
* \param other The Vertex Buffer to copy
*/
VertexBuffer(VertexBuffer& other);
/*!
* \brief Create a Vertex Buffer from a Vertex array
* \brief Create a copy of a Vertex Buffer
* Full Description below
*
* \param other The Vertex array to use
* \param other The Vertex Buffer to copy
*/
VertexBuffer(std::vector<Vertex*>& other);
VertexBuffer(gs_vertbuffer_t* other);
#pragma endregion Copy & Move Constructor
void resize(size_t new_size);
size_t size();
VertexBuffer(gs_vertbuffer_t* vb);
bool empty();
virtual ~VertexBuffer();
const GS::Vertex at(size_t idx);
const GS::Vertex operator[](const size_t pos);
void set_uv_layers(uint32_t layers);
@ -76,20 +87,23 @@ namespace GS {
gs_vertbuffer_t* get(bool refreshGPU);
protected:
uint32_t m_maximumVertices;
uint32_t m_uvwLayers;
private:
uint32_t m_size;
uint32_t m_capacity;
uint32_t m_layers;
// Memory Storage
vec3 *m_positions;
vec3 *m_normals;
vec3 *m_tangents;
uint32_t *m_colors;
vec4 *m_uvs[MAXIMUM_UVW_LAYERS];
// OBS GS Data
gs_vb_data* m_vertexbufferdata;
gs_vertbuffer_t* m_vertexbuffer;
// Data Storage
struct {
std::vector<util::vec3a> positions;
std::vector<util::vec3a> normals;
std::vector<util::vec3a> tangents;
std::vector<uint32_t> colors;
std::vector<std::vector<util::vec4a>> uvws;
std::vector<gs_tvertarray> uvwdata;
} m_data;
gs_tvertarray* m_layerdata;
};
}