Merge pull request #7138 from ameerj/vic-fmt

vic: Implement RGBX8 video frame format
This commit is contained in:
Mai M 2021-10-08 19:19:20 -04:00 committed by GitHub
commit 39cd6306e6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 167 additions and 138 deletions

View file

@ -16,6 +16,7 @@ extern "C" {
} }
#include "common/assert.h" #include "common/assert.h"
#include "common/bit_field.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "video_core/command_classes/nvdec.h" #include "video_core/command_classes/nvdec.h"
@ -26,6 +27,25 @@ extern "C" {
#include "video_core/textures/decoders.h" #include "video_core/textures/decoders.h"
namespace Tegra { namespace Tegra {
namespace {
// Output surface pixel format codes, as stored in the VicConfig::pixel_format bit field.
// Vic::Execute() dispatches RGBA8, BGRA8 and RGBX8 to WriteRGBFrame() and Yuv420 to
// WriteYUVFrame(); any other value falls through to UNIMPLEMENTED_MSG.
enum class VideoPixelFormat : u64_le {
// WriteRGBFrame() converts to AV_PIX_FMT_RGBA for this format.
RGBA8 = 0x1f,
// WriteRGBFrame() converts to AV_PIX_FMT_BGRA for this format.
BGRA8 = 0x20,
// WriteRGBFrame() converts to AV_PIX_FMT_RGB0 for this format.
RGBX8 = 0x23,
// Handled by WriteYUVFrame().
Yuv420 = 0x44,
};
} // Anonymous namespace
// Packed 64-bit VIC surface configuration word, viewed either as the raw value or as
// individual bit fields.
// NOTE(review): the bit ranges below assume BitField<position, bit count, type>, as
// declared in common/bit_field.h -- confirm against that header.
union VicConfig {
// Whole configuration word; zero-initialized by default.
u64_le raw{};
// Bits 0-6: output pixel format; Vic::Execute() switches on this value.
BitField<0, 7, VideoPixelFormat> pixel_format;
// Bits 7-8: horizontal chroma location.
BitField<7, 2, u64_le> chroma_loc_horiz;
// Bits 9-10: vertical chroma location.
BitField<9, 2, u64_le> chroma_loc_vert;
// Bits 11-14: a non-zero value makes WriteRGBFrame() swizzle the pitch-linear frame to
// block linear before writing it out; zero writes the frame as-is.
BitField<11, 4, u64_le> block_linear_kind;
// Bits 15-18: passed as the block height argument to Texture::CalculateSize() and
// Texture::SwizzleSubrect() when swizzling.
BitField<15, 4, u64_le> block_linear_height_log2;
// Bits 32-45: surface width minus one; users add 1 to recover the width.
BitField<32, 14, u64_le> surface_width_minus1;
// Bits 46-59: surface height minus one; users add 1 to recover the height.
BitField<46, 14, u64_le> surface_height_minus1;
};
Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_) Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_)
: gpu(gpu_), : gpu(gpu_),
@ -65,27 +85,55 @@ void Vic::Execute() {
if (!frame) { if (!frame) {
return; return;
} }
const auto pixel_format = static_cast<VideoPixelFormat>(config.pixel_format.Value()); const u64 surface_width = config.surface_width_minus1 + 1;
switch (pixel_format) { const u64 surface_height = config.surface_height_minus1 + 1;
if (static_cast<u64>(frame->width) != surface_width ||
static_cast<u64>(frame->height) != surface_height) {
// TODO: Properly support multiple video streams with differing frame dimensions
LOG_WARNING(Debug, "Frame dimensions {}x{} do not match expected surface dimensions {}x{}",
frame->width, frame->height, surface_width, surface_height);
return;
}
switch (config.pixel_format) {
case VideoPixelFormat::RGBA8:
case VideoPixelFormat::BGRA8: case VideoPixelFormat::BGRA8:
case VideoPixelFormat::RGBA8: { case VideoPixelFormat::RGBX8:
WriteRGBFrame(frame, config);
break;
case VideoPixelFormat::Yuv420:
WriteYUVFrame(frame, config);
break;
default:
UNIMPLEMENTED_MSG("Unknown video pixel format {:X}", config.pixel_format.Value());
break;
}
}
void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) {
LOG_TRACE(Service_NVDRV, "Writing RGB Frame"); LOG_TRACE(Service_NVDRV, "Writing RGB Frame");
if (scaler_ctx == nullptr || frame->width != scaler_width || if (!scaler_ctx || frame->width != scaler_width || frame->height != scaler_height) {
frame->height != scaler_height) { const AVPixelFormat target_format = [pixel_format = config.pixel_format]() {
const AVPixelFormat target_format = switch (pixel_format) {
(pixel_format == VideoPixelFormat::RGBA8) ? AV_PIX_FMT_RGBA : AV_PIX_FMT_BGRA; case VideoPixelFormat::RGBA8:
return AV_PIX_FMT_RGBA;
case VideoPixelFormat::BGRA8:
return AV_PIX_FMT_BGRA;
case VideoPixelFormat::RGBX8:
return AV_PIX_FMT_RGB0;
default:
return AV_PIX_FMT_RGBA;
}
}();
sws_freeContext(scaler_ctx); sws_freeContext(scaler_ctx);
scaler_ctx = nullptr; // Frames are decoded into either YUV420 or NV12 formats. Convert to desired RGB format
// Frames are decoded into either YUV420 or NV12 formats. Convert to desired format
scaler_ctx = sws_getContext(frame->width, frame->height, scaler_ctx = sws_getContext(frame->width, frame->height,
static_cast<AVPixelFormat>(frame->format), frame->width, static_cast<AVPixelFormat>(frame->format), frame->width,
frame->height, target_format, 0, nullptr, nullptr, nullptr); frame->height, target_format, 0, nullptr, nullptr, nullptr);
scaler_width = frame->width; scaler_width = frame->width;
scaler_height = frame->height; scaler_height = frame->height;
converted_frame_buffer.reset();
} }
// Get Converted frame // Get Converted frame
const u32 width = static_cast<u32>(frame->width); const u32 width = static_cast<u32>(frame->width);
@ -99,17 +147,16 @@ void Vic::Execute() {
const std::array<int, 4> converted_stride{frame->width * 4, frame->height * 4, 0, 0}; const std::array<int, 4> converted_stride{frame->width * 4, frame->height * 4, 0, 0};
u8* const converted_frame_buf_addr{converted_frame_buffer.get()}; u8* const converted_frame_buf_addr{converted_frame_buffer.get()};
sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height, sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height, &converted_frame_buf_addr,
&converted_frame_buf_addr, converted_stride.data()); converted_stride.data());
const u32 blk_kind = static_cast<u32>(config.block_linear_kind); const u32 blk_kind = static_cast<u32>(config.block_linear_kind);
if (blk_kind != 0) { if (blk_kind != 0) {
// swizzle pitch linear to block linear // swizzle pitch linear to block linear
const u32 block_height = static_cast<u32>(config.block_linear_height_log2); const u32 block_height = static_cast<u32>(config.block_linear_height_log2);
const auto size = const auto size = Texture::CalculateSize(true, 4, width, height, 1, block_height, 0);
Tegra::Texture::CalculateSize(true, 4, width, height, 1, block_height, 0);
luma_buffer.resize(size); luma_buffer.resize(size);
Tegra::Texture::SwizzleSubrect(width, height, width * 4, width, 4, luma_buffer.data(), Texture::SwizzleSubrect(width, height, width * 4, width, 4, luma_buffer.data(),
converted_frame_buffer.get(), block_height, 0, 0); converted_frame_buffer.get(), block_height, 0, 0);
gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size); gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size);
@ -118,9 +165,9 @@ void Vic::Execute() {
gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr, gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
linear_size); linear_size);
} }
break;
} }
case VideoPixelFormat::Yuv420: {
void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame"); LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame");
const std::size_t surface_width = config.surface_width_minus1 + 1; const std::size_t surface_width = config.surface_width_minus1 + 1;
@ -187,12 +234,6 @@ void Vic::Execute() {
} }
gpu.MemoryManager().WriteBlock(output_surface_chroma_address, chroma_buffer.data(), gpu.MemoryManager().WriteBlock(output_surface_chroma_address, chroma_buffer.data(),
chroma_buffer.size()); chroma_buffer.size());
break;
}
default:
UNIMPLEMENTED_MSG("Unknown video pixel format {}", config.pixel_format.Value());
break;
}
} }
} // namespace Tegra } // namespace Tegra

View file

@ -6,7 +6,6 @@
#include <memory> #include <memory>
#include <vector> #include <vector>
#include "common/bit_field.h"
#include "common/common_types.h" #include "common/common_types.h"
struct SwsContext; struct SwsContext;
@ -14,6 +13,7 @@ struct SwsContext;
namespace Tegra { namespace Tegra {
class GPU; class GPU;
class Nvdec; class Nvdec;
union VicConfig;
class Vic { class Vic {
public: public:
@ -27,6 +27,7 @@ public:
}; };
explicit Vic(GPU& gpu, std::shared_ptr<Nvdec> nvdec_processor); explicit Vic(GPU& gpu, std::shared_ptr<Nvdec> nvdec_processor);
~Vic(); ~Vic();
/// Write to the device state. /// Write to the device state.
@ -35,22 +36,9 @@ public:
private: private:
void Execute(); void Execute();
enum class VideoPixelFormat : u64_le { void WriteRGBFrame(const AVFrame* frame, const VicConfig& config);
RGBA8 = 0x1f,
BGRA8 = 0x20,
Yuv420 = 0x44,
};
union VicConfig { void WriteYUVFrame(const AVFrame* frame, const VicConfig& config);
u64_le raw{};
BitField<0, 7, u64_le> pixel_format;
BitField<7, 2, u64_le> chroma_loc_horiz;
BitField<9, 2, u64_le> chroma_loc_vert;
BitField<11, 4, u64_le> block_linear_kind;
BitField<15, 4, u64_le> block_linear_height_log2;
BitField<32, 14, u64_le> surface_width_minus1;
BitField<46, 14, u64_le> surface_height_minus1;
};
GPU& gpu; GPU& gpu;
std::shared_ptr<Tegra::Nvdec> nvdec_processor; std::shared_ptr<Tegra::Nvdec> nvdec_processor;