primera implementacio de postfx

This commit is contained in:
2026-03-23 13:18:36 +01:00
parent 99cc803f21
commit 2f3161d701
23 changed files with 13306 additions and 771 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,720 @@
#include "rendering/sdl3gpu/sdl3gpu_shader.hpp"
#include <SDL3/SDL_log.h>
#include <algorithm> // std::min, std::max, std::floor
#include <cmath> // std::floor
#include <cstring> // memcpy, strlen
#ifndef __APPLE__
#include "rendering/sdl3gpu/postfx_frag_spv.h"
#include "rendering/sdl3gpu/postfx_vert_spv.h"
#endif
#ifdef __APPLE__
// ============================================================================
// MSL shaders (Metal Shading Language) — macOS
// ============================================================================
// NOLINTBEGIN(readability-identifier-naming)
static const char* POSTFX_VERT_MSL = R"(
#include <metal_stdlib>
using namespace metal;
struct PostVOut {
float4 pos [[position]];
float2 uv;
};
vertex PostVOut postfx_vs(uint vid [[vertex_id]]) {
const float2 positions[3] = { {-1.0, -1.0}, {3.0, -1.0}, {-1.0, 3.0} };
const float2 uvs[3] = { { 0.0, 1.0}, {2.0, 1.0}, { 0.0,-1.0} };
PostVOut out;
out.pos = float4(positions[vid], 0.0, 1.0);
out.uv = uvs[vid];
return out;
}
)";
static const char* POSTFX_FRAG_MSL = R"(
#include <metal_stdlib>
using namespace metal;
struct PostVOut {
float4 pos [[position]];
float2 uv;
};
struct PostFXUniforms {
float vignette_strength;
float chroma_strength;
float scanline_strength;
float screen_height;
float mask_strength;
float gamma_strength;
float curvature;
float bleeding;
float pixel_scale;
float time;
float oversample; // 1.0 = sin SS, 3.0 = 3× supersampling
float flicker; // 0 = off, 1 = phosphor flicker ~50 Hz
};
// YCbCr helpers for NTSC bleeding
static float3 rgb_to_ycc(float3 rgb) {
return float3(
0.299f*rgb.r + 0.587f*rgb.g + 0.114f*rgb.b,
-0.169f*rgb.r - 0.331f*rgb.g + 0.500f*rgb.b + 0.5f,
0.500f*rgb.r - 0.419f*rgb.g - 0.081f*rgb.b + 0.5f
);
}
static float3 ycc_to_rgb(float3 ycc) {
float y = ycc.x;
float cb = ycc.y - 0.5f;
float cr = ycc.z - 0.5f;
return clamp(float3(
y + 1.402f*cr,
y - 0.344f*cb - 0.714f*cr,
y + 1.772f*cb
), 0.0f, 1.0f);
}
fragment float4 postfx_fs(PostVOut in [[stage_in]],
texture2d<float> scene [[texture(0)]],
sampler samp [[sampler(0)]],
constant PostFXUniforms& u [[buffer(0)]]) {
float2 uv = in.uv;
// Curvatura barrel CRT
if (u.curvature > 0.0f) {
float2 c = uv - 0.5f;
float rsq = dot(c, c);
float2 dist = float2(0.05f, 0.1f) * u.curvature;
float2 barrelScale = 1.0f - 0.23f * dist;
c += c * (dist * rsq);
c *= barrelScale;
if (abs(c.x) >= 0.5f || abs(c.y) >= 0.5f) {
return float4(0.0f, 0.0f, 0.0f, 1.0f);
}
uv = c + 0.5f;
}
// Muestra base
float3 base = scene.sample(samp, uv).rgb;
// Sangrado NTSC — difuminado horizontal de crominancia.
// step = 1 pixel de juego en espacio UV (corrige SS: scene.get_width() = game_w * oversample).
float3 colour;
if (u.bleeding > 0.0f) {
float tw = float(scene.get_width());
float step = u.oversample / tw; // 1 pixel lógico en UV
float3 ycc = rgb_to_ycc(base);
float3 ycc_l2 = rgb_to_ycc(scene.sample(samp, uv - float2(2.0f*step, 0.0f)).rgb);
float3 ycc_l1 = rgb_to_ycc(scene.sample(samp, uv - float2(1.0f*step, 0.0f)).rgb);
float3 ycc_r1 = rgb_to_ycc(scene.sample(samp, uv + float2(1.0f*step, 0.0f)).rgb);
float3 ycc_r2 = rgb_to_ycc(scene.sample(samp, uv + float2(2.0f*step, 0.0f)).rgb);
ycc.yz = (ycc_l2.yz + ycc_l1.yz*2.0f + ycc.yz*2.0f + ycc_r1.yz*2.0f + ycc_r2.yz) / 8.0f;
colour = mix(base, ycc_to_rgb(ycc), u.bleeding);
} else {
colour = base;
}
// Aberración cromática (drift animado con time para efecto NTSC real)
float ca = u.chroma_strength * 0.005f * (1.0f + 0.15f * sin(u.time * 7.3f));
colour.r = scene.sample(samp, uv + float2(ca, 0.0f)).r;
colour.b = scene.sample(samp, uv - float2(ca, 0.0f)).b;
// Corrección gamma (linealizar antes de scanlines, codificar después)
if (u.gamma_strength > 0.0f) {
float3 lin = pow(colour, float3(2.4f));
colour = mix(colour, lin, u.gamma_strength);
}
// Scanlines — 1 pixel físico oscuro por fila lógica.
// Usa uv.y (independiente del offset de letterbox) con pixel_scale para
// calcular la posición dentro de la fila en coordenadas físicas.
// 3x: 1 dark + 2 bright. 4x: 1 dark + 3 bright.
// bright=3.5×, dark floor=0.42 (mantiene aspecto CRT original).
if (u.scanline_strength > 0.0f) {
float ps = max(1.0f, round(u.pixel_scale));
float frac_in_row = fract(uv.y * u.screen_height);
float row_pos = floor(frac_in_row * ps);
float is_dark = step(ps - 1.0f, row_pos);
float scan = mix(3.5f, 0.42f, is_dark);
colour *= mix(1.0f, scan, u.scanline_strength);
}
if (u.gamma_strength > 0.0f) {
float3 enc = pow(colour, float3(1.0f/2.2f));
colour = mix(colour, enc, u.gamma_strength);
}
// Viñeta
float2 d = uv - 0.5f;
float vignette = 1.0f - dot(d, d) * u.vignette_strength;
colour *= clamp(vignette, 0.0f, 1.0f);
// Máscara de fósforo RGB — después de scanlines (orden original):
// filas brillantes saturadas → máscara invisible, filas oscuras → RGB visible.
if (u.mask_strength > 0.0f) {
float whichMask = fract(in.pos.x * 0.3333333f);
float3 mask = float3(0.80f);
if (whichMask < 0.3333333f) mask.x = 1.0f;
else if (whichMask < 0.6666667f) mask.y = 1.0f;
else mask.z = 1.0f;
colour = mix(colour, colour * mask, u.mask_strength);
}
// Parpadeo de fósforo CRT (~50 Hz)
if (u.flicker > 0.0f) {
float flicker_wave = sin(u.time * 100.0f) * 0.5f + 0.5f;
colour *= 1.0f - u.flicker * 0.04f * flicker_wave;
}
return float4(colour, 1.0f);
}
)";
// NOLINTEND(readability-identifier-naming)
#endif // __APPLE__
namespace Rendering {
// ---------------------------------------------------------------------------
// Destructor
// ---------------------------------------------------------------------------
SDL3GPUShader::~SDL3GPUShader() {
destroy();
}
// ---------------------------------------------------------------------------
// init
// ---------------------------------------------------------------------------
auto SDL3GPUShader::init(SDL_Window* window,
SDL_Texture* texture,
const std::string& /*vertex_source*/,
const std::string& /*fragment_source*/) -> bool {
// Si ya estaba inicializado (p.ej. al cambiar borde), liberar recursos
// de textura/pipeline pero mantener el device vivo para evitar conflictos
// con SDL_Renderer en Windows/Vulkan.
if (is_initialized_) {
cleanup();
}
window_ = window;
// Dimensions from the SDL_Texture placeholder
float fw = 0.0F;
float fh = 0.0F;
SDL_GetTextureSize(texture, &fw, &fh);
game_width_ = static_cast<int>(fw);
game_height_ = static_cast<int>(fh);
tex_width_ = game_width_ * oversample_;
tex_height_ = game_height_ * oversample_;
uniforms_.screen_height = static_cast<float>(tex_height_); // Altura de la textura GPU
uniforms_.oversample = static_cast<float>(oversample_);
// ----------------------------------------------------------------
// 1. Create GPU device (solo si no existe ya)
// ----------------------------------------------------------------
if (device_ == nullptr) {
#ifdef __APPLE__
const SDL_GPUShaderFormat PREFERRED = SDL_GPU_SHADERFORMAT_MSL | SDL_GPU_SHADERFORMAT_METALLIB;
#else
const SDL_GPUShaderFormat PREFERRED = SDL_GPU_SHADERFORMAT_SPIRV;
#endif
device_ = SDL_CreateGPUDevice(PREFERRED, false, nullptr);
if (device_ == nullptr) {
SDL_Log("SDL3GPUShader: SDL_CreateGPUDevice failed: %s", SDL_GetError());
return false;
}
SDL_Log("SDL3GPUShader: driver = %s", SDL_GetGPUDeviceDriver(device_));
// ----------------------------------------------------------------
// 2. Claim window (una sola vez — no liberar hasta destroy())
// ----------------------------------------------------------------
if (!SDL_ClaimWindowForGPUDevice(device_, window_)) {
SDL_Log("SDL3GPUShader: SDL_ClaimWindowForGPUDevice failed: %s", SDL_GetError());
SDL_DestroyGPUDevice(device_);
device_ = nullptr;
return false;
}
SDL_SetGPUSwapchainParameters(device_, window_, SDL_GPU_SWAPCHAINCOMPOSITION_SDR, vsync_ ? SDL_GPU_PRESENTMODE_VSYNC : SDL_GPU_PRESENTMODE_IMMEDIATE);
}
// ----------------------------------------------------------------
// 3. Create scene texture (upload target + sampler source)
// Format: B8G8R8A8_UNORM matches SDL ARGB8888 byte layout on LE
// ----------------------------------------------------------------
SDL_GPUTextureCreateInfo tex_info = {};
tex_info.type = SDL_GPU_TEXTURETYPE_2D;
tex_info.format = SDL_GPU_TEXTUREFORMAT_B8G8R8A8_UNORM;
tex_info.usage = SDL_GPU_TEXTUREUSAGE_SAMPLER;
tex_info.width = static_cast<Uint32>(tex_width_);
tex_info.height = static_cast<Uint32>(tex_height_);
tex_info.layer_count_or_depth = 1;
tex_info.num_levels = 1;
scene_texture_ = SDL_CreateGPUTexture(device_, &tex_info);
if (scene_texture_ == nullptr) {
SDL_Log("SDL3GPUShader: failed to create scene texture: %s", SDL_GetError());
cleanup();
return false;
}
// ----------------------------------------------------------------
// 4. Create upload transfer buffer (CPU → GPU, size = w*h*4 bytes)
// ----------------------------------------------------------------
SDL_GPUTransferBufferCreateInfo tb_info = {};
tb_info.usage = SDL_GPU_TRANSFERBUFFERUSAGE_UPLOAD;
tb_info.size = static_cast<Uint32>(tex_width_ * tex_height_ * 4);
upload_buffer_ = SDL_CreateGPUTransferBuffer(device_, &tb_info);
if (upload_buffer_ == nullptr) {
SDL_Log("SDL3GPUShader: failed to create upload buffer: %s", SDL_GetError());
cleanup();
return false;
}
// ----------------------------------------------------------------
// 5. Create samplers: NEAREST (pixel art) + LINEAR (supersampling)
// ----------------------------------------------------------------
SDL_GPUSamplerCreateInfo samp_info = {};
samp_info.min_filter = SDL_GPU_FILTER_NEAREST;
samp_info.mag_filter = SDL_GPU_FILTER_NEAREST;
samp_info.mipmap_mode = SDL_GPU_SAMPLERMIPMAPMODE_NEAREST;
samp_info.address_mode_u = SDL_GPU_SAMPLERADDRESSMODE_CLAMP_TO_EDGE;
samp_info.address_mode_v = SDL_GPU_SAMPLERADDRESSMODE_CLAMP_TO_EDGE;
samp_info.address_mode_w = SDL_GPU_SAMPLERADDRESSMODE_CLAMP_TO_EDGE;
sampler_ = SDL_CreateGPUSampler(device_, &samp_info);
if (sampler_ == nullptr) {
SDL_Log("SDL3GPUShader: failed to create sampler: %s", SDL_GetError());
cleanup();
return false;
}
SDL_GPUSamplerCreateInfo lsamp_info = {};
lsamp_info.min_filter = SDL_GPU_FILTER_LINEAR;
lsamp_info.mag_filter = SDL_GPU_FILTER_LINEAR;
lsamp_info.mipmap_mode = SDL_GPU_SAMPLERMIPMAPMODE_NEAREST;
lsamp_info.address_mode_u = SDL_GPU_SAMPLERADDRESSMODE_CLAMP_TO_EDGE;
lsamp_info.address_mode_v = SDL_GPU_SAMPLERADDRESSMODE_CLAMP_TO_EDGE;
lsamp_info.address_mode_w = SDL_GPU_SAMPLERADDRESSMODE_CLAMP_TO_EDGE;
linear_sampler_ = SDL_CreateGPUSampler(device_, &lsamp_info);
if (linear_sampler_ == nullptr) {
SDL_Log("SDL3GPUShader: failed to create linear sampler: %s", SDL_GetError());
cleanup();
return false;
}
// ----------------------------------------------------------------
// 6. Create PostFX graphics pipeline
// ----------------------------------------------------------------
if (!createPipeline()) {
cleanup();
return false;
}
is_initialized_ = true;
SDL_Log("SDL3GPUShader: initialized OK (%dx%d)", tex_width_, tex_height_);
return true;
}
// ---------------------------------------------------------------------------
// createPipeline
// ---------------------------------------------------------------------------
auto SDL3GPUShader::createPipeline() -> bool {
const SDL_GPUTextureFormat SWAPCHAIN_FMT = SDL_GetGPUSwapchainTextureFormat(device_, window_);
#ifdef __APPLE__
SDL_GPUShader* vert = createShaderMSL(device_, POSTFX_VERT_MSL, "postfx_vs", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
SDL_GPUShader* frag = createShaderMSL(device_, POSTFX_FRAG_MSL, "postfx_fs", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 1);
#else
SDL_GPUShader* vert = createShaderSPIRV(device_, kpostfx_vert_spv, kpostfx_vert_spv_size, "main", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
SDL_GPUShader* frag = createShaderSPIRV(device_, kpostfx_frag_spv, kpostfx_frag_spv_size, "main", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 1);
#endif
if ((vert == nullptr) || (frag == nullptr)) {
SDL_Log("SDL3GPUShader: failed to compile PostFX shaders");
if (vert != nullptr) { SDL_ReleaseGPUShader(device_, vert); }
if (frag != nullptr) { SDL_ReleaseGPUShader(device_, frag); }
return false;
}
SDL_GPUColorTargetBlendState no_blend = {};
no_blend.enable_blend = false;
no_blend.enable_color_write_mask = false;
SDL_GPUColorTargetDescription color_target = {};
color_target.format = SWAPCHAIN_FMT;
color_target.blend_state = no_blend;
SDL_GPUVertexInputState no_input = {};
SDL_GPUGraphicsPipelineCreateInfo pipe_info = {};
pipe_info.vertex_shader = vert;
pipe_info.fragment_shader = frag;
pipe_info.vertex_input_state = no_input;
pipe_info.primitive_type = SDL_GPU_PRIMITIVETYPE_TRIANGLELIST;
pipe_info.target_info.num_color_targets = 1;
pipe_info.target_info.color_target_descriptions = &color_target;
pipeline_ = SDL_CreateGPUGraphicsPipeline(device_, &pipe_info);
SDL_ReleaseGPUShader(device_, vert);
SDL_ReleaseGPUShader(device_, frag);
if (pipeline_ == nullptr) {
SDL_Log("SDL3GPUShader: pipeline creation failed: %s", SDL_GetError());
return false;
}
return true;
}
// ---------------------------------------------------------------------------
// uploadPixels — copies ARGB8888 CPU pixels into the GPU transfer buffer.
// Con supersampling (oversample_ > 1) expande cada pixel del juego a un bloque
// oversample × oversample y hornea la scanline oscura en la última fila del bloque.
// ---------------------------------------------------------------------------
void SDL3GPUShader::uploadPixels(const Uint32* pixels, int width, int height) {
if (!is_initialized_ || (upload_buffer_ == nullptr)) { return; }
void* mapped = SDL_MapGPUTransferBuffer(device_, upload_buffer_, false);
if (mapped == nullptr) {
SDL_Log("SDL3GPUShader: SDL_MapGPUTransferBuffer failed: %s", SDL_GetError());
return;
}
if (oversample_ <= 1) {
// Path sin supersampling: copia directa
std::memcpy(mapped, pixels, static_cast<size_t>(width * height * 4));
} else {
// Path con supersampling: expande cada pixel a OS×OS, oscurece última fila.
// Replica la fórmula del shader: mix(3.5, 0.42, scanline_strength).
auto* out = static_cast<Uint32*>(mapped);
const int OS = oversample_;
const float BRIGHT_MUL = 1.0F + (baked_scanline_strength_ * 2.5F); // rows 0..OS-2
const float DARK_MUL = 1.0F - (baked_scanline_strength_ * 0.58F); // row OS-1
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; ++x) {
const Uint32 SRC = pixels[(y * width) + x];
const Uint32 ALPHA = (SRC >> 24) & 0xFFU;
const auto FR = static_cast<float>((SRC >> 16) & 0xFFU);
const auto FG = static_cast<float>((SRC >> 8) & 0xFFU);
const auto FB = static_cast<float>(SRC & 0xFFU);
auto make_px = [ALPHA](float rv, float gv, float bv) -> Uint32 {
auto cl = [](float v) -> Uint32 { return static_cast<Uint32>(std::min(255.0F, v)); };
return (ALPHA << 24) | (cl(rv) << 16) | (cl(gv) << 8) | cl(bv);
};
const Uint32 BRIGHT = make_px(FR * BRIGHT_MUL, FG * BRIGHT_MUL, FB * BRIGHT_MUL);
const Uint32 DARK = make_px(FR * DARK_MUL, FG * DARK_MUL, FB * DARK_MUL);
for (int dy = 0; dy < OS; ++dy) {
const Uint32 OUT_PX = (dy == OS - 1) ? DARK : BRIGHT;
const int DST_Y = (y * OS) + dy;
for (int dx = 0; dx < OS; ++dx) {
out[(DST_Y * (width * OS)) + ((x * OS) + dx)] = OUT_PX;
}
}
}
}
}
SDL_UnmapGPUTransferBuffer(device_, upload_buffer_);
}
// ---------------------------------------------------------------------------
// render — upload scene texture + PostFX pass → swapchain
// ---------------------------------------------------------------------------
void SDL3GPUShader::render() {
if (!is_initialized_) { return; }
SDL_GPUCommandBuffer* cmd = SDL_AcquireGPUCommandBuffer(device_);
if (cmd == nullptr) {
SDL_Log("SDL3GPUShader: SDL_AcquireGPUCommandBuffer failed: %s", SDL_GetError());
return;
}
// ---- Copy pass: transfer buffer → scene texture ----
SDL_GPUCopyPass* copy = SDL_BeginGPUCopyPass(cmd);
if (copy != nullptr) {
SDL_GPUTextureTransferInfo src = {};
src.transfer_buffer = upload_buffer_;
src.offset = 0;
src.pixels_per_row = static_cast<Uint32>(tex_width_);
src.rows_per_layer = static_cast<Uint32>(tex_height_);
SDL_GPUTextureRegion dst = {};
dst.texture = scene_texture_;
dst.w = static_cast<Uint32>(tex_width_);
dst.h = static_cast<Uint32>(tex_height_);
dst.d = 1;
SDL_UploadToGPUTexture(copy, &src, &dst, false);
SDL_EndGPUCopyPass(copy);
}
// ---- Acquire swapchain texture ----
SDL_GPUTexture* swapchain = nullptr;
Uint32 sw = 0;
Uint32 sh = 0;
if (!SDL_AcquireGPUSwapchainTexture(cmd, window_, &swapchain, &sw, &sh)) {
SDL_Log("SDL3GPUShader: SDL_AcquireGPUSwapchainTexture failed: %s", SDL_GetError());
SDL_SubmitGPUCommandBuffer(cmd);
return;
}
if (swapchain == nullptr) {
// Window minimized — skip frame
SDL_SubmitGPUCommandBuffer(cmd);
return;
}
// ---- Render pass: PostFX → swapchain ----
SDL_GPUColorTargetInfo color_target = {};
color_target.texture = swapchain;
color_target.load_op = SDL_GPU_LOADOP_CLEAR;
color_target.store_op = SDL_GPU_STOREOP_STORE;
color_target.clear_color = {.r = 0.0F, .g = 0.0F, .b = 0.0F, .a = 1.0F};
SDL_GPURenderPass* pass = SDL_BeginGPURenderPass(cmd, &color_target, 1, nullptr);
if (pass != nullptr) {
SDL_BindGPUGraphicsPipeline(pass, pipeline_);
// Calcular viewport usando las dimensiones lógicas del canvas (game_width_/height_),
// no las de la textura GPU (que pueden ser game×3 con supersampling).
// El GPU escala la textura para cubrir el viewport independientemente de su resolución.
float vx = 0.0F;
float vy = 0.0F;
float vw = 0.0F;
float vh = 0.0F;
if (integer_scale_) {
const int SCALE = std::max(1, std::min(static_cast<int>(sw) / game_width_, static_cast<int>(sh) / game_height_));
vw = static_cast<float>(game_width_ * SCALE);
vh = static_cast<float>(game_height_ * SCALE);
} else {
const float SCALE = std::min(
static_cast<float>(sw) / static_cast<float>(game_width_),
static_cast<float>(sh) / static_cast<float>(game_height_));
vw = static_cast<float>(game_width_) * SCALE;
vh = static_cast<float>(game_height_) * SCALE;
}
vx = std::floor((static_cast<float>(sw) - vw) * 0.5F);
vy = std::floor((static_cast<float>(sh) - vh) * 0.5F);
SDL_GPUViewport vp = {.x = vx, .y = vy, .w = vw, .h = vh, .min_depth = 0.0F, .max_depth = 1.0F};
SDL_SetGPUViewport(pass, &vp);
// pixel_scale: pixels físicos por pixel lógico de juego (para scanlines sin SS).
// Con SS las scanlines están horneadas en CPU → scanline_strength=0 → no se usa.
uniforms_.pixel_scale = (game_height_ > 0)
? (vh / static_cast<float>(game_height_))
: 1.0F;
uniforms_.time = static_cast<float>(SDL_GetTicks()) / 1000.0F;
uniforms_.oversample = static_cast<float>(oversample_);
// Con supersampling usamos LINEAR para que el escalado a zooms no-múltiplo-de-3
// promedia correctamente las filas de scanline horneadas en CPU.
SDL_GPUSampler* active_sampler = (oversample_ > 1 && linear_sampler_ != nullptr)
? linear_sampler_
: sampler_;
SDL_GPUTextureSamplerBinding binding = {};
binding.texture = scene_texture_;
binding.sampler = active_sampler;
SDL_BindGPUFragmentSamplers(pass, 0, &binding, 1);
SDL_PushGPUFragmentUniformData(cmd, 0, &uniforms_, sizeof(PostFXUniforms));
SDL_DrawGPUPrimitives(pass, 3, 1, 0, 0);
SDL_EndGPURenderPass(pass);
}
SDL_SubmitGPUCommandBuffer(cmd);
}
// ---------------------------------------------------------------------------
// cleanup — libera pipeline/texturas/buffer pero mantiene device + swapchain
// ---------------------------------------------------------------------------
void SDL3GPUShader::cleanup() {
is_initialized_ = false;
if (device_ != nullptr) {
SDL_WaitForGPUIdle(device_);
if (pipeline_ != nullptr) {
SDL_ReleaseGPUGraphicsPipeline(device_, pipeline_);
pipeline_ = nullptr;
}
if (scene_texture_ != nullptr) {
SDL_ReleaseGPUTexture(device_, scene_texture_);
scene_texture_ = nullptr;
}
if (upload_buffer_ != nullptr) {
SDL_ReleaseGPUTransferBuffer(device_, upload_buffer_);
upload_buffer_ = nullptr;
}
if (sampler_ != nullptr) {
SDL_ReleaseGPUSampler(device_, sampler_);
sampler_ = nullptr;
}
if (linear_sampler_ != nullptr) {
SDL_ReleaseGPUSampler(device_, linear_sampler_);
linear_sampler_ = nullptr;
}
// device_ y el claim de la ventana se mantienen vivos
}
}
// ---------------------------------------------------------------------------
// destroy — limpieza completa incluyendo device y swapchain (solo al cerrar)
// ---------------------------------------------------------------------------
void SDL3GPUShader::destroy() {
cleanup();
if (device_ != nullptr) {
if (window_ != nullptr) {
SDL_ReleaseWindowFromGPUDevice(device_, window_);
}
SDL_DestroyGPUDevice(device_);
device_ = nullptr;
}
window_ = nullptr;
}
// ---------------------------------------------------------------------------
// Shader creation helpers
// ---------------------------------------------------------------------------
auto SDL3GPUShader::createShaderMSL(SDL_GPUDevice* device,
const char* msl_source,
const char* entrypoint,
SDL_GPUShaderStage stage,
Uint32 num_samplers,
Uint32 num_uniform_buffers) -> SDL_GPUShader* {
SDL_GPUShaderCreateInfo info = {};
info.code = reinterpret_cast<const Uint8*>(msl_source);
info.code_size = std::strlen(msl_source) + 1;
info.entrypoint = entrypoint;
info.format = SDL_GPU_SHADERFORMAT_MSL;
info.stage = stage;
info.num_samplers = num_samplers;
info.num_uniform_buffers = num_uniform_buffers;
SDL_GPUShader* shader = SDL_CreateGPUShader(device, &info);
if (shader == nullptr) {
SDL_Log("SDL3GPUShader: MSL shader '%s' failed: %s", entrypoint, SDL_GetError());
}
return shader;
}
auto SDL3GPUShader::createShaderSPIRV(SDL_GPUDevice* device, // NOLINT(readability-convert-member-functions-to-static)
const uint8_t* spv_code,
size_t spv_size,
const char* entrypoint,
SDL_GPUShaderStage stage,
Uint32 num_samplers,
Uint32 num_uniform_buffers) -> SDL_GPUShader* {
SDL_GPUShaderCreateInfo info = {};
info.code = spv_code;
info.code_size = spv_size;
info.entrypoint = entrypoint;
info.format = SDL_GPU_SHADERFORMAT_SPIRV;
info.stage = stage;
info.num_samplers = num_samplers;
info.num_uniform_buffers = num_uniform_buffers;
SDL_GPUShader* shader = SDL_CreateGPUShader(device, &info);
if (shader == nullptr) {
SDL_Log("SDL3GPUShader: SPIRV shader '%s' failed: %s", entrypoint, SDL_GetError());
}
return shader;
}
void SDL3GPUShader::setPostFXParams(const PostFXParams& p) {
uniforms_.vignette_strength = p.vignette;
uniforms_.chroma_strength = p.chroma;
uniforms_.mask_strength = p.mask;
uniforms_.gamma_strength = p.gamma;
uniforms_.curvature = p.curvature;
uniforms_.bleeding = p.bleeding;
uniforms_.flicker = p.flicker;
// Con supersampling las scanlines se hornean en CPU (uploadPixels).
// El shader recibe strength=0 para no aplicarlas de nuevo en GPU.
baked_scanline_strength_ = p.scanlines;
uniforms_.scanline_strength = (oversample_ > 1) ? 0.0F : p.scanlines;
}
void SDL3GPUShader::setVSync(bool vsync) {
vsync_ = vsync;
if (device_ != nullptr && window_ != nullptr) {
SDL_SetGPUSwapchainParameters(device_, window_, SDL_GPU_SWAPCHAINCOMPOSITION_SDR, vsync_ ? SDL_GPU_PRESENTMODE_VSYNC : SDL_GPU_PRESENTMODE_IMMEDIATE);
}
}
void SDL3GPUShader::setScaleMode(bool integer_scale) {
integer_scale_ = integer_scale;
}
// ---------------------------------------------------------------------------
// setOversample — cambia el factor SS; recrea texturas si ya está inicializado
// ---------------------------------------------------------------------------
void SDL3GPUShader::setOversample(int factor) {
const int NEW_FACTOR = std::max(1, factor);
if (NEW_FACTOR == oversample_) { return; }
oversample_ = NEW_FACTOR;
if (is_initialized_) {
reinitTexturesAndBuffer();
// scanline_strength se actualizará en el próximo setPostFXParams
}
}
// ---------------------------------------------------------------------------
// reinitTexturesAndBuffer — recrea scene_texture_ y upload_buffer_ con el
// tamaño actual (game × oversample_). No toca pipeline ni samplers.
// ---------------------------------------------------------------------------
auto SDL3GPUShader::reinitTexturesAndBuffer() -> bool {
if (device_ == nullptr) { return false; }
SDL_WaitForGPUIdle(device_);
if (scene_texture_ != nullptr) {
SDL_ReleaseGPUTexture(device_, scene_texture_);
scene_texture_ = nullptr;
}
if (upload_buffer_ != nullptr) {
SDL_ReleaseGPUTransferBuffer(device_, upload_buffer_);
upload_buffer_ = nullptr;
}
tex_width_ = game_width_ * oversample_;
tex_height_ = game_height_ * oversample_;
uniforms_.screen_height = static_cast<float>(tex_height_);
uniforms_.oversample = static_cast<float>(oversample_);
SDL_GPUTextureCreateInfo tex_info = {};
tex_info.type = SDL_GPU_TEXTURETYPE_2D;
tex_info.format = SDL_GPU_TEXTUREFORMAT_B8G8R8A8_UNORM;
tex_info.usage = SDL_GPU_TEXTUREUSAGE_SAMPLER;
tex_info.width = static_cast<Uint32>(tex_width_);
tex_info.height = static_cast<Uint32>(tex_height_);
tex_info.layer_count_or_depth = 1;
tex_info.num_levels = 1;
scene_texture_ = SDL_CreateGPUTexture(device_, &tex_info);
if (scene_texture_ == nullptr) {
SDL_Log("SDL3GPUShader: reinit — failed to create scene texture: %s", SDL_GetError());
return false;
}
SDL_GPUTransferBufferCreateInfo tb_info = {};
tb_info.usage = SDL_GPU_TRANSFERBUFFERUSAGE_UPLOAD;
tb_info.size = static_cast<Uint32>(tex_width_ * tex_height_ * 4);
upload_buffer_ = SDL_CreateGPUTransferBuffer(device_, &tb_info);
if (upload_buffer_ == nullptr) {
SDL_Log("SDL3GPUShader: reinit — failed to create upload buffer: %s", SDL_GetError());
SDL_ReleaseGPUTexture(device_, scene_texture_);
scene_texture_ = nullptr;
return false;
}
SDL_Log("SDL3GPUShader: oversample %d → texture %dx%d", oversample_, tex_width_, tex_height_);
return true;
}
} // namespace Rendering

View File

@@ -0,0 +1,106 @@
#pragma once
#include <SDL3/SDL.h>
#include <SDL3/SDL_gpu.h>
#include "rendering/shader_backend.hpp"
// PostFX uniforms pushed to fragment stage each frame.
// Must match the MSL struct and GLSL uniform block layout.
// 12 floats = 48 bytes — meets Metal/Vulkan 16-byte alignment requirement.
struct PostFXUniforms {
float vignette_strength; // 0 = none, ~0.8 = subtle
float chroma_strength; // 0 = off, ~0.2 = subtle chromatic aberration
float scanline_strength; // 0 = off, 1 = full
float screen_height; // logical height in pixels (used by bleeding effect)
float mask_strength; // 0 = off, 1 = full phosphor dot mask
float gamma_strength; // 0 = off, 1 = full gamma 2.4/2.2 correction
float curvature; // 0 = flat, 1 = max barrel distortion
float bleeding; // 0 = off, 1 = max NTSC chrominance bleeding
float pixel_scale; // physical pixels per logical pixel (vh / tex_height_)
float time; // seconds since SDL init (SDL_GetTicks() / 1000.0f)
float oversample; // supersampling factor (1.0 = off, 3.0 = 3×SS)
float flicker; // 0 = off, 1 = phosphor flicker ~50 Hz — keep struct at 48 bytes (3 × 16)
};
namespace Rendering {
/**
* @brief Backend de shaders usando SDL3 GPU API (Metal en macOS, Vulkan/SPIR-V en Win/Linux)
*
* Reemplaza el backend OpenGL para que los shaders PostFX funcionen en macOS.
* Pipeline: Surface pixels (CPU) → SDL_GPUTransferBuffer → SDL_GPUTexture (scene)
* → PostFX render pass → swapchain → present
*/
class SDL3GPUShader : public ShaderBackend {
public:
SDL3GPUShader() = default;
~SDL3GPUShader() override;
auto init(SDL_Window* window,
SDL_Texture* texture,
const std::string& vertex_source,
const std::string& fragment_source) -> bool override;
void render() override;
void setTextureSize(float width, float height) override {}
void cleanup() final; // Libera pipeline/texturas pero mantiene el device vivo
void destroy(); // Limpieza completa (device + swapchain); llamar solo al cerrar
[[nodiscard]] auto isHardwareAccelerated() const -> bool override { return is_initialized_; }
// Sube píxeles ARGB8888 desde CPU; llamado antes de render()
void uploadPixels(const Uint32* pixels, int width, int height) override;
// Actualiza los parámetros de intensidad de los efectos PostFX
void setPostFXParams(const PostFXParams& p) override;
// Activa/desactiva VSync en el swapchain
void setVSync(bool vsync) override;
// Activa/desactiva escalado entero (integer scale)
void setScaleMode(bool integer_scale) override;
// Establece factor de supersampling (1 = off, 3 = 3×SS)
void setOversample(int factor) override;
private:
static auto createShaderMSL(SDL_GPUDevice* device,
const char* msl_source,
const char* entrypoint,
SDL_GPUShaderStage stage,
Uint32 num_samplers,
Uint32 num_uniform_buffers) -> SDL_GPUShader*;
static auto createShaderSPIRV(SDL_GPUDevice* device,
const uint8_t* spv_code,
size_t spv_size,
const char* entrypoint,
SDL_GPUShaderStage stage,
Uint32 num_samplers,
Uint32 num_uniform_buffers) -> SDL_GPUShader*;
auto createPipeline() -> bool;
auto reinitTexturesAndBuffer() -> bool; // Recrea textura y buffer con oversample actual
SDL_Window* window_ = nullptr;
SDL_GPUDevice* device_ = nullptr;
SDL_GPUGraphicsPipeline* pipeline_ = nullptr;
SDL_GPUTexture* scene_texture_ = nullptr;
SDL_GPUTransferBuffer* upload_buffer_ = nullptr;
SDL_GPUSampler* sampler_ = nullptr; // NEAREST — para path sin supersampling
SDL_GPUSampler* linear_sampler_ = nullptr; // LINEAR — para path con supersampling
PostFXUniforms uniforms_{.vignette_strength = 0.6F, .chroma_strength = 0.15F, .scanline_strength = 0.7F, .screen_height = 192.0F, .pixel_scale = 1.0F, .oversample = 1.0F};
int game_width_ = 0; // Dimensiones originales del canvas (sin SS)
int game_height_ = 0;
int tex_width_ = 0; // Dimensiones de la textura GPU (game × oversample_)
int tex_height_ = 0;
int oversample_ = 1; // Factor SS actual (1 o 3)
float baked_scanline_strength_ = 0.0F; // Guardado para hornear en CPU
bool is_initialized_ = false;
bool vsync_ = true;
bool integer_scale_ = false;
};
} // namespace Rendering