Files
aee/source/core/rendering/sdl3gpu/sdl3gpu_shader.cpp

1485 lines
64 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#include "core/rendering/sdl3gpu/sdl3gpu_shader.hpp"
#include <SDL3/SDL_log.h>
#include <algorithm> // std::min, std::max
#include <cmath> // std::floor
#include <cstring> // memcpy, strlen
#include <iostream> // std::cout
#ifndef __APPLE__
#include "core/rendering/sdl3gpu/crtpi_frag_spv.h"
#include "core/rendering/sdl3gpu/downscale_frag_spv.h"
#include "core/rendering/sdl3gpu/postfx_frag_spv.h"
#include "core/rendering/sdl3gpu/postfx_vert_spv.h"
#include "core/rendering/sdl3gpu/upscale_frag_spv.h"
#endif
#ifdef __APPLE__
// ============================================================================
// MSL shaders (Metal Shading Language) — macOS
// ============================================================================
// NOLINTBEGIN(readability-identifier-naming)
// Metal source for the vertex stage (entry point "postfx_vs") that every
// pipeline created in this file uses on Apple builds.
// It reads no vertex buffers: three hard-coded clip-space positions and UVs are
// selected by vertex_id, producing one triangle that extends beyond the
// [-1, 1] clip range.
// The raw string is handed to createShaderMSL(); its contents are shader code,
// not C++ — do not reformat.
static const char* POSTFX_VERT_MSL = R"(
#include <metal_stdlib>
using namespace metal;
struct PostVOut {
float4 pos [[position]];
float2 uv;
};
vertex PostVOut postfx_vs(uint vid [[vertex_id]]) {
const float2 positions[3] = { {-1.0, -1.0}, {3.0, -1.0}, {-1.0, 3.0} };
const float2 uvs[3] = { { 0.0, 1.0}, {2.0, 1.0}, { 0.0,-1.0} };
PostVOut out;
out.pos = float4(positions[vid], 0.0, 1.0);
out.uv = uvs[vid];
return out;
}
)";
// Metal source for the PostFX fragment stage (entry point "postfx_fs").
// Inputs: one texture/sampler pair and a PostFXUniforms block at buffer(0).
// Effects are applied in this order:
//   1. barrel curvature (pixels mapped outside the screen return opaque black)
//   2. horizontal chroma blur in YCbCr space ("bleeding")
//   3. chromatic aberration: R and B are re-sampled at +/- an animated offset
//   4. gamma linearize -> scanlines -> gamma encode
//   5. vignette, RGB phosphor mask, flicker
// NOTE(review): step 3 runs unconditionally and replaces the R/B channels
// produced by step 2, so bleeding only survives in the G channel — confirm
// that this is intended.
// NOTE(review): PostFXUniforms presumably has to match the C++ uniform struct
// pushed for this pipeline field-for-field — verify against the header.
// The raw string is handed to createShaderMSL(); its contents are shader code,
// not C++ — do not reformat.
static const char* POSTFX_FRAG_MSL = R"(
#include <metal_stdlib>
using namespace metal;
struct PostVOut {
float4 pos [[position]];
float2 uv;
};
struct PostFXUniforms {
float vignette_strength;
float chroma_strength;
float scanline_strength;
float screen_height;
float mask_strength;
float gamma_strength;
float curvature;
float bleeding;
float pixel_scale;
float time;
float oversample; // 1.0 = sin SS, 3.0 = 3× supersampling
float flicker; // 0 = off, 1 = phosphor flicker ~50 Hz
};
// YCbCr helpers for NTSC bleeding
static float3 rgb_to_ycc(float3 rgb) {
return float3(
0.299f*rgb.r + 0.587f*rgb.g + 0.114f*rgb.b,
-0.169f*rgb.r - 0.331f*rgb.g + 0.500f*rgb.b + 0.5f,
0.500f*rgb.r - 0.419f*rgb.g - 0.081f*rgb.b + 0.5f
);
}
static float3 ycc_to_rgb(float3 ycc) {
float y = ycc.x;
float cb = ycc.y - 0.5f;
float cr = ycc.z - 0.5f;
return clamp(float3(
y + 1.402f*cr,
y - 0.344f*cb - 0.714f*cr,
y + 1.772f*cb
), 0.0f, 1.0f);
}
fragment float4 postfx_fs(PostVOut in [[stage_in]],
texture2d<float> scene [[texture(0)]],
sampler samp [[sampler(0)]],
constant PostFXUniforms& u [[buffer(0)]]) {
float2 uv = in.uv;
// Curvatura barrel CRT
if (u.curvature > 0.0f) {
float2 c = uv - 0.5f;
float rsq = dot(c, c);
float2 dist = float2(0.05f, 0.1f) * u.curvature;
float2 barrelScale = 1.0f - 0.23f * dist;
c += c * (dist * rsq);
c *= barrelScale;
if (abs(c.x) >= 0.5f || abs(c.y) >= 0.5f) {
return float4(0.0f, 0.0f, 0.0f, 1.0f);
}
uv = c + 0.5f;
}
// Muestra base
float3 base = scene.sample(samp, uv).rgb;
// Sangrado NTSC — difuminado horizontal de crominancia.
// step = 1 pixel de juego en espacio UV (corrige SS: scene.get_width() = game_w * oversample).
float3 colour;
if (u.bleeding > 0.0f) {
float tw = float(scene.get_width());
float step = u.oversample / tw; // 1 pixel lógico en UV
float3 ycc = rgb_to_ycc(base);
float3 ycc_l2 = rgb_to_ycc(scene.sample(samp, uv - float2(2.0f*step, 0.0f)).rgb);
float3 ycc_l1 = rgb_to_ycc(scene.sample(samp, uv - float2(1.0f*step, 0.0f)).rgb);
float3 ycc_r1 = rgb_to_ycc(scene.sample(samp, uv + float2(1.0f*step, 0.0f)).rgb);
float3 ycc_r2 = rgb_to_ycc(scene.sample(samp, uv + float2(2.0f*step, 0.0f)).rgb);
ycc.yz = (ycc_l2.yz + ycc_l1.yz*2.0f + ycc.yz*2.0f + ycc_r1.yz*2.0f + ycc_r2.yz) / 8.0f;
colour = mix(base, ycc_to_rgb(ycc), u.bleeding);
} else {
colour = base;
}
// Aberración cromática (drift animado con time para efecto NTSC real)
float ca = u.chroma_strength * 0.005f * (1.0f + 0.15f * sin(u.time * 7.3f));
colour.r = scene.sample(samp, uv + float2(ca, 0.0f)).r;
colour.b = scene.sample(samp, uv - float2(ca, 0.0f)).b;
// Corrección gamma (linealizar antes de scanlines, codificar después)
if (u.gamma_strength > 0.0f) {
float3 lin = pow(colour, float3(2.4f));
colour = mix(colour, lin, u.gamma_strength);
}
// Scanlines — proporción 2/3 brillantes + 1/3 oscuras por fila lógica.
// Casos especiales: 1 subfila → sin efecto; 2 subfilas → 1+1 (50/50).
// Constantes ajustables:
const float SCAN_DARK_RATIO = 0.333f; // fracción de subfilas oscuras (ps >= 3)
const float SCAN_DARK_FLOOR = 0.42f; // multiplicador de brillo de subfilas oscuras
if (u.scanline_strength > 0.0f) {
float ps = max(1.0f, round(u.pixel_scale));
float frac_in_row = fract(uv.y * u.screen_height);
float row_pos = floor(frac_in_row * ps);
float bright_rows = (ps < 2.0f) ? ps : ((ps < 3.0f) ? 1.0f : floor(ps * (1.0f - SCAN_DARK_RATIO)));
float is_dark = step(bright_rows, row_pos);
float scan = mix(1.0f, SCAN_DARK_FLOOR, is_dark);
colour *= mix(1.0f, scan, u.scanline_strength);
}
if (u.gamma_strength > 0.0f) {
float3 enc = pow(colour, float3(1.0f/2.2f));
colour = mix(colour, enc, u.gamma_strength);
}
// Viñeta
float2 d = uv - 0.5f;
float vignette = 1.0f - dot(d, d) * u.vignette_strength;
colour *= clamp(vignette, 0.0f, 1.0f);
// Máscara de fósforo RGB — después de scanlines (orden original):
// filas brillantes saturadas → máscara invisible, filas oscuras → RGB visible.
if (u.mask_strength > 0.0f) {
float whichMask = fract(in.pos.x * 0.3333333f);
float3 mask = float3(0.80f);
if (whichMask < 0.3333333f) mask.x = 1.0f;
else if (whichMask < 0.6666667f) mask.y = 1.0f;
else mask.z = 1.0f;
colour = mix(colour, colour * mask, u.mask_strength);
}
// Parpadeo de fósforo CRT (~50 Hz)
if (u.flicker > 0.0f) {
float flicker_wave = sin(u.time * 100.0f) * 0.5f + 0.5f;
colour *= 1.0f - u.flicker * 0.04f * flicker_wave;
}
return float4(colour, 1.0f);
}
)";
// Metal source for the pass-through fragment stage (entry point "upscale_fs").
// It returns the sampled texel unchanged and takes no uniform buffer, so the
// resulting filtering is whatever the bound sampler provides.
// The raw string is handed to createShaderMSL(); its contents are shader code,
// not C++ — do not reformat.
static const char* UPSCALE_FRAG_MSL = R"(
#include <metal_stdlib>
using namespace metal;
struct VertOut { float4 pos [[position]]; float2 uv; };
fragment float4 upscale_fs(VertOut in [[stage_in]],
texture2d<float> scene [[texture(0)]],
sampler smp [[sampler(0)]])
{
return scene.sample(smp, in.uv);
}
)";
// Metal source for the Lanczos resampling fragment stage (entry point
// "downscale_fs").
// DownscaleUniforms.algorithm selects the kernel radius `a`: 0 -> 2, any other
// value -> 3. The shader visits the (2a+1) x (2a+1) texel centres around the
// source position, weights each sample with the separable kernel
// lanczos_w(dx) * lanczos_w(dy) and divides by the accumulated weight. If the
// weight sum is not positive it returns opaque black.
// The three pad floats round the uniform struct up to 16 bytes.
// The raw string is handed to createShaderMSL(); its contents are shader code,
// not C++ — do not reformat.
static const char* DOWNSCALE_FRAG_MSL = R"(
#include <metal_stdlib>
using namespace metal;
struct VertOut { float4 pos [[position]]; float2 uv; };
struct DownscaleUniforms { int algorithm; float pad0; float pad1; float pad2; };
static float lanczos_w(float t, float a) {
t = abs(t);
if (t < 0.0001f) { return 1.0f; }
if (t >= a) { return 0.0f; }
const float PI = 3.14159265358979f;
float pt = PI * t;
return (a * sin(pt) * sin(pt / a)) / (pt * pt);
}
fragment float4 downscale_fs(VertOut in [[stage_in]],
texture2d<float> source [[texture(0)]],
sampler smp [[sampler(0)]],
constant DownscaleUniforms& u [[buffer(0)]])
{
float2 src_size = float2(source.get_width(), source.get_height());
float2 p = in.uv * src_size;
float2 p_floor = floor(p);
float a = (u.algorithm == 0) ? 2.0f : 3.0f;
int win = int(a);
float4 color = float4(0.0f);
float weight_sum = 0.0f;
for (int j = -win; j <= win; j++) {
for (int i = -win; i <= win; i++) {
float2 tap_center = p_floor + float2(float(i), float(j)) + 0.5f;
float2 offset = tap_center - p;
float w = lanczos_w(offset.x, a) * lanczos_w(offset.y, a);
color += source.sample(smp, tap_center / src_size) * w;
weight_sum += w;
}
}
return (weight_sum > 0.0f) ? (color / weight_sum) : float4(0.0f, 0.0f, 0.0f, 1.0f);
}
)";
// Metal source for the CRT-Pi fragment stage (entry point "crtpi_fs").
// Controlled by CrtPiUniforms at buffer(0):
//   - enable_curvature: barrel-distorts the UV; coordinates that fall outside
//     the screen return opaque black.
//   - enable_sharper: chooses between sharpening both axes or only the
//     vertical axis when computing the sample coordinate.
//   - enable_scanlines / enable_gamma: multiply by the scanline weight and
//     bloom factor, optionally wrapped in input/output gamma.
//   - enable_multisample: averages the scanline weight over three taps.
//   - mask_type: 1 = two-column mask, 2 = three-column mask, otherwise none.
// The struct is declared as four groups of four 32-bit fields (the "vec4 #n"
// markers). NOTE(review): presumably this must match the C++ uniform struct
// pushed for this pipeline — verify against the header.
// The raw string is handed to createShaderMSL(); its contents are shader code,
// not C++ — do not reformat.
static const char* CRTPI_FRAG_MSL = R"(
#include <metal_stdlib>
using namespace metal;
struct PostVOut {
float4 pos [[position]];
float2 uv;
};
struct CrtPiUniforms {
// vec4 #0
float scanline_weight;
float scanline_gap_brightness;
float bloom_factor;
float input_gamma;
// vec4 #1
float output_gamma;
float mask_brightness;
float curvature_x;
float curvature_y;
// vec4 #2
int mask_type;
int enable_scanlines;
int enable_multisample;
int enable_gamma;
// vec4 #3
int enable_curvature;
int enable_sharper;
float texture_width;
float texture_height;
};
static float2 crtpi_distort(float2 coord, float2 screen_scale, float cx, float cy) {
float2 curvature = float2(cx, cy);
float2 barrel_scale = 1.0f - (0.23f * curvature);
coord *= screen_scale;
coord -= 0.5f;
float rsq = coord.x * coord.x + coord.y * coord.y;
coord += coord * (curvature * rsq);
coord *= barrel_scale;
if (abs(coord.x) >= 0.5f || abs(coord.y) >= 0.5f) { return float2(-1.0f); }
coord += 0.5f;
coord /= screen_scale;
return coord;
}
static float crtpi_scan_weight(float dist, float sw, float gap) {
return max(1.0f - dist * dist * sw, gap);
}
static float crtpi_scan_line(float dy, float filter_w, float sw, float gap, bool ms) {
float w = crtpi_scan_weight(dy, sw, gap);
if (ms) {
w += crtpi_scan_weight(dy - filter_w, sw, gap);
w += crtpi_scan_weight(dy + filter_w, sw, gap);
w *= 0.3333333f;
}
return w;
}
fragment float4 crtpi_fs(PostVOut in [[stage_in]],
texture2d<float> tex [[texture(0)]],
sampler samp [[sampler(0)]],
constant CrtPiUniforms& u [[buffer(0)]]) {
float2 tex_size = float2(u.texture_width, u.texture_height);
float filter_width = (768.0f / u.texture_height) / 3.0f;
float2 texcoord = in.uv;
if (u.enable_curvature != 0) {
texcoord = crtpi_distort(texcoord, float2(1.0f, 1.0f), u.curvature_x, u.curvature_y);
if (texcoord.x < 0.0f) { return float4(0.0f, 0.0f, 0.0f, 1.0f); }
}
float2 coord_in_pixels = texcoord * tex_size;
float2 tc;
float scan_weight;
if (u.enable_sharper != 0) {
float2 temp = floor(coord_in_pixels) + 0.5f;
tc = temp / tex_size;
float2 deltas = coord_in_pixels - temp;
scan_weight = crtpi_scan_line(deltas.y, filter_width, u.scanline_weight, u.scanline_gap_brightness, u.enable_multisample != 0);
float2 signs = sign(deltas);
deltas.x *= 2.0f;
deltas = deltas * deltas;
deltas.y = deltas.y * deltas.y;
deltas.x *= 0.5f;
deltas.y *= 8.0f;
deltas /= tex_size;
deltas *= signs;
tc = tc + deltas;
} else {
float temp_y = floor(coord_in_pixels.y) + 0.5f;
float y_coord = temp_y / tex_size.y;
float dy = coord_in_pixels.y - temp_y;
scan_weight = crtpi_scan_line(dy, filter_width, u.scanline_weight, u.scanline_gap_brightness, u.enable_multisample != 0);
float sign_y = sign(dy);
dy = dy * dy;
dy = dy * dy;
dy *= 8.0f;
dy /= tex_size.y;
dy *= sign_y;
tc = float2(texcoord.x, y_coord + dy);
}
float3 colour = tex.sample(samp, tc).rgb;
if (u.enable_scanlines != 0) {
if (u.enable_gamma != 0) { colour = pow(colour, float3(u.input_gamma)); }
colour *= scan_weight * u.bloom_factor;
if (u.enable_gamma != 0) { colour = pow(colour, float3(1.0f / u.output_gamma)); }
}
if (u.mask_type == 1) {
float wm = fract(in.pos.x * 0.5f);
float3 mask = (wm < 0.5f) ? float3(u.mask_brightness, 1.0f, u.mask_brightness)
: float3(1.0f, u.mask_brightness, 1.0f);
colour *= mask;
} else if (u.mask_type == 2) {
float wm = fract(in.pos.x * 0.3333333f);
float3 mask = float3(u.mask_brightness);
if (wm < 0.3333333f) mask.x = 1.0f;
else if (wm < 0.6666666f) mask.y = 1.0f;
else mask.z = 1.0f;
colour *= mask;
}
return float4(colour, 1.0f);
}
)";
// NOLINTEND(readability-identifier-naming)
#endif // __APPLE__
namespace Rendering {
// ---------------------------------------------------------------------------
// Destructor — delegates all teardown to destroy().
// ---------------------------------------------------------------------------
SDL3GPUShader::~SDL3GPUShader() {
    destroy();
}
// ---------------------------------------------------------------------------
// init — (re)initializes the GPU backend for `window`.
//
//   window   Window to claim for the GPU device and present to.
//   texture  SDL_Texture placeholder; only its size is read (game resolution).
//   (the two string parameters are unused)
//
// Returns true when every GPU resource was created. Returns false when the GPU
// path is disabled by configuration ("none" driver) or when any step fails; on
// failure after the device exists, cleanup() is called and the device is kept.
// ---------------------------------------------------------------------------
auto SDL3GPUShader::init(SDL_Window* window,
    SDL_Texture* texture,
    const std::string& /*vertex_source*/,
    const std::string& /*fragment_source*/) -> bool {
    // Already initialized (e.g. the border changed): release texture/pipeline
    // resources but keep the device alive to avoid conflicts with SDL_Renderer
    // on Windows/Vulkan.
    if (is_initialized_) {
        cleanup();
    }
    window_ = window;

    // Game resolution comes from the SDL_Texture placeholder. The outputs stay
    // at 0 if the query fails, which the size check below turns into an error.
    float fw = 0.0F;
    float fh = 0.0F;
    if (!SDL_GetTextureSize(texture, &fw, &fh)) {
        SDL_Log("SDL3GPUShader: SDL_GetTextureSize failed: %s", SDL_GetError());
    }
    game_width_ = static_cast<int>(fw);
    game_height_ = static_cast<int>(fh);
    uniforms_.screen_height = static_cast<float>(game_height_);
    uniforms_.oversample = static_cast<float>(oversample_);

    // ----------------------------------------------------------------
    // 1. Create GPU device (only if it does not exist yet)
    // ----------------------------------------------------------------
    if (preferred_driver_ == "none") {
        SDL_Log("SDL3GPUShader: GPU disabled by config, using SDL_Renderer fallback");
        driver_name_ = "";  // empty → RenderInfo will show "sdl"
        return false;
    }
    if (device_ == nullptr) {
#ifdef __APPLE__
        const SDL_GPUShaderFormat PREFERRED = SDL_GPU_SHADERFORMAT_MSL | SDL_GPU_SHADERFORMAT_METALLIB;
#else
        const SDL_GPUShaderFormat PREFERRED = SDL_GPU_SHADERFORMAT_SPIRV;
#endif
        const char* preferred = preferred_driver_.empty() ? nullptr : preferred_driver_.c_str();
        device_ = SDL_CreateGPUDevice(PREFERRED, false, preferred);
        if (device_ == nullptr && preferred != nullptr) {
            // The requested driver is unavailable: let SDL pick one.
            SDL_Log("SDL3GPUShader: driver '%s' not available, falling back to auto", preferred);
            device_ = SDL_CreateGPUDevice(PREFERRED, false, nullptr);
        }
        if (device_ == nullptr) {
            SDL_Log("SDL3GPUShader: SDL_CreateGPUDevice failed: %s", SDL_GetError());
            return false;
        }
        driver_name_ = SDL_GetGPUDeviceDriver(device_);
        std::cout << "GPU Driver : " << driver_name_ << '\n';

        // ----------------------------------------------------------------
        // 2. Claim window (once — not released until destroy())
        // ----------------------------------------------------------------
        if (!SDL_ClaimWindowForGPUDevice(device_, window_)) {
            SDL_Log("SDL3GPUShader: SDL_ClaimWindowForGPUDevice failed: %s", SDL_GetError());
            SDL_DestroyGPUDevice(device_);
            device_ = nullptr;
            return false;
        }
        // Not fatal: on failure the swapchain keeps its current parameters.
        if (!SDL_SetGPUSwapchainParameters(device_, window_, SDL_GPU_SWAPCHAINCOMPOSITION_SDR, bestPresentMode(vsync_))) {
            SDL_Log("SDL3GPUShader: SDL_SetGPUSwapchainParameters failed: %s", SDL_GetError());
        }
    }

    // A zero/negative game size would otherwise be handed straight to the
    // texture and transfer-buffer creation calls below.
    if (game_width_ <= 0 || game_height_ <= 0) {
        SDL_Log("SDL3GPUShader: invalid game size %dx%d from placeholder texture", game_width_, game_height_);
        cleanup();
        return false;
    }

    // ----------------------------------------------------------------
    // 3. Create scene texture (upload target, always game resolution)
    //    Format: R8G8B8A8_UNORM.
    //    NOTE(review): this comment used to name B8G8R8A8_UNORM / ARGB8888;
    //    confirm that the pixels given to uploadPixels() are in R,G,B,A byte
    //    order.
    // ----------------------------------------------------------------
    SDL_GPUTextureCreateInfo tex_info = {};
    tex_info.type = SDL_GPU_TEXTURETYPE_2D;
    tex_info.format = SDL_GPU_TEXTUREFORMAT_R8G8B8A8_UNORM;
    tex_info.usage = SDL_GPU_TEXTUREUSAGE_SAMPLER;
    tex_info.width = static_cast<Uint32>(game_width_);
    tex_info.height = static_cast<Uint32>(game_height_);
    tex_info.layer_count_or_depth = 1;
    tex_info.num_levels = 1;
    scene_texture_ = SDL_CreateGPUTexture(device_, &tex_info);
    if (scene_texture_ == nullptr) {
        SDL_Log("SDL3GPUShader: failed to create scene texture: %s", SDL_GetError());
        cleanup();
        return false;
    }

    // internal_texture_: created here when the internal-resolution multiplier
    // is > 1. The call's outcome is not checked here, so a failure does not
    // abort init.
    recreateInternalTexture();

    // scaled_texture_ is created on the first render(), once the window zoom is
    // known; 0 forces that first recreation.
    ss_factor_ = 0;

    // ----------------------------------------------------------------
    // 4. Create upload transfer buffer (CPU → GPU, always game resolution)
    //    4 bytes per pixel; computed unsigned to avoid signed overflow.
    // ----------------------------------------------------------------
    SDL_GPUTransferBufferCreateInfo tb_info = {};
    tb_info.usage = SDL_GPU_TRANSFERBUFFERUSAGE_UPLOAD;
    tb_info.size = static_cast<Uint32>(game_width_) * static_cast<Uint32>(game_height_) * 4U;
    upload_buffer_ = SDL_CreateGPUTransferBuffer(device_, &tb_info);
    if (upload_buffer_ == nullptr) {
        SDL_Log("SDL3GPUShader: failed to create upload buffer: %s", SDL_GetError());
        cleanup();
        return false;
    }

    // ----------------------------------------------------------------
    // 5. Create samplers: NEAREST (pixel art) + LINEAR (supersampling)
    // ----------------------------------------------------------------
    SDL_GPUSamplerCreateInfo samp_info = {};
    samp_info.min_filter = SDL_GPU_FILTER_NEAREST;
    samp_info.mag_filter = SDL_GPU_FILTER_NEAREST;
    samp_info.mipmap_mode = SDL_GPU_SAMPLERMIPMAPMODE_NEAREST;
    samp_info.address_mode_u = SDL_GPU_SAMPLERADDRESSMODE_CLAMP_TO_EDGE;
    samp_info.address_mode_v = SDL_GPU_SAMPLERADDRESSMODE_CLAMP_TO_EDGE;
    samp_info.address_mode_w = SDL_GPU_SAMPLERADDRESSMODE_CLAMP_TO_EDGE;
    sampler_ = SDL_CreateGPUSampler(device_, &samp_info);
    if (sampler_ == nullptr) {
        SDL_Log("SDL3GPUShader: failed to create sampler: %s", SDL_GetError());
        cleanup();
        return false;
    }

    SDL_GPUSamplerCreateInfo lsamp_info = {};
    lsamp_info.min_filter = SDL_GPU_FILTER_LINEAR;
    lsamp_info.mag_filter = SDL_GPU_FILTER_LINEAR;
    lsamp_info.mipmap_mode = SDL_GPU_SAMPLERMIPMAPMODE_NEAREST;
    lsamp_info.address_mode_u = SDL_GPU_SAMPLERADDRESSMODE_CLAMP_TO_EDGE;
    lsamp_info.address_mode_v = SDL_GPU_SAMPLERADDRESSMODE_CLAMP_TO_EDGE;
    lsamp_info.address_mode_w = SDL_GPU_SAMPLERADDRESSMODE_CLAMP_TO_EDGE;
    linear_sampler_ = SDL_CreateGPUSampler(device_, &lsamp_info);
    if (linear_sampler_ == nullptr) {
        SDL_Log("SDL3GPUShader: failed to create linear sampler: %s", SDL_GetError());
        cleanup();
        return false;
    }

    // ----------------------------------------------------------------
    // 6. Create PostFX graphics pipeline
    // ----------------------------------------------------------------
    if (!createPipeline()) {
        cleanup();
        return false;
    }

    // ----------------------------------------------------------------
    // 7. Create CrtPi graphics pipeline
    // ----------------------------------------------------------------
    if (!createCrtPiPipeline()) {
        cleanup();
        return false;
    }

    is_initialized_ = true;
    std::cout << "GPU Shader : initialized OK — game " << game_width_ << 'x' << game_height_ << ", oversample " << oversample_ << '\n';
    return true;
}
// ---------------------------------------------------------------------------
// createPipeline
// ---------------------------------------------------------------------------
auto SDL3GPUShader::createPipeline() -> bool { // NOLINT(readability-function-cognitive-complexity)
const SDL_GPUTextureFormat SWAPCHAIN_FMT = SDL_GetGPUSwapchainTextureFormat(device_, window_);
// ---- PostFX pipeline (scene/scaled → swapchain) ----
#ifdef __APPLE__
SDL_GPUShader* vert = createShaderMSL(device_, POSTFX_VERT_MSL, "postfx_vs", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
SDL_GPUShader* frag = createShaderMSL(device_, POSTFX_FRAG_MSL, "postfx_fs", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 1);
#else
SDL_GPUShader* vert = createShaderSPIRV(device_, kpostfx_vert_spv, kpostfx_vert_spv_size, "main", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
SDL_GPUShader* frag = createShaderSPIRV(device_, kpostfx_frag_spv, kpostfx_frag_spv_size, "main", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 1);
#endif
if ((vert == nullptr) || (frag == nullptr)) {
SDL_Log("SDL3GPUShader: failed to compile PostFX shaders");
if (vert != nullptr) { SDL_ReleaseGPUShader(device_, vert); }
if (frag != nullptr) { SDL_ReleaseGPUShader(device_, frag); }
return false;
}
SDL_GPUColorTargetBlendState no_blend = {};
no_blend.enable_blend = false;
no_blend.enable_color_write_mask = false;
SDL_GPUColorTargetDescription color_target = {};
color_target.format = SWAPCHAIN_FMT;
color_target.blend_state = no_blend;
SDL_GPUVertexInputState no_input = {};
SDL_GPUGraphicsPipelineCreateInfo pipe_info = {};
pipe_info.vertex_shader = vert;
pipe_info.fragment_shader = frag;
pipe_info.vertex_input_state = no_input;
pipe_info.primitive_type = SDL_GPU_PRIMITIVETYPE_TRIANGLELIST;
pipe_info.target_info.num_color_targets = 1;
pipe_info.target_info.color_target_descriptions = &color_target;
pipeline_ = SDL_CreateGPUGraphicsPipeline(device_, &pipe_info);
SDL_ReleaseGPUShader(device_, vert);
SDL_ReleaseGPUShader(device_, frag);
if (pipeline_ == nullptr) {
SDL_Log("SDL3GPUShader: PostFX pipeline creation failed: %s", SDL_GetError());
return false;
}
// ---- Upscale pipeline (scene → scaled_texture_, nearest) ----
#ifdef __APPLE__
SDL_GPUShader* uvert = createShaderMSL(device_, POSTFX_VERT_MSL, "postfx_vs", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
SDL_GPUShader* ufrag = createShaderMSL(device_, UPSCALE_FRAG_MSL, "upscale_fs", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 0);
#else
SDL_GPUShader* uvert = createShaderSPIRV(device_, kpostfx_vert_spv, kpostfx_vert_spv_size, "main", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
SDL_GPUShader* ufrag = createShaderSPIRV(device_, kupscale_frag_spv, kupscale_frag_spv_size, "main", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 0);
#endif
if ((uvert == nullptr) || (ufrag == nullptr)) {
SDL_Log("SDL3GPUShader: failed to compile upscale shaders");
if (uvert != nullptr) { SDL_ReleaseGPUShader(device_, uvert); }
if (ufrag != nullptr) { SDL_ReleaseGPUShader(device_, ufrag); }
return false;
}
SDL_GPUColorTargetDescription upscale_color_target = {};
upscale_color_target.format = SDL_GPU_TEXTUREFORMAT_R8G8B8A8_UNORM;
upscale_color_target.blend_state = no_blend;
SDL_GPUGraphicsPipelineCreateInfo upscale_pipe_info = {};
upscale_pipe_info.vertex_shader = uvert;
upscale_pipe_info.fragment_shader = ufrag;
upscale_pipe_info.vertex_input_state = no_input;
upscale_pipe_info.primitive_type = SDL_GPU_PRIMITIVETYPE_TRIANGLELIST;
upscale_pipe_info.target_info.num_color_targets = 1;
upscale_pipe_info.target_info.color_target_descriptions = &upscale_color_target;
upscale_pipeline_ = SDL_CreateGPUGraphicsPipeline(device_, &upscale_pipe_info);
SDL_ReleaseGPUShader(device_, uvert);
SDL_ReleaseGPUShader(device_, ufrag);
if (upscale_pipeline_ == nullptr) {
SDL_Log("SDL3GPUShader: upscale pipeline creation failed: %s", SDL_GetError());
return false;
}
// ---- PostFX offscreen pipeline (scaled_texture_ → postfx_texture_, B8G8R8A8) ----
// Mismos shaders que pipeline_ pero con formato de salida B8G8R8A8_UNORM para textura intermedia.
#ifdef __APPLE__
SDL_GPUShader* ofvert = createShaderMSL(device_, POSTFX_VERT_MSL, "postfx_vs", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
SDL_GPUShader* offrag = createShaderMSL(device_, POSTFX_FRAG_MSL, "postfx_fs", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 1);
#else
SDL_GPUShader* ofvert = createShaderSPIRV(device_, kpostfx_vert_spv, kpostfx_vert_spv_size, "main", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
SDL_GPUShader* offrag = createShaderSPIRV(device_, kpostfx_frag_spv, kpostfx_frag_spv_size, "main", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 1);
#endif
if ((ofvert == nullptr) || (offrag == nullptr)) {
SDL_Log("SDL3GPUShader: failed to compile PostFX offscreen shaders");
if (ofvert != nullptr) { SDL_ReleaseGPUShader(device_, ofvert); }
if (offrag != nullptr) { SDL_ReleaseGPUShader(device_, offrag); }
return false;
}
SDL_GPUColorTargetDescription offscreen_color_target = {};
offscreen_color_target.format = SDL_GPU_TEXTUREFORMAT_R8G8B8A8_UNORM;
offscreen_color_target.blend_state = no_blend;
SDL_GPUGraphicsPipelineCreateInfo offscreen_pipe_info = {};
offscreen_pipe_info.vertex_shader = ofvert;
offscreen_pipe_info.fragment_shader = offrag;
offscreen_pipe_info.vertex_input_state = no_input;
offscreen_pipe_info.primitive_type = SDL_GPU_PRIMITIVETYPE_TRIANGLELIST;
offscreen_pipe_info.target_info.num_color_targets = 1;
offscreen_pipe_info.target_info.color_target_descriptions = &offscreen_color_target;
postfx_offscreen_pipeline_ = SDL_CreateGPUGraphicsPipeline(device_, &offscreen_pipe_info);
SDL_ReleaseGPUShader(device_, ofvert);
SDL_ReleaseGPUShader(device_, offrag);
if (postfx_offscreen_pipeline_ == nullptr) {
SDL_Log("SDL3GPUShader: PostFX offscreen pipeline creation failed: %s", SDL_GetError());
return false;
}
// ---- Downscale pipeline (postfx_texture_ → swapchain, Lanczos) ----
#ifdef __APPLE__
SDL_GPUShader* dvert = createShaderMSL(device_, POSTFX_VERT_MSL, "postfx_vs", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
SDL_GPUShader* dfrag = createShaderMSL(device_, DOWNSCALE_FRAG_MSL, "downscale_fs", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 1);
#else
SDL_GPUShader* dvert = createShaderSPIRV(device_, kpostfx_vert_spv, kpostfx_vert_spv_size, "main", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
SDL_GPUShader* dfrag = createShaderSPIRV(device_, kdownscale_frag_spv, kdownscale_frag_spv_size, "main", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 1);
#endif
if ((dvert == nullptr) || (dfrag == nullptr)) {
SDL_Log("SDL3GPUShader: failed to compile downscale shaders");
if (dvert != nullptr) { SDL_ReleaseGPUShader(device_, dvert); }
if (dfrag != nullptr) { SDL_ReleaseGPUShader(device_, dfrag); }
return false;
}
SDL_GPUColorTargetDescription downscale_color_target = {};
downscale_color_target.format = SWAPCHAIN_FMT;
downscale_color_target.blend_state = no_blend;
SDL_GPUGraphicsPipelineCreateInfo downscale_pipe_info = {};
downscale_pipe_info.vertex_shader = dvert;
downscale_pipe_info.fragment_shader = dfrag;
downscale_pipe_info.vertex_input_state = no_input;
downscale_pipe_info.primitive_type = SDL_GPU_PRIMITIVETYPE_TRIANGLELIST;
downscale_pipe_info.target_info.num_color_targets = 1;
downscale_pipe_info.target_info.color_target_descriptions = &downscale_color_target;
downscale_pipeline_ = SDL_CreateGPUGraphicsPipeline(device_, &downscale_pipe_info);
SDL_ReleaseGPUShader(device_, dvert);
SDL_ReleaseGPUShader(device_, dfrag);
if (downscale_pipeline_ == nullptr) {
SDL_Log("SDL3GPUShader: downscale pipeline creation failed: %s", SDL_GetError());
return false;
}
return true;
}
// ---------------------------------------------------------------------------
// createCrtPiPipeline — pipeline dedicado para el shader CRT-Pi.
// Usa el mismo vertex shader que postfx (fullscreen-triangle genérico).
// El fragment shader es específico para el algoritmo CRT-Pi.
// Sin supersampling ni Lanczos: va siempre directo al swapchain.
// ---------------------------------------------------------------------------
auto SDL3GPUShader::createCrtPiPipeline() -> bool {
const SDL_GPUTextureFormat SWAPCHAIN_FMT = SDL_GetGPUSwapchainTextureFormat(device_, window_);
#ifdef __APPLE__
SDL_GPUShader* vert = createShaderMSL(device_, POSTFX_VERT_MSL, "postfx_vs", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
SDL_GPUShader* frag = createShaderMSL(device_, CRTPI_FRAG_MSL, "crtpi_fs", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 1);
#else
SDL_GPUShader* vert = createShaderSPIRV(device_, kpostfx_vert_spv, kpostfx_vert_spv_size, "main", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
SDL_GPUShader* frag = createShaderSPIRV(device_, kcrtpi_frag_spv, kcrtpi_frag_spv_size, "main", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 1);
#endif
if ((vert == nullptr) || (frag == nullptr)) {
SDL_Log("SDL3GPUShader: failed to compile CrtPi shaders");
if (vert != nullptr) { SDL_ReleaseGPUShader(device_, vert); }
if (frag != nullptr) { SDL_ReleaseGPUShader(device_, frag); }
return false;
}
SDL_GPUColorTargetBlendState no_blend = {};
no_blend.enable_blend = false;
no_blend.enable_color_write_mask = false;
SDL_GPUColorTargetDescription color_target = {};
color_target.format = SWAPCHAIN_FMT;
color_target.blend_state = no_blend;
SDL_GPUVertexInputState no_input = {};
SDL_GPUGraphicsPipelineCreateInfo pipe_info = {};
pipe_info.vertex_shader = vert;
pipe_info.fragment_shader = frag;
pipe_info.vertex_input_state = no_input;
pipe_info.primitive_type = SDL_GPU_PRIMITIVETYPE_TRIANGLELIST;
pipe_info.target_info.num_color_targets = 1;
pipe_info.target_info.color_target_descriptions = &color_target;
crtpi_pipeline_ = SDL_CreateGPUGraphicsPipeline(device_, &pipe_info);
SDL_ReleaseGPUShader(device_, vert);
SDL_ReleaseGPUShader(device_, frag);
if (crtpi_pipeline_ == nullptr) {
SDL_Log("SDL3GPUShader: CrtPi pipeline creation failed: %s", SDL_GetError());
return false;
}
return true;
}
// ---------------------------------------------------------------------------
// uploadPixels — copies ARGB8888 CPU pixels into the GPU transfer buffer.
// Con supersampling (oversample_ > 1) expande cada pixel del juego a un bloque
// oversample × oversample y hornea la scanline oscura en la última fila del bloque.
// ---------------------------------------------------------------------------
void SDL3GPUShader::uploadPixels(const Uint32* pixels, int width, int height) {
if (!is_initialized_ || (upload_buffer_ == nullptr)) { return; }
void* mapped = SDL_MapGPUTransferBuffer(device_, upload_buffer_, false);
if (mapped == nullptr) {
SDL_Log("SDL3GPUShader: SDL_MapGPUTransferBuffer failed: %s", SDL_GetError());
return;
}
// Copia directa — el upscale lo hace la GPU en el primer render pass
std::memcpy(mapped, pixels, static_cast<size_t>(width * height * 4));
SDL_UnmapGPUTransferBuffer(device_, upload_buffer_);
}
// ---------------------------------------------------------------------------
// render — upload scene texture + PostFX pass → swapchain
// ---------------------------------------------------------------------------
void SDL3GPUShader::render() { // NOLINT(readability-function-cognitive-complexity)
if (!is_initialized_) { return; }
// Paso 0: si SS activo, calcular el factor necesario según el zoom actual y recrear si cambió.
// Factor = primer múltiplo de 3 >= zoom (mín 3). Se recrea solo en saltos de factor.
if (oversample_ > 1 && game_height_ > 0) {
int win_w = 0;
int win_h = 0;
SDL_GetWindowSizeInPixels(window_, &win_w, &win_h);
const float ZOOM = static_cast<float>(win_h) / static_cast<float>(game_height_);
const int NEED_FACTOR = calcSsFactor(ZOOM);
if (NEED_FACTOR != ss_factor_) {
SDL_WaitForGPUIdle(device_);
recreateScaledTexture(NEED_FACTOR);
}
}
SDL_GPUCommandBuffer* cmd = SDL_AcquireGPUCommandBuffer(device_);
if (cmd == nullptr) {
SDL_Log("SDL3GPUShader: SDL_AcquireGPUCommandBuffer failed: %s", SDL_GetError());
return;
}
// ---- Copy pass: transfer buffer → scene texture (siempre a resolución del juego) ----
SDL_GPUCopyPass* copy = SDL_BeginGPUCopyPass(cmd);
if (copy != nullptr) {
SDL_GPUTextureTransferInfo src = {};
src.transfer_buffer = upload_buffer_;
src.offset = 0;
src.pixels_per_row = static_cast<Uint32>(game_width_);
src.rows_per_layer = static_cast<Uint32>(game_height_);
SDL_GPUTextureRegion dst = {};
dst.texture = scene_texture_;
dst.w = static_cast<Uint32>(game_width_);
dst.h = static_cast<Uint32>(game_height_);
dst.d = 1;
SDL_UploadToGPUTexture(copy, &src, &dst, false);
SDL_EndGPUCopyPass(copy);
}
// ---- Internal resolution NN upscale: scene_texture_ → internal_texture_ ----
// Multiplicador enter. Si > 1, tot el pipeline downstream veu internal_texture_
// com a "scene" (mida game·N × game·N) i els passos següents (SS, PostFX,
// Lanczos, letterbox) operen sobre aquesta font més gran. L'objectiu: quan el
// filtre final LINEAR estira a finestra, parteix d'una base més gran i es veu
// menys borrós. Amb internal_res_ == 1, s'omet el pas (zero overhead).
SDL_GPUTexture* source_texture = scene_texture_;
int source_width = game_width_;
int source_height = game_height_;
if (internal_res_ > 1 && internal_texture_ != nullptr && upscale_pipeline_ != nullptr) {
SDL_GPUColorTargetInfo internal_target = {};
internal_target.texture = internal_texture_;
internal_target.load_op = SDL_GPU_LOADOP_DONT_CARE;
internal_target.store_op = SDL_GPU_STOREOP_STORE;
SDL_GPURenderPass* ipass = SDL_BeginGPURenderPass(cmd, &internal_target, 1, nullptr);
if (ipass != nullptr) {
SDL_BindGPUGraphicsPipeline(ipass, upscale_pipeline_);
SDL_GPUTextureSamplerBinding ibinding = {};
ibinding.texture = scene_texture_;
ibinding.sampler = sampler_; // sempre NEAREST per a la còpia de resolució interna
SDL_BindGPUFragmentSamplers(ipass, 0, &ibinding, 1);
SDL_DrawGPUPrimitives(ipass, 3, 1, 0, 0);
SDL_EndGPURenderPass(ipass);
}
source_texture = internal_texture_;
source_width = game_width_ * internal_res_;
source_height = game_height_ * internal_res_;
}
// ---- Upscale pass: source_texture → scaled_texture_ ----
// Si 4:3 actiu, l'estirament s'aplica ací directament (320x200 → W*factor × H*factor*1.2)
// El filtre s'aplica sempre (texture_filter_linear_), independent de 4:3.
// L'effective_scene/height reflecteix la textura real que veuen els shaders.
// Sense SS ni stretch: scene_texture_ a game_height_.
// Amb SS o stretch: scaled_texture_ a l'alçada escalada (amb o sense 4:3).
SDL_GPUTexture* effective_scene = source_texture;
// `effective_height` reflecteix l'alçada lògica del frame (per a
// scanlines i viewport), no la mida real de la textura. Es manté
// a `game_height_` encara que internal_res_ > 1 — el multiplicador
// només afecta la resolució física de la font, no l'aspect ni el
// nombre de scanlines visibles.
int effective_height = game_height_;
(void)source_width; // només es fa servir com a context informatiu
if (oversample_ > 1 && scaled_texture_ != nullptr && upscale_pipeline_ != nullptr) {
SDL_GPUColorTargetInfo upscale_target = {};
upscale_target.texture = scaled_texture_;
upscale_target.load_op = SDL_GPU_LOADOP_DONT_CARE;
upscale_target.store_op = SDL_GPU_STOREOP_STORE;
// Filtre global: s'aplica sempre (ja no depèn de 4:3).
bool use_linear = texture_filter_linear_;
SDL_GPURenderPass* upass = SDL_BeginGPURenderPass(cmd, &upscale_target, 1, nullptr);
if (upass != nullptr) {
SDL_BindGPUGraphicsPipeline(upass, upscale_pipeline_);
SDL_GPUTextureSamplerBinding ubinding = {};
ubinding.texture = source_texture;
ubinding.sampler = (use_linear && linear_sampler_ != nullptr) ? linear_sampler_ : sampler_;
SDL_BindGPUFragmentSamplers(upass, 0, &ubinding, 1);
SDL_DrawGPUPrimitives(upass, 3, 1, 0, 0);
SDL_EndGPURenderPass(upass);
}
effective_scene = scaled_texture_;
effective_height = stretch_4_3_ ? static_cast<int>(static_cast<float>(game_height_) * 1.2F) : game_height_;
} else if (stretch_4_3_) {
// Sense SS: el viewport s'encarrega de l'estirament geomètric
effective_height = static_cast<int>(static_cast<float>(game_height_) * 1.2F);
}
(void)source_height;
// ---- Acquire swapchain texture ----
SDL_GPUTexture* swapchain = nullptr;
Uint32 sw = 0;
Uint32 sh = 0;
if (!SDL_AcquireGPUSwapchainTexture(cmd, window_, &swapchain, &sw, &sh)) {
SDL_Log("SDL3GPUShader: SDL_AcquireGPUSwapchainTexture failed: %s", SDL_GetError());
SDL_SubmitGPUCommandBuffer(cmd);
return;
}
if (swapchain == nullptr) {
// Window minimized — skip frame
SDL_SubmitGPUCommandBuffer(cmd);
return;
}
// ---- Calcular viewport (dimensions lògiques del canvas) ----
// Si 4:3 actiu, effective_height ja és 240 (la textura estirada)
const float logical_w = static_cast<float>(game_width_);
const float logical_h = static_cast<float>(effective_height);
float vx = 0.0F;
float vy = 0.0F;
float vw = 0.0F;
float vh = 0.0F;
switch (scaling_mode_) {
case Options::ScalingMode::DISABLED:
// 1:1, sense escala (pot ser diminut en finestres grans)
vw = logical_w;
vh = logical_h;
break;
case Options::ScalingMode::STRETCH:
// Omple tota la finestra, escala no uniforme
vw = static_cast<float>(sw);
vh = static_cast<float>(sh);
break;
case Options::ScalingMode::LETTERBOX: {
const float SCALE = std::min(static_cast<float>(sw) / logical_w,
static_cast<float>(sh) / logical_h);
vw = logical_w * SCALE;
vh = logical_h * SCALE;
break;
}
case Options::ScalingMode::OVERSCAN: {
const float SCALE = std::max(static_cast<float>(sw) / logical_w,
static_cast<float>(sh) / logical_h);
vw = logical_w * SCALE;
vh = logical_h * SCALE;
break;
}
case Options::ScalingMode::INTEGER: {
const int SCALE = std::max(1, std::min(static_cast<int>(sw) / static_cast<int>(logical_w),
static_cast<int>(sh) / static_cast<int>(logical_h)));
vw = logical_w * static_cast<float>(SCALE);
vh = logical_h * static_cast<float>(SCALE);
break;
}
}
vx = std::floor((static_cast<float>(sw) - vw) * 0.5F);
vy = std::floor((static_cast<float>(sh) - vh) * 0.5F);
// pixel_scale: subpíxels per píxel lògic.
// Sense SS: vh/effective_height (zoom de finestra).
// Amb SS: ss_factor_ exacte (3, 6, 9...).
if (oversample_ > 1 && ss_factor_ > 0) {
uniforms_.pixel_scale = static_cast<float>(ss_factor_);
} else {
uniforms_.pixel_scale = (effective_height > 0) ? (vh / static_cast<float>(effective_height)) : 1.0F;
}
uniforms_.screen_height = static_cast<float>(effective_height);
uniforms_.time = static_cast<float>(SDL_GetTicks()) / 1000.0F;
uniforms_.oversample = (oversample_ > 1 && ss_factor_ > 0)
? static_cast<float>(ss_factor_)
: 1.0F;
// ---- Path CrtPi: directo scene_texture_ → swapchain, sin SS ni Lanczos ----
if (active_shader_ == ShaderType::CRTPI && crtpi_pipeline_ != nullptr) {
SDL_GPUColorTargetInfo color_target = {};
color_target.texture = swapchain;
color_target.load_op = SDL_GPU_LOADOP_CLEAR;
color_target.store_op = SDL_GPU_STOREOP_STORE;
color_target.clear_color = {.r = 0.0F, .g = 0.0F, .b = 0.0F, .a = 1.0F};
SDL_GPURenderPass* pass = SDL_BeginGPURenderPass(cmd, &color_target, 1, nullptr);
if (pass != nullptr) {
SDL_BindGPUGraphicsPipeline(pass, crtpi_pipeline_);
SDL_GPUViewport vp = {.x = vx, .y = vy, .w = vw, .h = vh, .min_depth = 0.0F, .max_depth = 1.0F};
SDL_SetGPUViewport(pass, &vp);
SDL_GPUTextureSamplerBinding binding = {};
binding.texture = effective_scene;
binding.sampler = sampler_; // NEAREST: el shader CrtPi fa el seu propi filtrat analític
SDL_BindGPUFragmentSamplers(pass, 0, &binding, 1);
// Injectar texture_width/height abans del push
crtpi_uniforms_.texture_width = static_cast<float>(game_width_);
crtpi_uniforms_.texture_height = static_cast<float>(effective_height);
SDL_PushGPUFragmentUniformData(cmd, 0, &crtpi_uniforms_, sizeof(CrtPiUniforms));
SDL_DrawGPUPrimitives(pass, 3, 1, 0, 0);
SDL_EndGPURenderPass(pass);
}
SDL_SubmitGPUCommandBuffer(cmd);
return;
}
// ---- Determinar si usar el path Lanczos (SS activo + algo seleccionado) ----
const bool USE_LANCZOS = (oversample_ > 1 && downscale_algo_ > 0 && scaled_texture_ != nullptr && postfx_texture_ != nullptr && postfx_offscreen_pipeline_ != nullptr && downscale_pipeline_ != nullptr);
if (USE_LANCZOS) {
// ---- Pass A: PostFX → postfx_texture_ (full scaled size, sin viewport) ----
SDL_GPUColorTargetInfo postfx_target = {};
postfx_target.texture = postfx_texture_;
postfx_target.load_op = SDL_GPU_LOADOP_CLEAR;
postfx_target.store_op = SDL_GPU_STOREOP_STORE;
postfx_target.clear_color = {.r = 0.0F, .g = 0.0F, .b = 0.0F, .a = 1.0F};
SDL_GPURenderPass* ppass = SDL_BeginGPURenderPass(cmd, &postfx_target, 1, nullptr);
if (ppass != nullptr) {
SDL_BindGPUGraphicsPipeline(ppass, postfx_offscreen_pipeline_);
SDL_GPUTextureSamplerBinding pbinding = {};
pbinding.texture = scaled_texture_;
pbinding.sampler = sampler_; // NEAREST: 1:1 pass, efectos calculados analíticamente
SDL_BindGPUFragmentSamplers(ppass, 0, &pbinding, 1);
SDL_PushGPUFragmentUniformData(cmd, 0, &uniforms_, sizeof(PostFXUniforms));
SDL_DrawGPUPrimitives(ppass, 3, 1, 0, 0);
SDL_EndGPURenderPass(ppass);
}
// ---- Pass B: Downscale Lanczos → swapchain (con viewport/letterbox) ----
SDL_GPUColorTargetInfo ds_target = {};
ds_target.texture = swapchain;
ds_target.load_op = SDL_GPU_LOADOP_CLEAR;
ds_target.store_op = SDL_GPU_STOREOP_STORE;
ds_target.clear_color = {.r = 0.0F, .g = 0.0F, .b = 0.0F, .a = 1.0F};
SDL_GPURenderPass* dpass = SDL_BeginGPURenderPass(cmd, &ds_target, 1, nullptr);
if (dpass != nullptr) {
SDL_BindGPUGraphicsPipeline(dpass, downscale_pipeline_);
SDL_GPUViewport vp = {.x = vx, .y = vy, .w = vw, .h = vh, .min_depth = 0.0F, .max_depth = 1.0F};
SDL_SetGPUViewport(dpass, &vp);
SDL_GPUTextureSamplerBinding dbinding = {};
dbinding.texture = postfx_texture_;
dbinding.sampler = sampler_; // NEAREST: el shader Lanczos hace su propia interpolación
SDL_BindGPUFragmentSamplers(dpass, 0, &dbinding, 1);
// algorithm: 0=Lanczos2, 1=Lanczos3 (downscale_algo_ es 1-based)
DownscaleUniforms downscale_u = {.algorithm = downscale_algo_ - 1, .pad0 = 0.0F, .pad1 = 0.0F, .pad2 = 0.0F};
SDL_PushGPUFragmentUniformData(cmd, 0, &downscale_u, sizeof(DownscaleUniforms));
SDL_DrawGPUPrimitives(dpass, 3, 1, 0, 0);
SDL_EndGPURenderPass(dpass);
}
} else {
// ---- Render pass: PostFX → swapchain directamente (bilinear, comportamiento original) ----
SDL_GPUColorTargetInfo color_target = {};
color_target.texture = swapchain;
color_target.load_op = SDL_GPU_LOADOP_CLEAR;
color_target.store_op = SDL_GPU_STOREOP_STORE;
color_target.clear_color = {.r = 0.0F, .g = 0.0F, .b = 0.0F, .a = 1.0F};
SDL_GPURenderPass* pass = SDL_BeginGPURenderPass(cmd, &color_target, 1, nullptr);
if (pass != nullptr) {
SDL_BindGPUGraphicsPipeline(pass, pipeline_);
SDL_GPUViewport vp = {.x = vx, .y = vy, .w = vw, .h = vh, .min_depth = 0.0F, .max_depth = 1.0F};
SDL_SetGPUViewport(pass, &vp);
// Amb SS: llegir de scaled_texture_ amb LINEAR; sense SS: effective_scene amb NEAREST.
SDL_GPUTexture* input_texture = (oversample_ > 1 && scaled_texture_ != nullptr)
? scaled_texture_
: effective_scene;
SDL_GPUSampler* active_sampler = (oversample_ > 1 && linear_sampler_ != nullptr)
? linear_sampler_
: sampler_;
SDL_GPUTextureSamplerBinding binding = {};
binding.texture = input_texture;
binding.sampler = active_sampler;
SDL_BindGPUFragmentSamplers(pass, 0, &binding, 1);
SDL_PushGPUFragmentUniformData(cmd, 0, &uniforms_, sizeof(PostFXUniforms));
SDL_DrawGPUPrimitives(pass, 3, 1, 0, 0);
SDL_EndGPURenderPass(pass);
}
}
SDL_SubmitGPUCommandBuffer(cmd);
}
// ---------------------------------------------------------------------------
// cleanup — libera pipeline/texturas/buffer pero mantiene device + swapchain
// ---------------------------------------------------------------------------
void SDL3GPUShader::cleanup() {
is_initialized_ = false;
if (device_ != nullptr) {
SDL_WaitForGPUIdle(device_);
if (pipeline_ != nullptr) {
SDL_ReleaseGPUGraphicsPipeline(device_, pipeline_);
pipeline_ = nullptr;
}
if (crtpi_pipeline_ != nullptr) {
SDL_ReleaseGPUGraphicsPipeline(device_, crtpi_pipeline_);
crtpi_pipeline_ = nullptr;
}
if (postfx_offscreen_pipeline_ != nullptr) {
SDL_ReleaseGPUGraphicsPipeline(device_, postfx_offscreen_pipeline_);
postfx_offscreen_pipeline_ = nullptr;
}
if (upscale_pipeline_ != nullptr) {
SDL_ReleaseGPUGraphicsPipeline(device_, upscale_pipeline_);
upscale_pipeline_ = nullptr;
}
if (downscale_pipeline_ != nullptr) {
SDL_ReleaseGPUGraphicsPipeline(device_, downscale_pipeline_);
downscale_pipeline_ = nullptr;
}
if (scene_texture_ != nullptr) {
SDL_ReleaseGPUTexture(device_, scene_texture_);
scene_texture_ = nullptr;
}
if (internal_texture_ != nullptr) {
SDL_ReleaseGPUTexture(device_, internal_texture_);
internal_texture_ = nullptr;
}
if (scaled_texture_ != nullptr) {
SDL_ReleaseGPUTexture(device_, scaled_texture_);
scaled_texture_ = nullptr;
}
if (postfx_texture_ != nullptr) {
SDL_ReleaseGPUTexture(device_, postfx_texture_);
postfx_texture_ = nullptr;
}
ss_factor_ = 0;
if (upload_buffer_ != nullptr) {
SDL_ReleaseGPUTransferBuffer(device_, upload_buffer_);
upload_buffer_ = nullptr;
}
if (sampler_ != nullptr) {
SDL_ReleaseGPUSampler(device_, sampler_);
sampler_ = nullptr;
}
if (linear_sampler_ != nullptr) {
SDL_ReleaseGPUSampler(device_, linear_sampler_);
linear_sampler_ = nullptr;
}
// device_ y el claim de la ventana se mantienen vivos
}
}
// ---------------------------------------------------------------------------
// destroy — full teardown: runs cleanup(), then releases the window from the
// device and destroys the device itself (only meant for shutdown).
// ---------------------------------------------------------------------------
void SDL3GPUShader::destroy() {
    cleanup();

    if (device_ == nullptr) {
        // Nothing to tear down on the GPU side; just forget the window.
        window_ = nullptr;
        return;
    }

    if (window_ != nullptr) {
        SDL_ReleaseWindowFromGPUDevice(device_, window_);
    }
    SDL_DestroyGPUDevice(device_);

    device_ = nullptr;
    window_ = nullptr;
}
// ---------------------------------------------------------------------------
// Shader creation helpers
// ---------------------------------------------------------------------------

// Creates a GPU shader from a NUL-terminated MSL source string.
//   device              GPU device that will own the shader.
//   msl_source          MSL source text (must be NUL-terminated).
//   entrypoint          name of the entry function inside the source.
//   stage               shader stage passed through to SDL.
//   num_samplers        sampler count declared to SDL.
//   num_uniform_buffers uniform buffer count declared to SDL.
// Returns the new shader, or nullptr (after logging the SDL error) on failure.
auto SDL3GPUShader::createShaderMSL(SDL_GPUDevice* device,
                                    const char* msl_source,
                                    const char* entrypoint,
                                    SDL_GPUShaderStage stage,
                                    Uint32 num_samplers,
                                    Uint32 num_uniform_buffers) -> SDL_GPUShader* {
    SDL_GPUShaderCreateInfo create_info = {};
    create_info.format = SDL_GPU_SHADERFORMAT_MSL;
    create_info.stage = stage;
    create_info.entrypoint = entrypoint;
    create_info.code = reinterpret_cast<const Uint8*>(msl_source);
    // The reported size includes the terminating NUL byte.
    create_info.code_size = std::strlen(msl_source) + 1;
    create_info.num_samplers = num_samplers;
    create_info.num_uniform_buffers = num_uniform_buffers;

    SDL_GPUShader* const RESULT = SDL_CreateGPUShader(device, &create_info);
    if (RESULT != nullptr) {
        return RESULT;
    }
    SDL_Log("SDL3GPUShader: MSL shader '%s' failed: %s", entrypoint, SDL_GetError());
    return nullptr;
}
auto SDL3GPUShader::createShaderSPIRV(SDL_GPUDevice* device,  // NOLINT(readability-convert-member-functions-to-static)
                                      const uint8_t* spv_code,
                                      size_t spv_size,
                                      const char* entrypoint,
                                      SDL_GPUShaderStage stage,
                                      Uint32 num_samplers,
                                      Uint32 num_uniform_buffers) -> SDL_GPUShader* {
    // Creates a GPU shader from a SPIR-V blob of spv_size bytes.
    // Returns the new shader, or nullptr (after logging the SDL error) on failure.
    SDL_GPUShaderCreateInfo create_info = {};
    create_info.format = SDL_GPU_SHADERFORMAT_SPIRV;
    create_info.stage = stage;
    create_info.entrypoint = entrypoint;
    create_info.code = spv_code;
    create_info.code_size = spv_size;
    create_info.num_samplers = num_samplers;
    create_info.num_uniform_buffers = num_uniform_buffers;

    SDL_GPUShader* const RESULT = SDL_CreateGPUShader(device, &create_info);
    if (RESULT != nullptr) {
        return RESULT;
    }
    SDL_Log("SDL3GPUShader: SPIRV shader '%s' failed: %s", entrypoint, SDL_GetError());
    return nullptr;
}
// Copies the user-facing PostFX parameters into the uniform block that is
// pushed to the fragment shader. Fields not listed here are left untouched.
void SDL3GPUShader::setPostFXParams(const PostFXParams& p) {
    auto& dst = uniforms_;

    dst.vignette_strength = p.vignette;
    dst.chroma_strength = p.chroma;
    // Scanlines are always applied by the PostFX shader on the GPU.
    dst.scanline_strength = p.scanlines;
    dst.mask_strength = p.mask;
    dst.gamma_strength = p.gamma;
    dst.curvature = p.curvature;
    dst.bleeding = p.bleeding;
    dst.flicker = p.flicker;
}
// Copies the user-facing CrtPi parameters into the CrtPi uniform block.
// Boolean switches are stored as 1 / 0.
void SDL3GPUShader::setCrtPiParams(const CrtPiParams& p) {
    auto& dst = crtpi_uniforms_;
    const auto AS_FLAG = [](bool enabled) { return enabled ? 1 : 0; };

    // Scalar parameters
    dst.scanline_weight = p.scanline_weight;
    dst.scanline_gap_brightness = p.scanline_gap_brightness;
    dst.bloom_factor = p.bloom_factor;
    dst.input_gamma = p.input_gamma;
    dst.output_gamma = p.output_gamma;
    dst.mask_brightness = p.mask_brightness;
    dst.curvature_x = p.curvature_x;
    dst.curvature_y = p.curvature_y;
    dst.mask_type = p.mask_type;

    // Feature switches
    dst.enable_scanlines = AS_FLAG(p.enable_scanlines);
    dst.enable_multisample = AS_FLAG(p.enable_multisample);
    dst.enable_gamma = AS_FLAG(p.enable_gamma);
    dst.enable_curvature = AS_FLAG(p.enable_curvature);
    dst.enable_sharper = AS_FLAG(p.enable_sharper);

    // texture_width / texture_height are injected by render() every frame.
}
// Selects which shader path is active; only the member is updated here.
void SDL3GPUShader::setActiveShader(ShaderType type) {
    this->active_shader_ = type;
}
// Picks the swapchain present mode for the requested vsync setting.
// With vsync on, VSYNC is returned directly. With vsync off, IMMEDIATE is
// preferred, then MAILBOX, and VSYNC is the last-resort fallback.
auto SDL3GPUShader::bestPresentMode(bool vsync) const -> SDL_GPUPresentMode {
    if (!vsync) {
        // IMMEDIATE: no synchronisation — the driver may not support it under Wayland/compositing.
        if (SDL_WindowSupportsGPUPresentMode(device_, window_, SDL_GPU_PRESENTMODE_IMMEDIATE)) {
            return SDL_GPU_PRESENTMODE_IMMEDIATE;
        }
        // MAILBOX: presents on the next VBlank without blocking the thread (triple buffer).
        if (SDL_WindowSupportsGPUPresentMode(device_, window_, SDL_GPU_PRESENTMODE_MAILBOX)) {
            SDL_Log("SDL3GPUShader: IMMEDIATE no soportado, usando MAILBOX para VSync desactivado");
            return SDL_GPU_PRESENTMODE_MAILBOX;
        }
        SDL_Log("SDL3GPUShader: IMMEDIATE y MAILBOX no soportados, forzando VSYNC");
    }
    return SDL_GPU_PRESENTMODE_VSYNC;
}
void SDL3GPUShader::setVSync(bool vsync) {
vsync_ = vsync;
if (device_ != nullptr && window_ != nullptr) {
SDL_SetGPUSwapchainParameters(device_, window_, SDL_GPU_SWAPCHAINCOMPOSITION_SDR, bestPresentMode(vsync_));
}
}
// Stores the scaling mode; render() reads it when computing the viewport.
void SDL3GPUShader::setScalingMode(Options::ScalingMode mode) {
    this->scaling_mode_ = mode;
}
// setInternalResolution — changes the internal resolution multiplier.
// Values below 1 are treated as 1. When the value actually changes and the
// backend is initialised, the intermediate texture is recreated with the new
// dimensions (game size × multiplier) after waiting for the GPU to go idle.
void SDL3GPUShader::setInternalResolution(int multiplier) {
    const int CLAMPED = (multiplier < 1) ? 1 : multiplier;
    if (CLAMPED != internal_res_) {
        internal_res_ = CLAMPED;
        if (is_initialized_ && device_ != nullptr) {
            SDL_WaitForGPUIdle(device_);
            recreateInternalTexture();
        }
    }
}
// Enables or disables the 4:3 vertical stretch.
// Like the other setters in this class, a call that does not change the value
// is a no-op: this avoids a full GPU idle wait and a reallocation of the
// supersampling textures when the caller re-applies the current setting.
// When the value changes and supersampling is active, scaled_texture_ (and
// postfx_texture_) are recreated so their height matches the new setting.
void SDL3GPUShader::setStretch4_3(bool enabled) {
    if (enabled == stretch_4_3_) {
        return;
    }
    stretch_4_3_ = enabled;
    if (!is_initialized_ || device_ == nullptr) {
        return;
    }
    // Recreate scaled_texture_ so it has the right dimensions (with or without 4:3).
    if (oversample_ > 1 && ss_factor_ > 0) {
        SDL_WaitForGPUIdle(device_);
        recreateScaledTexture(ss_factor_);
    }
}
// ---------------------------------------------------------------------------
// setOversample — changes the supersampling factor (values below 1 become 1);
// if the value changes and the backend is initialised, textures and the
// upload buffer are rebuilt through reinitTexturesAndBuffer().
// ---------------------------------------------------------------------------
void SDL3GPUShader::setOversample(int factor) {
    const int CLAMPED = (factor < 1) ? 1 : factor;
    if (CLAMPED != oversample_) {
        oversample_ = CLAMPED;
        if (is_initialized_) {
            // scanline_strength is refreshed by the next setPostFXParams() call.
            reinitTexturesAndBuffer();
        }
    }
}
// Stores the downscale algorithm selector, limited to the range [0, 2].
// render() treats 0 as "no Lanczos pass" and passes (value - 1) to the shader.
void SDL3GPUShader::setDownscaleAlgo(int algo) {
    constexpr int MIN_ALGO = 0;
    constexpr int MAX_ALGO = 2;
    downscale_algo_ = std::clamp(algo, MIN_ALGO, MAX_ALGO);
}
auto SDL3GPUShader::getSsTextureSize() const -> std::pair<int, int> {
if (ss_factor_ <= 1) { return {0, 0}; }
return {game_width_ * ss_factor_, game_height_ * ss_factor_};
}
// ---------------------------------------------------------------------------
// reinitTexturesAndBuffer — recrea scene_texture_, scaled_texture_ y
// upload_buffer_ con el factor oversample_ actual. No toca pipelines ni samplers.
// ---------------------------------------------------------------------------
auto SDL3GPUShader::reinitTexturesAndBuffer() -> bool {
if (device_ == nullptr) { return false; }
SDL_WaitForGPUIdle(device_);
if (scene_texture_ != nullptr) {
SDL_ReleaseGPUTexture(device_, scene_texture_);
scene_texture_ = nullptr;
}
if (internal_texture_ != nullptr) {
SDL_ReleaseGPUTexture(device_, internal_texture_);
internal_texture_ = nullptr;
}
// scaled_texture_ se libera aquí; se recreará en el primer render() con el factor correcto
if (scaled_texture_ != nullptr) {
SDL_ReleaseGPUTexture(device_, scaled_texture_);
scaled_texture_ = nullptr;
}
ss_factor_ = 0;
if (upload_buffer_ != nullptr) {
SDL_ReleaseGPUTransferBuffer(device_, upload_buffer_);
upload_buffer_ = nullptr;
}
uniforms_.screen_height = static_cast<float>(game_height_);
uniforms_.oversample = static_cast<float>(oversample_);
// scene_texture_: siempre a resolución del juego
SDL_GPUTextureCreateInfo tex_info = {};
tex_info.type = SDL_GPU_TEXTURETYPE_2D;
tex_info.format = SDL_GPU_TEXTUREFORMAT_R8G8B8A8_UNORM;
tex_info.usage = SDL_GPU_TEXTUREUSAGE_SAMPLER;
tex_info.width = static_cast<Uint32>(game_width_);
tex_info.height = static_cast<Uint32>(game_height_);
tex_info.layer_count_or_depth = 1;
tex_info.num_levels = 1;
scene_texture_ = SDL_CreateGPUTexture(device_, &tex_info);
if (scene_texture_ == nullptr) {
SDL_Log("SDL3GPUShader: reinit — failed to create scene texture: %s", SDL_GetError());
return false;
}
// upload_buffer_: siempre a resolución del juego
SDL_GPUTransferBufferCreateInfo tb_info = {};
tb_info.usage = SDL_GPU_TRANSFERBUFFERUSAGE_UPLOAD;
tb_info.size = static_cast<Uint32>(game_width_ * game_height_ * 4);
upload_buffer_ = SDL_CreateGPUTransferBuffer(device_, &tb_info);
if (upload_buffer_ == nullptr) {
SDL_Log("SDL3GPUShader: reinit — failed to create upload buffer: %s", SDL_GetError());
SDL_ReleaseGPUTexture(device_, scene_texture_);
scene_texture_ = nullptr;
return false;
}
// Recrea la textura interna si internal_res_ > 1 — manté coherència
// en canvis d'SS que passen per reinitTexturesAndBuffer().
recreateInternalTexture();
SDL_Log("SDL3GPUShader: reinit — scene %dx%d, SS %s, internal ×%d (scaled se creará en render)",
game_width_,
game_height_,
oversample_ > 1 ? "on" : "off",
internal_res_);
return true;
}
// ---------------------------------------------------------------------------
// calcSsFactor — first multiple of 3 that is >= zoom, with a minimum of 3.
// Examples: zoom 1,2,3 → 3; zoom 4,5,6 → 6; zoom 4.4 → 6; zoom 7,8,9 → 9.
// ---------------------------------------------------------------------------
auto SDL3GPUShader::calcSsFactor(float zoom) -> int {
    constexpr int STEP = 3;
    // Number of STEP-sized increments needed to cover the zoom, rounded up.
    int steps = static_cast<int>(std::ceil(zoom / static_cast<float>(STEP)));
    if (steps < 1) {
        steps = 1;
    }
    return steps * STEP;
}
// ---------------------------------------------------------------------------
// recreateScaledTexture — libera y recrea scaled_texture_ para el factor dado.
// Llamar solo cuando device_ no esté ejecutando comandos (SDL_WaitForGPUIdle previo).
// ---------------------------------------------------------------------------
auto SDL3GPUShader::recreateScaledTexture(int factor) -> bool {
if (scaled_texture_ != nullptr) {
SDL_ReleaseGPUTexture(device_, scaled_texture_);
scaled_texture_ = nullptr;
}
if (postfx_texture_ != nullptr) {
SDL_ReleaseGPUTexture(device_, postfx_texture_);
postfx_texture_ = nullptr;
}
ss_factor_ = 0;
const int W = game_width_ * factor;
// Si 4:3 actiu, l'alçada inclou l'estirament (200 * factor * 1.2)
const int H = stretch_4_3_
? static_cast<int>(static_cast<float>(game_height_) * 1.2F * static_cast<float>(factor))
: game_height_ * factor;
SDL_GPUTextureCreateInfo info = {};
info.type = SDL_GPU_TEXTURETYPE_2D;
info.format = SDL_GPU_TEXTUREFORMAT_R8G8B8A8_UNORM;
info.usage = SDL_GPU_TEXTUREUSAGE_SAMPLER | SDL_GPU_TEXTUREUSAGE_COLOR_TARGET;
info.width = static_cast<Uint32>(W);
info.height = static_cast<Uint32>(H);
info.layer_count_or_depth = 1;
info.num_levels = 1;
scaled_texture_ = SDL_CreateGPUTexture(device_, &info);
if (scaled_texture_ == nullptr) {
SDL_Log("SDL3GPUShader: failed to create scaled texture %dx%d (factor %d): %s",
W,
H,
factor,
SDL_GetError());
return false;
}
postfx_texture_ = SDL_CreateGPUTexture(device_, &info);
if (postfx_texture_ == nullptr) {
SDL_Log("SDL3GPUShader: failed to create postfx texture %dx%d (factor %d): %s",
W,
H,
factor,
SDL_GetError());
SDL_ReleaseGPUTexture(device_, scaled_texture_);
scaled_texture_ = nullptr;
return false;
}
ss_factor_ = factor;
SDL_Log("SDL3GPUShader: scaled+postfx textures %dx%d (factor %d×)", W, H, factor);
return true;
}
// ---------------------------------------------------------------------------
// recreateInternalTexture — libera y recrea internal_texture_ para el
// multiplicador internal_res_ actual. Si val 1, allibera i queda a nullptr
// (el pipeline ometrà la còpia al següent render).
// ---------------------------------------------------------------------------
auto SDL3GPUShader::recreateInternalTexture() -> bool {
if (internal_texture_ != nullptr) {
SDL_ReleaseGPUTexture(device_, internal_texture_);
internal_texture_ = nullptr;
}
if (internal_res_ <= 1 || device_ == nullptr) return true;
const int W = game_width_ * internal_res_;
const int H = game_height_ * internal_res_;
SDL_GPUTextureCreateInfo info = {};
info.type = SDL_GPU_TEXTURETYPE_2D;
info.format = SDL_GPU_TEXTUREFORMAT_R8G8B8A8_UNORM;
info.usage = SDL_GPU_TEXTUREUSAGE_SAMPLER | SDL_GPU_TEXTUREUSAGE_COLOR_TARGET;
info.width = static_cast<Uint32>(W);
info.height = static_cast<Uint32>(H);
info.layer_count_or_depth = 1;
info.num_levels = 1;
internal_texture_ = SDL_CreateGPUTexture(device_, &info);
if (internal_texture_ == nullptr) {
SDL_Log("SDL3GPUShader: failed to create internal texture %dx%d (×%d): %s",
W, H, internal_res_, SDL_GetError());
return false;
}
SDL_Log("SDL3GPUShader: internal texture %dx%d (×%d)", W, H, internal_res_);
return true;
}
} // namespace Rendering