shader postfx nou + spv regenerat + msl extret a headers

This commit is contained in:
2026-05-17 15:06:24 +02:00
parent 70aa58ec46
commit dcb004b5a7
7 changed files with 10048 additions and 8387 deletions
+47 -25
View File
@@ -6,7 +6,9 @@
// xxd -i postfx.frag.spv > ../../source/core/rendering/sdl3gpu/postfx_frag_spv.h
//
// PostFXUniforms must match exactly the C++ struct in sdl3gpu_shader.hpp
// (8 floats, 32 bytes, std140/scalar layout).
// (16 floats = 4 × vec4 = 64 bytes, std140/scalar layout).
// IMPORTANT: Qualsevol canvi ací cal replicar-lo a mà a
// source/core/rendering/sdl3gpu/msl/postfx_frag.msl.h (no hi ha generador).
layout(location = 0) in vec2 v_uv;
layout(location = 0) out vec4 out_color;
@@ -15,7 +17,7 @@ layout(set = 2, binding = 0) uniform sampler2D scene;
layout(set = 3, binding = 0) uniform PostFXUniforms {
float vignette_strength;
float chroma_strength;
float chroma_min; // intensitat mínima de l'aberració cromàtica
float scanline_strength;
float screen_height;
float mask_strength;
@@ -24,10 +26,28 @@ layout(set = 3, binding = 0) uniform PostFXUniforms {
float bleeding;
float pixel_scale; // physical pixels per logical pixel (vh / tex_height_)
float time; // seconds since SDL init
float oversample; // supersampling factor (1.0 = off, 3.0 = 3×SS)
float flicker; // 0 = off, 1 = phosphor flicker ~50 Hz — 48 bytes total (3 × 16)
float flicker; // 0 = off, 1 = phosphor flicker ~50 Hz
float chroma_max; // intensitat màxima; si == chroma_min → chroma estàtic
// vec4 #3 — paràmetres de scanlines (exposats per preset YAML)
float scan_dark_ratio; // fracció de subfila fosca per fila lògica (1/3 ≈ 0.333)
float scan_dark_floor; // multiplicador de brillantor de la subfila fosca
float scan_edge_soft; // 0 = step dur; 1 = suavitzat d'1 píxel físic (estil crtpi)
float pad3; // padding per tancar a 64 bytes (4 × vec4)
} u;
// Mostreig bilinear horitzontal d'un canal RGB. Evita el "tic-tac" del sampler
// NEAREST quan l'offset de chroma és subpíxel: sense interpolar, l'offset
// arrodonia entre 1 i 2 píxels i el drift temporal feia un parpelleig discret.
float sampleBilinearX(vec2 uv_target, int channel) {
vec2 tex_size = vec2(textureSize(scene, 0));
float px = uv_target.x * tex_size.x - 0.5;
float p_floor = floor(px);
float f = px - p_floor;
vec4 c0 = texture(scene, vec2((p_floor + 0.5) / tex_size.x, uv_target.y));
vec4 c1 = texture(scene, vec2((p_floor + 1.5) / tex_size.x, uv_target.y));
return mix(c0[channel], c1[channel], f);
}
// YCbCr helpers for NTSC bleeding
vec3 rgb_to_ycc(vec3 rgb) {
return vec3(
@@ -69,11 +89,11 @@ void main() {
vec3 base = texture(scene, uv).rgb;
// Sangrado NTSC — difuminado horizontal de crominancia.
// step = 1 pixel lógico de juego en UV (corrige SS: textureSize.x = game_w * oversample).
// step = 1 pixel lógico de juego en UV.
vec3 colour;
if (u.bleeding > 0.0) {
float tw = float(textureSize(scene, 0).x);
float step = u.oversample / tw; // 1 pixel lógico en UV
float step = 1.0 / tw; // 1 pixel lógico en UV
vec3 ycc = rgb_to_ycc(base);
vec3 ycc_l2 = rgb_to_ycc(texture(scene, uv - vec2(2.0*step, 0.0)).rgb);
vec3 ycc_l1 = rgb_to_ycc(texture(scene, uv - vec2(1.0*step, 0.0)).rgb);
@@ -85,10 +105,14 @@ void main() {
colour = base;
}
// Aberración cromática (drift animado con time para efecto NTSC real)
float ca = u.chroma_strength * 0.005 * (1.0 + 0.15 * sin(u.time * 7.3));
colour.r = texture(scene, uv + vec2(ca, 0.0)).r;
colour.b = texture(scene, uv - vec2(ca, 0.0)).b;
// Aberración cromática — intensitat varia entre chroma_min i chroma_max amb
// una sinusoidal (si min == max, queda estàtica). Mostreig bilinear horitzontal
// per evitar el "tic-tac" del NEAREST sampler quan l'offset és subpíxel.
if (u.chroma_min > 0.0 || u.chroma_max > 0.0) {
float ca = mix(u.chroma_min, u.chroma_max, 0.5 + 0.5 * sin(u.time * 7.3)) * 0.005;
colour.r = sampleBilinearX(uv + vec2(ca, 0.0), 0);
colour.b = sampleBilinearX(uv - vec2(ca, 0.0), 2);
}
// Corrección gamma (linealizar antes de scanlines, codificar después)
if (u.gamma_strength > 0.0) {
@@ -96,22 +120,20 @@ void main() {
colour = mix(colour, lin, u.gamma_strength);
}
// Scanlines — proporción 2/3 brillantes + 1/3 oscuras por fila lógica.
// Casos especiales: 1 subfila → sin efecto; 2 subfilas → 1+1 (50/50).
// Constantes ajustables:
const float SCAN_DARK_RATIO = 0.333; // fracción de subfilas oscuras (ps >= 3)
const float SCAN_DARK_FLOOR = 0.42; // multiplicador de brillo de subfilas oscuras
// Scanlines — tècnica dels 3 subpíxels verticals per píxel lògic (aee/projecte_2026):
// franja fosca ocupant `scan_dark_ratio` al final de cada fila lògica. La transició es
// suavitza amb smoothstep d'ample ≈ 1 píxel físic (estil crtpi: filtratge analític
// continu), controlat per `scan_edge_soft`. A 0 és equivalent al step dur antic.
if (u.scanline_strength > 0.0) {
float ps = max(1.0, round(u.pixel_scale));
float frac_in_row = fract(uv.y * u.screen_height);
float row_pos = floor(frac_in_row * ps);
// bright_rows: cuántas subfilas son brillantes
// ps==1 → ps (todo brillante → is_dark nunca se activa)
// ps==2 → 1 brillante + 1 oscura
// ps>=3 → floor(ps * (1 - DARK_RATIO)) brillantes
float bright_rows = (ps < 2.0) ? ps : ((ps < 3.0) ? 1.0 : floor(ps * (1.0 - SCAN_DARK_RATIO)));
float is_dark = step(bright_rows, row_pos);
float scan = mix(1.0, SCAN_DARK_FLOOR, is_dark);
float ps = max(u.pixel_scale, 1.0);
float sub = fract(uv.y * u.screen_height); // [0,1) dins la fila lògica
float dark_center = 1.0 - u.scan_dark_ratio * 0.5; // centre de la franja fosca
float d = abs(sub - dark_center);
d = min(d, 1.0 - d); // wrap a la fila següent
float half_width = u.scan_dark_ratio * 0.5;
float softness = u.scan_edge_soft * 0.5 / ps; // mig píxel físic a cada costat
float band = 1.0 - smoothstep(half_width - softness, half_width + softness, d);
float scan = mix(1.0, u.scan_dark_floor, band);
colour *= mix(1.0, scan, u.scanline_strength);
}
@@ -0,0 +1,144 @@
#pragma once
#ifdef __APPLE__
// Fragment shader del shader "crtpi" (algoritme CRT-Pi): scanlines amb
// pesos gaussians, multisample opcional, gamma i màscara de subpíxels.
namespace Rendering::Msl {
inline constexpr const char* kCrtpiFrag = R"(
#include <metal_stdlib>
using namespace metal;
struct PostVOut {
float4 pos [[position]];
float2 uv;
};
struct CrtPiUniforms {
float scanline_weight;
float scanline_gap_brightness;
float bloom_factor;
float input_gamma;
float output_gamma;
float mask_brightness;
float curvature_x;
float curvature_y;
int mask_type;
int enable_scanlines;
int enable_multisample;
int enable_gamma;
int enable_curvature;
int enable_sharper;
float texture_width;
float texture_height;
};
static float2 crtpi_distort(float2 coord, float2 screen_scale, float cx, float cy) {
float2 curvature = float2(cx, cy);
float2 barrel_scale = 1.0f - (0.23f * curvature);
coord *= screen_scale;
coord -= 0.5f;
float rsq = coord.x * coord.x + coord.y * coord.y;
coord += coord * (curvature * rsq);
coord *= barrel_scale;
if (abs(coord.x) >= 0.5f || abs(coord.y) >= 0.5f) { return float2(-1.0f); }
coord += 0.5f;
coord /= screen_scale;
return coord;
}
static float crtpi_scan_weight(float dist, float sw, float gap) {
return max(1.0f - dist * dist * sw, gap);
}
static float crtpi_scan_line(float dy, float filter_w, float sw, float gap, bool ms) {
float w = crtpi_scan_weight(dy, sw, gap);
if (ms) {
w += crtpi_scan_weight(dy - filter_w, sw, gap);
w += crtpi_scan_weight(dy + filter_w, sw, gap);
w *= 0.3333333f;
}
return w;
}
fragment float4 crtpi_fs(PostVOut in [[stage_in]],
texture2d<float> tex [[texture(0)]],
sampler samp [[sampler(0)]],
constant CrtPiUniforms& u [[buffer(0)]]) {
float2 tex_size = float2(u.texture_width, u.texture_height);
// Amplada del filtre de scanline analític. 768 = alçada de referència
// CRT a la qual es va tarar l'algoritme original; 3 = divisió per
// subpíxel (R/G/B) del multisample. El resultat escala amb la textura
// d'entrada, de manera que més alçada → filtre més fi.
const float CRT_REFERENCE_HEIGHT = 768.0f;
const float SUBPIXEL_DIV = 3.0f;
float filter_width = (CRT_REFERENCE_HEIGHT / u.texture_height) / SUBPIXEL_DIV;
float2 texcoord = in.uv;
if (u.enable_curvature != 0) {
texcoord = crtpi_distort(texcoord, float2(1.0f, 1.0f), u.curvature_x, u.curvature_y);
if (texcoord.x < 0.0f) { return float4(0.0f, 0.0f, 0.0f, 1.0f); }
}
float2 coord_in_pixels = texcoord * tex_size;
float2 tc;
float scan_weight;
if (u.enable_sharper != 0) {
float2 temp = floor(coord_in_pixels) + 0.5f;
tc = temp / tex_size;
float2 deltas = coord_in_pixels - temp;
scan_weight = crtpi_scan_line(deltas.y, filter_width, u.scanline_weight, u.scanline_gap_brightness, u.enable_multisample != 0);
float2 signs = sign(deltas);
deltas.x *= 2.0f;
deltas = deltas * deltas;
deltas.y = deltas.y * deltas.y;
deltas.x *= 0.5f;
deltas.y *= 8.0f;
deltas /= tex_size;
deltas *= signs;
tc = tc + deltas;
} else {
float temp_y = floor(coord_in_pixels.y) + 0.5f;
float y_coord = temp_y / tex_size.y;
float dy = coord_in_pixels.y - temp_y;
scan_weight = crtpi_scan_line(dy, filter_width, u.scanline_weight, u.scanline_gap_brightness, u.enable_multisample != 0);
float sign_y = sign(dy);
dy = dy * dy;
dy = dy * dy;
dy *= 8.0f;
dy /= tex_size.y;
dy *= sign_y;
tc = float2(texcoord.x, y_coord + dy);
}
float3 colour = tex.sample(samp, tc).rgb;
if (u.enable_scanlines != 0) {
if (u.enable_gamma != 0) { colour = pow(colour, float3(u.input_gamma)); }
colour *= scan_weight * u.bloom_factor;
if (u.enable_gamma != 0) { colour = pow(colour, float3(1.0f / u.output_gamma)); }
}
if (u.mask_type == 1) {
float wm = fract(in.pos.x * 0.5f);
float3 mask = (wm < 0.5f) ? float3(u.mask_brightness, 1.0f, u.mask_brightness)
: float3(1.0f, u.mask_brightness, 1.0f);
colour *= mask;
} else if (u.mask_type == 2) {
float wm = fract(in.pos.x * 0.3333333f);
float3 mask = float3(u.mask_brightness);
if (wm < 0.3333333f) mask.x = 1.0f;
else if (wm < 0.6666666f) mask.y = 1.0f;
else mask.z = 1.0f;
colour *= mask;
}
return float4(colour, 1.0f);
}
)";
} // namespace Rendering::Msl
#endif // __APPLE__
@@ -0,0 +1,168 @@
#pragma once
#ifdef __APPLE__
// Fragment shader del shader "postfx": vignette, chroma, scanlines, mask,
// gamma, curvature, bleeding i flicker. Els paràmetres venen via uniforms.
//
// IMPORTANT: mantenir sincronitzat a mà amb data/shaders/postfx.frag. SDL3 GPU
// compila aquest string MSL en runtime; no hi ha generador automàtic. Qualsevol
// canvi a la struct d'uniforms o a la lògica del GLSL cal replicar-lo ací al
// mateix commit. Mida total = 64 bytes (4 × vec4).
namespace Rendering::Msl {
inline constexpr const char* kPostfxFrag = R"(
#include <metal_stdlib>
using namespace metal;
struct PostVOut {
float4 pos [[position]];
float2 uv;
};
struct PostFXUniforms {
float vignette_strength;
float chroma_min;
float scanline_strength;
float screen_height;
float mask_strength;
float gamma_strength;
float curvature;
float bleeding;
float pixel_scale;
float time;
float flicker;
float chroma_max;
// vec4 #3 — paràmetres de scanlines (exposats per preset YAML)
float scan_dark_ratio;
float scan_dark_floor;
float scan_edge_soft;
float pad3;
};
// Mostreig bilinear horitzontal d'un canal RGB. Evita el "tic-tac" del sampler
// NEAREST quan l'offset de chroma és subpíxel.
static float sampleBilinearX(float2 uv_target, int channel, texture2d<float> scene, sampler samp) {
float2 tex_size = float2(scene.get_width(), scene.get_height());
float px = uv_target.x * tex_size.x - 0.5f;
float p_floor = floor(px);
float f = px - p_floor;
float4 c0 = scene.sample(samp, float2((p_floor + 0.5f) / tex_size.x, uv_target.y));
float4 c1 = scene.sample(samp, float2((p_floor + 1.5f) / tex_size.x, uv_target.y));
return mix(c0[channel], c1[channel], f);
}
static float3 rgb_to_ycc(float3 rgb) {
return float3(
0.299f*rgb.r + 0.587f*rgb.g + 0.114f*rgb.b,
-0.169f*rgb.r - 0.331f*rgb.g + 0.500f*rgb.b + 0.5f,
0.500f*rgb.r - 0.419f*rgb.g - 0.081f*rgb.b + 0.5f
);
}
static float3 ycc_to_rgb(float3 ycc) {
float y = ycc.x;
float cb = ycc.y - 0.5f;
float cr = ycc.z - 0.5f;
return clamp(float3(
y + 1.402f*cr,
y - 0.344f*cb - 0.714f*cr,
y + 1.772f*cb
), 0.0f, 1.0f);
}
fragment float4 postfx_fs(PostVOut in [[stage_in]],
texture2d<float> scene [[texture(0)]],
sampler samp [[sampler(0)]],
constant PostFXUniforms& u [[buffer(0)]]) {
float2 uv = in.uv;
if (u.curvature > 0.0f) {
float2 c = uv - 0.5f;
float rsq = dot(c, c);
float2 dist = float2(0.05f, 0.1f) * u.curvature;
float2 barrelScale = 1.0f - 0.23f * dist;
c += c * (dist * rsq);
c *= barrelScale;
if (abs(c.x) >= 0.5f || abs(c.y) >= 0.5f) {
return float4(0.0f, 0.0f, 0.0f, 1.0f);
}
uv = c + 0.5f;
}
float3 base = scene.sample(samp, uv).rgb;
float3 colour;
if (u.bleeding > 0.0f) {
float tw = float(scene.get_width());
float step = 1.0f / tw;
float3 ycc = rgb_to_ycc(base);
float3 ycc_l2 = rgb_to_ycc(scene.sample(samp, uv - float2(2.0f*step, 0.0f)).rgb);
float3 ycc_l1 = rgb_to_ycc(scene.sample(samp, uv - float2(1.0f*step, 0.0f)).rgb);
float3 ycc_r1 = rgb_to_ycc(scene.sample(samp, uv + float2(1.0f*step, 0.0f)).rgb);
float3 ycc_r2 = rgb_to_ycc(scene.sample(samp, uv + float2(2.0f*step, 0.0f)).rgb);
ycc.yz = (ycc_l2.yz + ycc_l1.yz*2.0f + ycc.yz*2.0f + ycc_r1.yz*2.0f + ycc_r2.yz) / 8.0f;
colour = mix(base, ycc_to_rgb(ycc), u.bleeding);
} else {
colour = base;
}
// Chroma — varia entre chroma_min i chroma_max via sinusoidal; si min == max
// queda estàtic. Mostreig bilinear horitzontal per evitar el "tic-tac" del
// NEAREST sampler amb offsets subpíxel.
if (u.chroma_min > 0.0f || u.chroma_max > 0.0f) {
float ca = mix(u.chroma_min, u.chroma_max, 0.5f + 0.5f * sin(u.time * 7.3f)) * 0.005f;
colour.r = sampleBilinearX(uv + float2(ca, 0.0f), 0, scene, samp);
colour.b = sampleBilinearX(uv - float2(ca, 0.0f), 2, scene, samp);
}
if (u.gamma_strength > 0.0f) {
float3 lin = pow(colour, float3(2.4f));
colour = mix(colour, lin, u.gamma_strength);
}
// Scanlines — 3 subpíxels per fila lògica (2 brillants + 1 fosca). Transició
// suavitzada amb smoothstep d'ample ≈ 1 píxel físic (estil crtpi: filtratge
// analític continu). scan_edge_soft = 0 recupera el step dur de l'original.
if (u.scanline_strength > 0.0f) {
float ps = max(u.pixel_scale, 1.0f);
float sub = fract(uv.y * u.screen_height);
float dark_center = 1.0f - u.scan_dark_ratio * 0.5f;
float d = abs(sub - dark_center);
d = min(d, 1.0f - d);
float half_width = u.scan_dark_ratio * 0.5f;
float softness = u.scan_edge_soft * 0.5f / ps;
float band = 1.0f - smoothstep(half_width - softness, half_width + softness, d);
float scan = mix(1.0f, u.scan_dark_floor, band);
colour *= mix(1.0f, scan, u.scanline_strength);
}
if (u.gamma_strength > 0.0f) {
float3 enc = pow(colour, float3(1.0f/2.2f));
colour = mix(colour, enc, u.gamma_strength);
}
float2 d = uv - 0.5f;
float vignette = 1.0f - dot(d, d) * u.vignette_strength;
colour *= clamp(vignette, 0.0f, 1.0f);
if (u.mask_strength > 0.0f) {
float whichMask = fract(in.pos.x * 0.3333333f);
float3 mask = float3(0.80f);
if (whichMask < 0.3333333f) mask.x = 1.0f;
else if (whichMask < 0.6666667f) mask.y = 1.0f;
else mask.z = 1.0f;
colour = mix(colour, colour * mask, u.mask_strength);
}
if (u.flicker > 0.0f) {
float flicker_wave = sin(u.time * 100.0f) * 0.5f + 0.5f;
colour *= 1.0f - u.flicker * 0.04f * flicker_wave;
}
return float4(colour, 1.0f);
}
)";
} // namespace Rendering::Msl
#endif // __APPLE__
@@ -0,0 +1,30 @@
#pragma once
#ifdef __APPLE__
// Vertex shader compartit per tots els pipelines de post-procés:
// fullscreen-triangle que cobreix tota l'àrea del swapchain amb UVs a [0,1].
namespace Rendering::Msl {
inline constexpr const char* kPostfxVert = R"(
#include <metal_stdlib>
using namespace metal;
struct PostVOut {
float4 pos [[position]];
float2 uv;
};
vertex PostVOut postfx_vs(uint vid [[vertex_id]]) {
const float2 positions[3] = { {-1.0, -1.0}, {3.0, -1.0}, {-1.0, 3.0} };
const float2 uvs[3] = { { 0.0, 1.0}, {2.0, 1.0}, { 0.0,-1.0} };
PostVOut out;
out.pos = float4(positions[vid], 0.0, 1.0);
out.uv = uvs[vid];
return out;
}
)";
} // namespace Rendering::Msl
#endif // __APPLE__
@@ -0,0 +1,23 @@
#pragma once
#ifdef __APPLE__
// Fragment shader d'upscale (mostreig directe). S'usa per al pas de resolució
// interna (scene_texture → internal_texture) quan internal_resolution > 1.
namespace Rendering::Msl {
inline constexpr const char* kUpscaleFrag = R"(
#include <metal_stdlib>
using namespace metal;
struct VertOut { float4 pos [[position]]; float2 uv; };
fragment float4 upscale_fs(VertOut in [[stage_in]],
texture2d<float> scene [[texture(0)]],
sampler smp [[sampler(0)]])
{
return scene.sample(smp, in.uv);
}
)";
} // namespace Rendering::Msl
#endif // __APPLE__
+51 -713
View File
@@ -9,360 +9,17 @@
#ifndef __APPLE__
#include "core/rendering/sdl3gpu/spv/crtpi_frag_spv.h"
#include "core/rendering/sdl3gpu/spv/downscale_frag_spv.h"
#include "core/rendering/sdl3gpu/spv/postfx_frag_spv.h"
#include "core/rendering/sdl3gpu/spv/postfx_vert_spv.h"
#include "core/rendering/sdl3gpu/spv/upscale_frag_spv.h"
#endif
#ifdef __APPLE__
// ============================================================================
// MSL shaders (Metal Shading Language) — macOS
// ============================================================================
// NOLINTBEGIN(readability-identifier-naming)
static const char* POSTFX_VERT_MSL = R"(
#include <metal_stdlib>
using namespace metal;
struct PostVOut {
float4 pos [[position]];
float2 uv;
};
vertex PostVOut postfx_vs(uint vid [[vertex_id]]) {
const float2 positions[3] = { {-1.0, -1.0}, {3.0, -1.0}, {-1.0, 3.0} };
const float2 uvs[3] = { { 0.0, 1.0}, {2.0, 1.0}, { 0.0,-1.0} };
PostVOut out;
out.pos = float4(positions[vid], 0.0, 1.0);
out.uv = uvs[vid];
return out;
}
)";
static const char* POSTFX_FRAG_MSL = R"(
#include <metal_stdlib>
using namespace metal;
struct PostVOut {
float4 pos [[position]];
float2 uv;
};
struct PostFXUniforms {
float vignette_strength;
float chroma_strength;
float scanline_strength;
float screen_height;
float mask_strength;
float gamma_strength;
float curvature;
float bleeding;
float pixel_scale;
float time;
float oversample; // 1.0 = sin SS, 3.0 = 3× supersampling
float flicker; // 0 = off, 1 = phosphor flicker ~50 Hz
};
// YCbCr helpers for NTSC bleeding
static float3 rgb_to_ycc(float3 rgb) {
return float3(
0.299f*rgb.r + 0.587f*rgb.g + 0.114f*rgb.b,
-0.169f*rgb.r - 0.331f*rgb.g + 0.500f*rgb.b + 0.5f,
0.500f*rgb.r - 0.419f*rgb.g - 0.081f*rgb.b + 0.5f
);
}
static float3 ycc_to_rgb(float3 ycc) {
float y = ycc.x;
float cb = ycc.y - 0.5f;
float cr = ycc.z - 0.5f;
return clamp(float3(
y + 1.402f*cr,
y - 0.344f*cb - 0.714f*cr,
y + 1.772f*cb
), 0.0f, 1.0f);
}
fragment float4 postfx_fs(PostVOut in [[stage_in]],
texture2d<float> scene [[texture(0)]],
sampler samp [[sampler(0)]],
constant PostFXUniforms& u [[buffer(0)]]) {
float2 uv = in.uv;
// Curvatura barrel CRT
if (u.curvature > 0.0f) {
float2 c = uv - 0.5f;
float rsq = dot(c, c);
float2 dist = float2(0.05f, 0.1f) * u.curvature;
float2 barrelScale = 1.0f - 0.23f * dist;
c += c * (dist * rsq);
c *= barrelScale;
if (abs(c.x) >= 0.5f || abs(c.y) >= 0.5f) {
return float4(0.0f, 0.0f, 0.0f, 1.0f);
}
uv = c + 0.5f;
}
// Muestra base
float3 base = scene.sample(samp, uv).rgb;
// Sangrado NTSC — difuminado horizontal de crominancia.
// step = 1 pixel de juego en espacio UV (corrige SS: scene.get_width() = game_w * oversample).
float3 colour;
if (u.bleeding > 0.0f) {
float tw = float(scene.get_width());
float step = u.oversample / tw; // 1 pixel lógico en UV
float3 ycc = rgb_to_ycc(base);
float3 ycc_l2 = rgb_to_ycc(scene.sample(samp, uv - float2(2.0f*step, 0.0f)).rgb);
float3 ycc_l1 = rgb_to_ycc(scene.sample(samp, uv - float2(1.0f*step, 0.0f)).rgb);
float3 ycc_r1 = rgb_to_ycc(scene.sample(samp, uv + float2(1.0f*step, 0.0f)).rgb);
float3 ycc_r2 = rgb_to_ycc(scene.sample(samp, uv + float2(2.0f*step, 0.0f)).rgb);
ycc.yz = (ycc_l2.yz + ycc_l1.yz*2.0f + ycc.yz*2.0f + ycc_r1.yz*2.0f + ycc_r2.yz) / 8.0f;
colour = mix(base, ycc_to_rgb(ycc), u.bleeding);
} else {
colour = base;
}
// Aberración cromática (drift animado con time para efecto NTSC real)
float ca = u.chroma_strength * 0.005f * (1.0f + 0.15f * sin(u.time * 7.3f));
colour.r = scene.sample(samp, uv + float2(ca, 0.0f)).r;
colour.b = scene.sample(samp, uv - float2(ca, 0.0f)).b;
// Corrección gamma (linealizar antes de scanlines, codificar después)
if (u.gamma_strength > 0.0f) {
float3 lin = pow(colour, float3(2.4f));
colour = mix(colour, lin, u.gamma_strength);
}
// Scanlines — proporción 2/3 brillantes + 1/3 oscuras por fila lógica.
// Casos especiales: 1 subfila → sin efecto; 2 subfilas → 1+1 (50/50).
// Constantes ajustables:
const float SCAN_DARK_RATIO = 0.333f; // fracción de subfilas oscuras (ps >= 3)
const float SCAN_DARK_FLOOR = 0.42f; // multiplicador de brillo de subfilas oscuras
if (u.scanline_strength > 0.0f) {
float ps = max(1.0f, round(u.pixel_scale));
float frac_in_row = fract(uv.y * u.screen_height);
float row_pos = floor(frac_in_row * ps);
float bright_rows = (ps < 2.0f) ? ps : ((ps < 3.0f) ? 1.0f : floor(ps * (1.0f - SCAN_DARK_RATIO)));
float is_dark = step(bright_rows, row_pos);
float scan = mix(1.0f, SCAN_DARK_FLOOR, is_dark);
colour *= mix(1.0f, scan, u.scanline_strength);
}
if (u.gamma_strength > 0.0f) {
float3 enc = pow(colour, float3(1.0f/2.2f));
colour = mix(colour, enc, u.gamma_strength);
}
// Viñeta
float2 d = uv - 0.5f;
float vignette = 1.0f - dot(d, d) * u.vignette_strength;
colour *= clamp(vignette, 0.0f, 1.0f);
// Máscara de fósforo RGB — después de scanlines (orden original):
// filas brillantes saturadas → máscara invisible, filas oscuras → RGB visible.
if (u.mask_strength > 0.0f) {
float whichMask = fract(in.pos.x * 0.3333333f);
float3 mask = float3(0.80f);
if (whichMask < 0.3333333f) mask.x = 1.0f;
else if (whichMask < 0.6666667f) mask.y = 1.0f;
else mask.z = 1.0f;
colour = mix(colour, colour * mask, u.mask_strength);
}
// Parpadeo de fósforo CRT (~50 Hz)
if (u.flicker > 0.0f) {
float flicker_wave = sin(u.time * 100.0f) * 0.5f + 0.5f;
colour *= 1.0f - u.flicker * 0.04f * flicker_wave;
}
return float4(colour, 1.0f);
}
)";
static const char* UPSCALE_FRAG_MSL = R"(
#include <metal_stdlib>
using namespace metal;
struct VertOut { float4 pos [[position]]; float2 uv; };
fragment float4 upscale_fs(VertOut in [[stage_in]],
texture2d<float> scene [[texture(0)]],
sampler smp [[sampler(0)]])
{
return scene.sample(smp, in.uv);
}
)";
static const char* DOWNSCALE_FRAG_MSL = R"(
#include <metal_stdlib>
using namespace metal;
struct VertOut { float4 pos [[position]]; float2 uv; };
struct DownscaleUniforms { int algorithm; float pad0; float pad1; float pad2; };
static float lanczos_w(float t, float a) {
t = abs(t);
if (t < 0.0001f) { return 1.0f; }
if (t >= a) { return 0.0f; }
const float PI = 3.14159265358979f;
float pt = PI * t;
return (a * sin(pt) * sin(pt / a)) / (pt * pt);
}
fragment float4 downscale_fs(VertOut in [[stage_in]],
texture2d<float> source [[texture(0)]],
sampler smp [[sampler(0)]],
constant DownscaleUniforms& u [[buffer(0)]])
{
float2 src_size = float2(source.get_width(), source.get_height());
float2 p = in.uv * src_size;
float2 p_floor = floor(p);
float a = (u.algorithm == 0) ? 2.0f : 3.0f;
int win = int(a);
float4 color = float4(0.0f);
float weight_sum = 0.0f;
for (int j = -win; j <= win; j++) {
for (int i = -win; i <= win; i++) {
float2 tap_center = p_floor + float2(float(i), float(j)) + 0.5f;
float2 offset = tap_center - p;
float w = lanczos_w(offset.x, a) * lanczos_w(offset.y, a);
color += source.sample(smp, tap_center / src_size) * w;
weight_sum += w;
}
}
return (weight_sum > 0.0f) ? (color / weight_sum) : float4(0.0f, 0.0f, 0.0f, 1.0f);
}
)";
static const char* CRTPI_FRAG_MSL = R"(
#include <metal_stdlib>
using namespace metal;
struct PostVOut {
float4 pos [[position]];
float2 uv;
};
struct CrtPiUniforms {
// vec4 #0
float scanline_weight;
float scanline_gap_brightness;
float bloom_factor;
float input_gamma;
// vec4 #1
float output_gamma;
float mask_brightness;
float curvature_x;
float curvature_y;
// vec4 #2
int mask_type;
int enable_scanlines;
int enable_multisample;
int enable_gamma;
// vec4 #3
int enable_curvature;
int enable_sharper;
float texture_width;
float texture_height;
};
static float2 crtpi_distort(float2 coord, float2 screen_scale, float cx, float cy) {
float2 curvature = float2(cx, cy);
float2 barrel_scale = 1.0f - (0.23f * curvature);
coord *= screen_scale;
coord -= 0.5f;
float rsq = coord.x * coord.x + coord.y * coord.y;
coord += coord * (curvature * rsq);
coord *= barrel_scale;
if (abs(coord.x) >= 0.5f || abs(coord.y) >= 0.5f) { return float2(-1.0f); }
coord += 0.5f;
coord /= screen_scale;
return coord;
}
static float crtpi_scan_weight(float dist, float sw, float gap) {
return max(1.0f - dist * dist * sw, gap);
}
static float crtpi_scan_line(float dy, float filter_w, float sw, float gap, bool ms) {
float w = crtpi_scan_weight(dy, sw, gap);
if (ms) {
w += crtpi_scan_weight(dy - filter_w, sw, gap);
w += crtpi_scan_weight(dy + filter_w, sw, gap);
w *= 0.3333333f;
}
return w;
}
fragment float4 crtpi_fs(PostVOut in [[stage_in]],
texture2d<float> tex [[texture(0)]],
sampler samp [[sampler(0)]],
constant CrtPiUniforms& u [[buffer(0)]]) {
float2 tex_size = float2(u.texture_width, u.texture_height);
float filter_width = (768.0f / u.texture_height) / 3.0f;
float2 texcoord = in.uv;
if (u.enable_curvature != 0) {
texcoord = crtpi_distort(texcoord, float2(1.0f, 1.0f), u.curvature_x, u.curvature_y);
if (texcoord.x < 0.0f) { return float4(0.0f, 0.0f, 0.0f, 1.0f); }
}
float2 coord_in_pixels = texcoord * tex_size;
float2 tc;
float scan_weight;
if (u.enable_sharper != 0) {
float2 temp = floor(coord_in_pixels) + 0.5f;
tc = temp / tex_size;
float2 deltas = coord_in_pixels - temp;
scan_weight = crtpi_scan_line(deltas.y, filter_width, u.scanline_weight, u.scanline_gap_brightness, u.enable_multisample != 0);
float2 signs = sign(deltas);
deltas.x *= 2.0f;
deltas = deltas * deltas;
deltas.y = deltas.y * deltas.y;
deltas.x *= 0.5f;
deltas.y *= 8.0f;
deltas /= tex_size;
deltas *= signs;
tc = tc + deltas;
} else {
float temp_y = floor(coord_in_pixels.y) + 0.5f;
float y_coord = temp_y / tex_size.y;
float dy = coord_in_pixels.y - temp_y;
scan_weight = crtpi_scan_line(dy, filter_width, u.scanline_weight, u.scanline_gap_brightness, u.enable_multisample != 0);
float sign_y = sign(dy);
dy = dy * dy;
dy = dy * dy;
dy *= 8.0f;
dy /= tex_size.y;
dy *= sign_y;
tc = float2(texcoord.x, y_coord + dy);
}
float3 colour = tex.sample(samp, tc).rgb;
if (u.enable_scanlines != 0) {
if (u.enable_gamma != 0) { colour = pow(colour, float3(u.input_gamma)); }
colour *= scan_weight * u.bloom_factor;
if (u.enable_gamma != 0) { colour = pow(colour, float3(1.0f / u.output_gamma)); }
}
if (u.mask_type == 1) {
float wm = fract(in.pos.x * 0.5f);
float3 mask = (wm < 0.5f) ? float3(u.mask_brightness, 1.0f, u.mask_brightness)
: float3(1.0f, u.mask_brightness, 1.0f);
colour *= mask;
} else if (u.mask_type == 2) {
float wm = fract(in.pos.x * 0.3333333f);
float3 mask = float3(u.mask_brightness);
if (wm < 0.3333333f) mask.x = 1.0f;
else if (wm < 0.6666666f) mask.y = 1.0f;
else mask.z = 1.0f;
colour *= mask;
}
return float4(colour, 1.0f);
}
)";
// NOLINTEND(readability-identifier-naming)
#endif // __APPLE__
#include "core/rendering/sdl3gpu/msl/crtpi_frag.msl.h"
#include "core/rendering/sdl3gpu/msl/postfx_frag.msl.h"
#include "core/rendering/sdl3gpu/msl/postfx_vert.msl.h"
#include "core/rendering/sdl3gpu/msl/upscale_frag.msl.h"
#endif
namespace Rendering {
@@ -396,7 +53,6 @@ namespace Rendering {
game_width_ = static_cast<int>(fw);
game_height_ = static_cast<int>(fh);
uniforms_.screen_height = static_cast<float>(game_height_);
uniforms_.oversample = static_cast<float>(oversample_);
// ----------------------------------------------------------------
// 1. Create GPU device (solo si no existe ya)
@@ -461,9 +117,6 @@ namespace Rendering {
// textura a nullptr i el pipeline ometrà la còpia.
recreateInternalTexture();
// scaled_texture_ se creará en el primer render() una vez conocido el zoom de ventana
ss_factor_ = 0;
// ----------------------------------------------------------------
// 4. Create upload transfer buffer (CPU → GPU, always game resolution)
// ----------------------------------------------------------------
@@ -525,7 +178,7 @@ namespace Rendering {
}
is_initialized_ = true;
std::cout << "GPU Shader : initialized OK — game " << game_width_ << 'x' << game_height_ << ", oversample " << oversample_ << '\n';
std::cout << "GPU Shader : initialized OK — game " << game_width_ << 'x' << game_height_ << '\n';
return true;
}
@@ -537,8 +190,8 @@ namespace Rendering {
// ---- PostFX pipeline (scene/scaled → swapchain) ----
#ifdef __APPLE__
SDL_GPUShader* vert = createShaderMSL(device_, POSTFX_VERT_MSL, "postfx_vs", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
SDL_GPUShader* frag = createShaderMSL(device_, POSTFX_FRAG_MSL, "postfx_fs", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 1);
SDL_GPUShader* vert = createShaderMSL(device_, Rendering::Msl::kPostfxVert, "postfx_vs", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
SDL_GPUShader* frag = createShaderMSL(device_, Rendering::Msl::kPostfxFrag, "postfx_fs", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 1);
#else
SDL_GPUShader* vert = createShaderSPIRV(device_, kpostfx_vert_spv, kpostfx_vert_spv_size, "main", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
SDL_GPUShader* frag = createShaderSPIRV(device_, kpostfx_frag_spv, kpostfx_frag_spv_size, "main", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 1);
@@ -581,8 +234,8 @@ namespace Rendering {
// ---- Upscale pipeline (scene → scaled_texture_, nearest) ----
#ifdef __APPLE__
SDL_GPUShader* uvert = createShaderMSL(device_, POSTFX_VERT_MSL, "postfx_vs", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
SDL_GPUShader* ufrag = createShaderMSL(device_, UPSCALE_FRAG_MSL, "upscale_fs", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 0);
SDL_GPUShader* uvert = createShaderMSL(device_, Rendering::Msl::kPostfxVert, "postfx_vs", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
SDL_GPUShader* ufrag = createShaderMSL(device_, Rendering::Msl::kUpscaleFrag, "upscale_fs", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 0);
#else
SDL_GPUShader* uvert = createShaderSPIRV(device_, kpostfx_vert_spv, kpostfx_vert_spv_size, "main", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
SDL_GPUShader* ufrag = createShaderSPIRV(device_, kupscale_frag_spv, kupscale_frag_spv_size, "main", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 0);
@@ -617,83 +270,6 @@ namespace Rendering {
return false;
}
// ---- PostFX offscreen pipeline (scaled_texture_ → postfx_texture_, B8G8R8A8) ----
// Mismos shaders que pipeline_ pero con formato de salida B8G8R8A8_UNORM para textura intermedia.
#ifdef __APPLE__
SDL_GPUShader* ofvert = createShaderMSL(device_, POSTFX_VERT_MSL, "postfx_vs", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
SDL_GPUShader* offrag = createShaderMSL(device_, POSTFX_FRAG_MSL, "postfx_fs", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 1);
#else
SDL_GPUShader* ofvert = createShaderSPIRV(device_, kpostfx_vert_spv, kpostfx_vert_spv_size, "main", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
SDL_GPUShader* offrag = createShaderSPIRV(device_, kpostfx_frag_spv, kpostfx_frag_spv_size, "main", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 1);
#endif
if ((ofvert == nullptr) || (offrag == nullptr)) {
SDL_Log("SDL3GPUShader: failed to compile PostFX offscreen shaders");
if (ofvert != nullptr) { SDL_ReleaseGPUShader(device_, ofvert); }
if (offrag != nullptr) { SDL_ReleaseGPUShader(device_, offrag); }
return false;
}
SDL_GPUColorTargetDescription offscreen_color_target = {};
offscreen_color_target.format = SDL_GPU_TEXTUREFORMAT_R8G8B8A8_UNORM;
offscreen_color_target.blend_state = no_blend;
SDL_GPUGraphicsPipelineCreateInfo offscreen_pipe_info = {};
offscreen_pipe_info.vertex_shader = ofvert;
offscreen_pipe_info.fragment_shader = offrag;
offscreen_pipe_info.vertex_input_state = no_input;
offscreen_pipe_info.primitive_type = SDL_GPU_PRIMITIVETYPE_TRIANGLELIST;
offscreen_pipe_info.target_info.num_color_targets = 1;
offscreen_pipe_info.target_info.color_target_descriptions = &offscreen_color_target;
postfx_offscreen_pipeline_ = SDL_CreateGPUGraphicsPipeline(device_, &offscreen_pipe_info);
SDL_ReleaseGPUShader(device_, ofvert);
SDL_ReleaseGPUShader(device_, offrag);
if (postfx_offscreen_pipeline_ == nullptr) {
SDL_Log("SDL3GPUShader: PostFX offscreen pipeline creation failed: %s", SDL_GetError());
return false;
}
// ---- Downscale pipeline (postfx_texture_ → swapchain, Lanczos) ----
#ifdef __APPLE__
SDL_GPUShader* dvert = createShaderMSL(device_, POSTFX_VERT_MSL, "postfx_vs", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
SDL_GPUShader* dfrag = createShaderMSL(device_, DOWNSCALE_FRAG_MSL, "downscale_fs", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 1);
#else
SDL_GPUShader* dvert = createShaderSPIRV(device_, kpostfx_vert_spv, kpostfx_vert_spv_size, "main", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
SDL_GPUShader* dfrag = createShaderSPIRV(device_, kdownscale_frag_spv, kdownscale_frag_spv_size, "main", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 1);
#endif
if ((dvert == nullptr) || (dfrag == nullptr)) {
SDL_Log("SDL3GPUShader: failed to compile downscale shaders");
if (dvert != nullptr) { SDL_ReleaseGPUShader(device_, dvert); }
if (dfrag != nullptr) { SDL_ReleaseGPUShader(device_, dfrag); }
return false;
}
SDL_GPUColorTargetDescription downscale_color_target = {};
downscale_color_target.format = SWAPCHAIN_FMT;
downscale_color_target.blend_state = no_blend;
SDL_GPUGraphicsPipelineCreateInfo downscale_pipe_info = {};
downscale_pipe_info.vertex_shader = dvert;
downscale_pipe_info.fragment_shader = dfrag;
downscale_pipe_info.vertex_input_state = no_input;
downscale_pipe_info.primitive_type = SDL_GPU_PRIMITIVETYPE_TRIANGLELIST;
downscale_pipe_info.target_info.num_color_targets = 1;
downscale_pipe_info.target_info.color_target_descriptions = &downscale_color_target;
downscale_pipeline_ = SDL_CreateGPUGraphicsPipeline(device_, &downscale_pipe_info);
SDL_ReleaseGPUShader(device_, dvert);
SDL_ReleaseGPUShader(device_, dfrag);
if (downscale_pipeline_ == nullptr) {
SDL_Log("SDL3GPUShader: downscale pipeline creation failed: %s", SDL_GetError());
return false;
}
return true;
}
@@ -707,8 +283,8 @@ namespace Rendering {
const SDL_GPUTextureFormat SWAPCHAIN_FMT = SDL_GetGPUSwapchainTextureFormat(device_, window_);
#ifdef __APPLE__
SDL_GPUShader* vert = createShaderMSL(device_, POSTFX_VERT_MSL, "postfx_vs", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
SDL_GPUShader* frag = createShaderMSL(device_, CRTPI_FRAG_MSL, "crtpi_fs", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 1);
SDL_GPUShader* vert = createShaderMSL(device_, Rendering::Msl::kPostfxVert, "postfx_vs", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
SDL_GPUShader* frag = createShaderMSL(device_, Rendering::Msl::kCrtpiFrag, "crtpi_fs", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 1);
#else
SDL_GPUShader* vert = createShaderSPIRV(device_, kpostfx_vert_spv, kpostfx_vert_spv_size, "main", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
SDL_GPUShader* frag = createShaderSPIRV(device_, kcrtpi_frag_spv, kcrtpi_frag_spv_size, "main", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 1);
@@ -778,20 +354,6 @@ namespace Rendering {
void SDL3GPUShader::render() { // NOLINT(readability-function-cognitive-complexity)
if (!is_initialized_) { return; }
// Paso 0: si SS activo, calcular el factor necesario según el zoom actual y recrear si cambió.
// Factor = primer múltiplo de 3 >= zoom (mín 3). Se recrea solo en saltos de factor.
if (oversample_ > 1 && game_height_ > 0) {
int win_w = 0;
int win_h = 0;
SDL_GetWindowSizeInPixels(window_, &win_w, &win_h);
const float ZOOM = static_cast<float>(win_h) / static_cast<float>(game_height_);
const int NEED_FACTOR = calcSsFactor(ZOOM);
if (NEED_FACTOR != ss_factor_) {
SDL_WaitForGPUIdle(device_);
recreateScaledTexture(NEED_FACTOR);
}
}
SDL_GPUCommandBuffer* cmd = SDL_AcquireGPUCommandBuffer(device_);
if (cmd == nullptr) {
SDL_Log("SDL3GPUShader: SDL_AcquireGPUCommandBuffer failed: %s", SDL_GetError());
@@ -847,46 +409,12 @@ namespace Rendering {
source_height = game_height_ * internal_res_;
}
// ---- Upscale pass: source_texture → scaled_texture_ ----
// Si 4:3 actiu, l'estirament s'aplica ací directament (320x200 → W*factor × H*factor*1.2)
// El filtre s'aplica sempre (texture_filter_linear_), independent de 4:3.
// L'effective_scene/height reflecteix la textura real que veuen els shaders.
// Sense SS ni stretch: scene_texture_ a game_height_.
// Amb SS o stretch: scaled_texture_ a l'alçada escalada (amb o sense 4:3).
// L'effective_scene és la textura font definitiva (internal_texture_
// si internal_res_ > 1, altrament scene_texture_). effective_height
// reflecteix l'alçada lògica del frame: game_height_ * 1.2 si 4:3 actiu.
SDL_GPUTexture* effective_scene = source_texture;
// `effective_height` reflecteix l'alçada lògica del frame (per a
// scanlines i viewport), no la mida real de la textura. Es manté
// a `game_height_` encara que internal_res_ > 1 — el multiplicador
// només afecta la resolució física de la font, no l'aspect ni el
// nombre de scanlines visibles.
int effective_height = game_height_;
(void)source_width; // només es fa servir com a context informatiu
if (oversample_ > 1 && scaled_texture_ != nullptr && upscale_pipeline_ != nullptr) {
SDL_GPUColorTargetInfo upscale_target = {};
upscale_target.texture = scaled_texture_;
upscale_target.load_op = SDL_GPU_LOADOP_DONT_CARE;
upscale_target.store_op = SDL_GPU_STOREOP_STORE;
// Filtre global: s'aplica sempre (ja no depèn de 4:3).
bool use_linear = texture_filter_linear_;
SDL_GPURenderPass* upass = SDL_BeginGPURenderPass(cmd, &upscale_target, 1, nullptr);
if (upass != nullptr) {
SDL_BindGPUGraphicsPipeline(upass, upscale_pipeline_);
SDL_GPUTextureSamplerBinding ubinding = {};
ubinding.texture = source_texture;
ubinding.sampler = (use_linear && linear_sampler_ != nullptr) ? linear_sampler_ : sampler_;
SDL_BindGPUFragmentSamplers(upass, 0, &ubinding, 1);
SDL_DrawGPUPrimitives(upass, 3, 1, 0, 0);
SDL_EndGPURenderPass(upass);
}
effective_scene = scaled_texture_;
effective_height = stretch_4_3_ ? static_cast<int>(static_cast<float>(game_height_) * 1.2F) : game_height_;
} else if (stretch_4_3_) {
// Sense SS: el viewport s'encarrega de l'estirament geomètric
effective_height = static_cast<int>(static_cast<float>(game_height_) * 1.2F);
}
int effective_height = stretch_4_3_ ? static_cast<int>(static_cast<float>(game_height_) * 1.2F) : game_height_;
(void)source_width;
(void)source_height;
// ---- Acquire swapchain texture ----
@@ -948,19 +476,10 @@ namespace Rendering {
vx = std::floor((static_cast<float>(sw) - vw) * 0.5F);
vy = std::floor((static_cast<float>(sh) - vh) * 0.5F);
// pixel_scale: subpíxels per píxel lògic.
// Sense SS: vh/effective_height (zoom de finestra).
// Amb SS: ss_factor_ exacte (3, 6, 9...).
if (oversample_ > 1 && ss_factor_ > 0) {
uniforms_.pixel_scale = static_cast<float>(ss_factor_);
} else {
uniforms_.pixel_scale = (effective_height > 0) ? (vh / static_cast<float>(effective_height)) : 1.0F;
}
// pixel_scale: subpíxels per píxel lògic (zoom de finestra).
uniforms_.pixel_scale = (effective_height > 0) ? (vh / static_cast<float>(effective_height)) : 1.0F;
uniforms_.screen_height = static_cast<float>(effective_height);
uniforms_.time = static_cast<float>(SDL_GetTicks()) / 1000.0F;
uniforms_.oversample = (oversample_ > 1 && ss_factor_ > 0)
? static_cast<float>(ss_factor_)
: 1.0F;
// ---- Path CrtPi: directo scene_texture_ → swapchain, sin SS ni Lanczos ----
if (active_shader_ == ShaderType::CRTPI && crtpi_pipeline_ != nullptr) {
@@ -999,87 +518,34 @@ namespace Rendering {
return;
}
// ---- Determinar si usar el path Lanczos (SS activo + algo seleccionado) ----
const bool USE_LANCZOS = (oversample_ > 1 && downscale_algo_ > 0 && scaled_texture_ != nullptr && postfx_texture_ != nullptr && postfx_offscreen_pipeline_ != nullptr && downscale_pipeline_ != nullptr);
// ---- Render pass: PostFX → swapchain ----
// Font: effective_scene (que ja és internal_texture_ si internal_res_ > 1, o
// scene_texture_ altrament). Sampler honora el filtre global de l'usuari.
SDL_GPUColorTargetInfo color_target = {};
color_target.texture = swapchain;
color_target.load_op = SDL_GPU_LOADOP_CLEAR;
color_target.store_op = SDL_GPU_STOREOP_STORE;
color_target.clear_color = {.r = 0.0F, .g = 0.0F, .b = 0.0F, .a = 1.0F};
if (USE_LANCZOS) {
// ---- Pass A: PostFX → postfx_texture_ (full scaled size, sin viewport) ----
SDL_GPUColorTargetInfo postfx_target = {};
postfx_target.texture = postfx_texture_;
postfx_target.load_op = SDL_GPU_LOADOP_CLEAR;
postfx_target.store_op = SDL_GPU_STOREOP_STORE;
postfx_target.clear_color = {.r = 0.0F, .g = 0.0F, .b = 0.0F, .a = 1.0F};
SDL_GPURenderPass* pass = SDL_BeginGPURenderPass(cmd, &color_target, 1, nullptr);
if (pass != nullptr) {
SDL_BindGPUGraphicsPipeline(pass, pipeline_);
SDL_GPUViewport vp = {.x = vx, .y = vy, .w = vw, .h = vh, .min_depth = 0.0F, .max_depth = 1.0F};
SDL_SetGPUViewport(pass, &vp);
SDL_GPURenderPass* ppass = SDL_BeginGPURenderPass(cmd, &postfx_target, 1, nullptr);
if (ppass != nullptr) {
SDL_BindGPUGraphicsPipeline(ppass, postfx_offscreen_pipeline_);
SDL_GPUTextureSamplerBinding pbinding = {};
pbinding.texture = scaled_texture_;
pbinding.sampler = sampler_; // NEAREST: 1:1 pass, efectos calculados analíticamente
SDL_BindGPUFragmentSamplers(ppass, 0, &pbinding, 1);
SDL_PushGPUFragmentUniformData(cmd, 0, &uniforms_, sizeof(PostFXUniforms));
SDL_DrawGPUPrimitives(ppass, 3, 1, 0, 0);
SDL_EndGPURenderPass(ppass);
}
SDL_GPUSampler* active_sampler = (texture_filter_linear_ && linear_sampler_ != nullptr)
? linear_sampler_
: sampler_;
// ---- Pass B: Downscale Lanczos → swapchain (con viewport/letterbox) ----
SDL_GPUColorTargetInfo ds_target = {};
ds_target.texture = swapchain;
ds_target.load_op = SDL_GPU_LOADOP_CLEAR;
ds_target.store_op = SDL_GPU_STOREOP_STORE;
ds_target.clear_color = {.r = 0.0F, .g = 0.0F, .b = 0.0F, .a = 1.0F};
SDL_GPUTextureSamplerBinding binding = {};
binding.texture = effective_scene;
binding.sampler = active_sampler;
SDL_BindGPUFragmentSamplers(pass, 0, &binding, 1);
SDL_GPURenderPass* dpass = SDL_BeginGPURenderPass(cmd, &ds_target, 1, nullptr);
if (dpass != nullptr) {
SDL_BindGPUGraphicsPipeline(dpass, downscale_pipeline_);
SDL_GPUViewport vp = {.x = vx, .y = vy, .w = vw, .h = vh, .min_depth = 0.0F, .max_depth = 1.0F};
SDL_SetGPUViewport(dpass, &vp);
SDL_GPUTextureSamplerBinding dbinding = {};
dbinding.texture = postfx_texture_;
dbinding.sampler = sampler_; // NEAREST: el shader Lanczos hace su propia interpolación
SDL_BindGPUFragmentSamplers(dpass, 0, &dbinding, 1);
// algorithm: 0=Lanczos2, 1=Lanczos3 (downscale_algo_ es 1-based)
DownscaleUniforms downscale_u = {.algorithm = downscale_algo_ - 1, .pad0 = 0.0F, .pad1 = 0.0F, .pad2 = 0.0F};
SDL_PushGPUFragmentUniformData(cmd, 0, &downscale_u, sizeof(DownscaleUniforms));
SDL_DrawGPUPrimitives(dpass, 3, 1, 0, 0);
SDL_EndGPURenderPass(dpass);
}
} else {
// ---- Render pass: PostFX → swapchain directamente (bilinear, comportamiento original) ----
SDL_GPUColorTargetInfo color_target = {};
color_target.texture = swapchain;
color_target.load_op = SDL_GPU_LOADOP_CLEAR;
color_target.store_op = SDL_GPU_STOREOP_STORE;
color_target.clear_color = {.r = 0.0F, .g = 0.0F, .b = 0.0F, .a = 1.0F};
SDL_PushGPUFragmentUniformData(cmd, 0, &uniforms_, sizeof(PostFXUniforms));
SDL_GPURenderPass* pass = SDL_BeginGPURenderPass(cmd, &color_target, 1, nullptr);
if (pass != nullptr) {
SDL_BindGPUGraphicsPipeline(pass, pipeline_);
SDL_GPUViewport vp = {.x = vx, .y = vy, .w = vw, .h = vh, .min_depth = 0.0F, .max_depth = 1.0F};
SDL_SetGPUViewport(pass, &vp);
// Font: amb SS scaled_texture_; sense SS, effective_scene (que ja
// és internal_texture_ si internal_res_>1, o scene_texture_ si no).
// Sampler: honora el filtre global que l'usuari tria al menú
// (texture_filter_linear_). Abans estava hardcoded a NEAREST
// quan SS era off — el menú no tenia efecte visible en aquest path.
SDL_GPUTexture* input_texture = (oversample_ > 1 && scaled_texture_ != nullptr)
? scaled_texture_
: effective_scene;
SDL_GPUSampler* active_sampler = (texture_filter_linear_ && linear_sampler_ != nullptr)
? linear_sampler_
: sampler_;
SDL_GPUTextureSamplerBinding binding = {};
binding.texture = input_texture;
binding.sampler = active_sampler;
SDL_BindGPUFragmentSamplers(pass, 0, &binding, 1);
SDL_PushGPUFragmentUniformData(cmd, 0, &uniforms_, sizeof(PostFXUniforms));
SDL_DrawGPUPrimitives(pass, 3, 1, 0, 0);
SDL_EndGPURenderPass(pass);
}
SDL_DrawGPUPrimitives(pass, 3, 1, 0, 0);
SDL_EndGPURenderPass(pass);
}
SDL_SubmitGPUCommandBuffer(cmd);
@@ -1102,18 +568,10 @@ namespace Rendering {
SDL_ReleaseGPUGraphicsPipeline(device_, crtpi_pipeline_);
crtpi_pipeline_ = nullptr;
}
if (postfx_offscreen_pipeline_ != nullptr) {
SDL_ReleaseGPUGraphicsPipeline(device_, postfx_offscreen_pipeline_);
postfx_offscreen_pipeline_ = nullptr;
}
if (upscale_pipeline_ != nullptr) {
SDL_ReleaseGPUGraphicsPipeline(device_, upscale_pipeline_);
upscale_pipeline_ = nullptr;
}
if (downscale_pipeline_ != nullptr) {
SDL_ReleaseGPUGraphicsPipeline(device_, downscale_pipeline_);
downscale_pipeline_ = nullptr;
}
if (scene_texture_ != nullptr) {
SDL_ReleaseGPUTexture(device_, scene_texture_);
scene_texture_ = nullptr;
@@ -1122,15 +580,6 @@ namespace Rendering {
SDL_ReleaseGPUTexture(device_, internal_texture_);
internal_texture_ = nullptr;
}
if (scaled_texture_ != nullptr) {
SDL_ReleaseGPUTexture(device_, scaled_texture_);
scaled_texture_ = nullptr;
}
if (postfx_texture_ != nullptr) {
SDL_ReleaseGPUTexture(device_, postfx_texture_);
postfx_texture_ = nullptr;
}
ss_factor_ = 0;
if (upload_buffer_ != nullptr) {
SDL_ReleaseGPUTransferBuffer(device_, upload_buffer_);
upload_buffer_ = nullptr;
@@ -1211,15 +660,17 @@ namespace Rendering {
void SDL3GPUShader::setPostFXParams(const PostFXParams& p) {
uniforms_.vignette_strength = p.vignette;
uniforms_.chroma_strength = p.chroma;
uniforms_.chroma_min = p.chroma_min;
uniforms_.chroma_max = p.chroma_max;
uniforms_.mask_strength = p.mask;
uniforms_.gamma_strength = p.gamma;
uniforms_.curvature = p.curvature;
uniforms_.bleeding = p.bleeding;
uniforms_.flicker = p.flicker;
// Las scanlines siempre las aplica el shader PostFX en GPU.
uniforms_.scanline_strength = p.scanlines;
uniforms_.scan_dark_ratio = p.scan_dark_ratio;
uniforms_.scan_dark_floor = p.scan_dark_floor;
uniforms_.scan_edge_soft = p.scan_edge_soft;
}
void SDL3GPUShader::setCrtPiParams(const CrtPiParams& p) {
@@ -1288,42 +739,11 @@ namespace Rendering {
void SDL3GPUShader::setStretch43(bool enabled) {
stretch_4_3_ = enabled;
if (!is_initialized_ || device_ == nullptr) {
return;
}
// Recrear scaled_texture_ perquè tinga les dimensions correctes (amb o sense 4:3)
if (oversample_ > 1 && ss_factor_ > 0) {
SDL_WaitForGPUIdle(device_);
recreateScaledTexture(ss_factor_);
}
}
// ---------------------------------------------------------------------------
// setOversample — cambia el factor SS; recrea texturas si ya está inicializado
// ---------------------------------------------------------------------------
void SDL3GPUShader::setOversample(int factor) {
const int NEW_FACTOR = std::max(1, factor);
if (NEW_FACTOR == oversample_) { return; }
oversample_ = NEW_FACTOR;
if (is_initialized_) {
reinitTexturesAndBuffer();
// scanline_strength se actualizará en el próximo setPostFXParams
}
}
void SDL3GPUShader::setDownscaleAlgo(int algo) {
downscale_algo_ = std::max(0, std::min(algo, 2));
}
auto SDL3GPUShader::getSsTextureSize() const -> std::pair<int, int> {
if (ss_factor_ <= 1) { return {0, 0}; }
return {game_width_ * ss_factor_, game_height_ * ss_factor_};
}
// ---------------------------------------------------------------------------
// reinitTexturesAndBuffer — recrea scene_texture_, scaled_texture_ y
// upload_buffer_ con el factor oversample_ actual. No toca pipelines ni samplers.
// reinitTexturesAndBuffer — recrea scene_texture_, internal_texture_ i
// upload_buffer_. No toca pipelines ni samplers.
// ---------------------------------------------------------------------------
auto SDL3GPUShader::reinitTexturesAndBuffer() -> bool {
if (device_ == nullptr) { return false; }
@@ -1337,22 +757,14 @@ namespace Rendering {
SDL_ReleaseGPUTexture(device_, internal_texture_);
internal_texture_ = nullptr;
}
// scaled_texture_ se libera aquí; se recreará en el primer render() con el factor correcto
if (scaled_texture_ != nullptr) {
SDL_ReleaseGPUTexture(device_, scaled_texture_);
scaled_texture_ = nullptr;
}
ss_factor_ = 0;
if (upload_buffer_ != nullptr) {
SDL_ReleaseGPUTransferBuffer(device_, upload_buffer_);
upload_buffer_ = nullptr;
}
uniforms_.screen_height = static_cast<float>(game_height_);
uniforms_.oversample = static_cast<float>(oversample_);
// scene_texture_: siempre a resolución del juego
// scene_texture_: sempre a resolució del joc
SDL_GPUTextureCreateInfo tex_info = {};
tex_info.type = SDL_GPU_TEXTURETYPE_2D;
tex_info.format = SDL_GPU_TEXTUREFORMAT_R8G8B8A8_UNORM;
@@ -1367,7 +779,6 @@ namespace Rendering {
return false;
}
// upload_buffer_: siempre a resolución del juego
SDL_GPUTransferBufferCreateInfo tb_info = {};
tb_info.usage = SDL_GPU_TRANSFERBUFFERUSAGE_UPLOAD;
tb_info.size = static_cast<Uint32>(game_width_ * game_height_ * 4);
@@ -1379,82 +790,9 @@ namespace Rendering {
return false;
}
// Recrea la textura interna si internal_res_ > 1 — manté coherència
// en canvis d'SS que passen per reinitTexturesAndBuffer().
recreateInternalTexture();
SDL_Log("SDL3GPUShader: reinit — scene %dx%d, SS %s, internal ×%d (scaled se creará en render)",
game_width_,
game_height_,
oversample_ > 1 ? "on" : "off",
internal_res_);
return true;
}
// ---------------------------------------------------------------------------
// calcSsFactor — primer múltiplo de 3 >= zoom, mínimo 3.
// Ejemplos: zoom 1,2,3 → 3; zoom 4,5,6 → 6; zoom 4.4 → 6; zoom 7,8,9 → 9.
// ---------------------------------------------------------------------------
auto SDL3GPUShader::calcSsFactor(float zoom) -> int {
const int MULTIPLE = 3;
const int N = static_cast<int>(std::ceil(zoom / static_cast<float>(MULTIPLE)));
return std::max(1, N) * MULTIPLE;
}
// ---------------------------------------------------------------------------
// recreateScaledTexture — libera y recrea scaled_texture_ para el factor dado.
// Llamar solo cuando device_ no esté ejecutando comandos (SDL_WaitForGPUIdle previo).
// ---------------------------------------------------------------------------
auto SDL3GPUShader::recreateScaledTexture(int factor) -> bool {
if (scaled_texture_ != nullptr) {
SDL_ReleaseGPUTexture(device_, scaled_texture_);
scaled_texture_ = nullptr;
}
if (postfx_texture_ != nullptr) {
SDL_ReleaseGPUTexture(device_, postfx_texture_);
postfx_texture_ = nullptr;
}
ss_factor_ = 0;
const int W = game_width_ * factor;
// Si 4:3 actiu, l'alçada inclou l'estirament (200 * factor * 1.2)
const int H = stretch_4_3_
? static_cast<int>(static_cast<float>(game_height_) * 1.2F * static_cast<float>(factor))
: game_height_ * factor;
SDL_GPUTextureCreateInfo info = {};
info.type = SDL_GPU_TEXTURETYPE_2D;
info.format = SDL_GPU_TEXTUREFORMAT_R8G8B8A8_UNORM;
info.usage = SDL_GPU_TEXTUREUSAGE_SAMPLER | SDL_GPU_TEXTUREUSAGE_COLOR_TARGET;
info.width = static_cast<Uint32>(W);
info.height = static_cast<Uint32>(H);
info.layer_count_or_depth = 1;
info.num_levels = 1;
scaled_texture_ = SDL_CreateGPUTexture(device_, &info);
if (scaled_texture_ == nullptr) {
SDL_Log("SDL3GPUShader: failed to create scaled texture %dx%d (factor %d): %s",
W,
H,
factor,
SDL_GetError());
return false;
}
postfx_texture_ = SDL_CreateGPUTexture(device_, &info);
if (postfx_texture_ == nullptr) {
SDL_Log("SDL3GPUShader: failed to create postfx texture %dx%d (factor %d): %s",
W,
H,
factor,
SDL_GetError());
SDL_ReleaseGPUTexture(device_, scaled_texture_);
scaled_texture_ = nullptr;
return false;
}
ss_factor_ = factor;
SDL_Log("SDL3GPUShader: scaled+postfx textures %dx%d (factor %d×)", W, H, factor);
SDL_Log("SDL3GPUShader: reinit — scene %dx%d, internal ×%d", game_width_, game_height_, internal_res_);
return true;
}
File diff suppressed because it is too large Load Diff