shader postfx nou + spv regenerat + msl extret a headers

This commit is contained in:
2026-05-17 16:53:32 +02:00
parent e887b77dcb
commit e57944398a
5 changed files with 10020 additions and 7241 deletions
+47 -18
View File
@@ -6,7 +6,9 @@
// xxd -i postfx.frag.spv > ../../source/core/rendering/sdl3gpu/postfx_frag_spv.h
//
// PostFXUniforms must match exactly the C++ struct in sdl3gpu_shader.hpp
// (8 floats, 32 bytes, std140/scalar layout).
// (16 floats = 4 × vec4 = 64 bytes, std140/scalar layout).
// IMPORTANT: Qualsevol canvi ací cal replicar-lo a mà a
// source/core/rendering/sdl3gpu/msl/postfx_frag.msl.h (no hi ha generador).
layout(location = 0) in vec2 v_uv;
layout(location = 0) out vec4 out_color;
@@ -15,7 +17,7 @@ layout(set = 2, binding = 0) uniform sampler2D scene;
layout(set = 3, binding = 0) uniform PostFXUniforms {
float vignette_strength;
float chroma_strength;
float chroma_min; // intensitat mínima de l'aberració cromàtica
float scanline_strength;
float screen_height;
float mask_strength;
@@ -24,10 +26,28 @@ layout(set = 3, binding = 0) uniform PostFXUniforms {
float bleeding;
float pixel_scale; // physical pixels per logical pixel (vh / tex_height_)
float time; // seconds since SDL init
float oversample; // supersampling factor (1.0 = off, 3.0 = 3×SS)
float flicker; // 0 = off, 1 = phosphor flicker ~50 Hz — 48 bytes total (3 × 16)
float flicker; // 0 = off, 1 = phosphor flicker ~50 Hz
float chroma_max; // intensitat màxima; si == chroma_min → chroma estàtic
// vec4 #3 — paràmetres de scanlines (exposats per preset YAML)
float scan_dark_ratio; // fracció de subfila fosca per fila lògica (1/3 ≈ 0.333)
float scan_dark_floor; // multiplicador de brillantor de la subfila fosca
float scan_edge_soft; // 0 = step dur; 1 = suavitzat d'1 píxel físic (estil crtpi)
float pad3; // padding per tancar a 64 bytes (4 × vec4)
} u;
// Mostreig bilinear horitzontal d'un canal RGB. Evita el "tic-tac" del sampler
// NEAREST quan l'offset de chroma és subpíxel: sense interpolar, l'offset
// arrodonia entre 1 i 2 píxels i el drift temporal feia un parpelleig discret.
float sampleBilinearX(vec2 uv_target, int channel) {
vec2 tex_size = vec2(textureSize(scene, 0));
float px = uv_target.x * tex_size.x - 0.5;
float p_floor = floor(px);
float f = px - p_floor;
vec4 c0 = texture(scene, vec2((p_floor + 0.5) / tex_size.x, uv_target.y));
vec4 c1 = texture(scene, vec2((p_floor + 1.5) / tex_size.x, uv_target.y));
return mix(c0[channel], c1[channel], f);
}
// YCbCr helpers for NTSC bleeding
vec3 rgb_to_ycc(vec3 rgb) {
return vec3(
@@ -69,11 +89,11 @@ void main() {
vec3 base = texture(scene, uv).rgb;
// Sangrado NTSC — difuminado horizontal de crominancia.
// step = 1 pixel lógico de juego en UV (corrige SS: textureSize.x = game_w * oversample).
// step = 1 pixel lógico de juego en UV.
vec3 colour;
if (u.bleeding > 0.0) {
float tw = float(textureSize(scene, 0).x);
float step = u.oversample / tw; // 1 pixel lógico en UV
float step = 1.0 / tw; // 1 pixel lógico en UV
vec3 ycc = rgb_to_ycc(base);
vec3 ycc_l2 = rgb_to_ycc(texture(scene, uv - vec2(2.0*step, 0.0)).rgb);
vec3 ycc_l1 = rgb_to_ycc(texture(scene, uv - vec2(1.0*step, 0.0)).rgb);
@@ -85,10 +105,14 @@ void main() {
colour = base;
}
// Aberración cromática (drift animado con time para efecto NTSC real)
float ca = u.chroma_strength * 0.005 * (1.0 + 0.15 * sin(u.time * 7.3));
colour.r = texture(scene, uv + vec2(ca, 0.0)).r;
colour.b = texture(scene, uv - vec2(ca, 0.0)).b;
// Aberración cromática — intensitat varia entre chroma_min i chroma_max amb
// una sinusoidal (si min == max, queda estàtica). Mostreig bilinear horitzontal
// per evitar el "tic-tac" del NEAREST sampler quan l'offset és subpíxel.
if (u.chroma_min > 0.0 || u.chroma_max > 0.0) {
float ca = mix(u.chroma_min, u.chroma_max, 0.5 + 0.5 * sin(u.time * 7.3)) * 0.005;
colour.r = sampleBilinearX(uv + vec2(ca, 0.0), 0);
colour.b = sampleBilinearX(uv - vec2(ca, 0.0), 2);
}
// Corrección gamma (linealizar antes de scanlines, codificar después)
if (u.gamma_strength > 0.0) {
@@ -96,15 +120,20 @@ void main() {
colour = mix(colour, lin, u.gamma_strength);
}
// Scanlines — 1 pixel físico oscuro por fila lógica.
// Modelo sustractivo: las filas de scanline se oscurecen, las demás no cambian.
// Esto evita el efecto de sobrebrillo en contenido con colores vivos.
// Scanlines — tècnica dels 3 subpíxels verticals per píxel lògic (aee/projecte_2026):
// franja fosca ocupant `scan_dark_ratio` al final de cada fila lògica. La transició es
// suavitza amb smoothstep d'ample ≈ 1 píxel físic (estil crtpi: filtratge analític
// continu), controlat per `scan_edge_soft`. A 0 és equivalent al step dur antic.
if (u.scanline_strength > 0.0) {
float ps = max(1.0, round(u.pixel_scale));
float frac_in_row = fract(uv.y * u.screen_height);
float row_pos = floor(frac_in_row * ps);
float is_dark = step(ps - 1.0, row_pos);
float scan = mix(1.0, 0.0, is_dark);
float ps = max(u.pixel_scale, 1.0);
float sub = fract(uv.y * u.screen_height); // [0,1) dins la fila lògica
float dark_center = 1.0 - u.scan_dark_ratio * 0.5; // centre de la franja fosca
float d = abs(sub - dark_center);
d = min(d, 1.0 - d); // wrap a la fila següent
float half_width = u.scan_dark_ratio * 0.5;
float softness = u.scan_edge_soft * 0.5 / ps; // mig píxel físic a cada costat
float band = 1.0 - smoothstep(half_width - softness, half_width + softness, d);
float scan = mix(1.0, u.scan_dark_floor, band);
colour *= mix(1.0, scan, u.scanline_strength);
}
@@ -0,0 +1,144 @@
#pragma once
#ifdef __APPLE__
// Fragment shader del shader "crtpi" (algoritme CRT-Pi): scanlines amb
// pesos gaussians, multisample opcional, gamma i màscara de subpíxels.
namespace Rendering::Msl {
inline constexpr const char* kCrtpiFrag = R"(
#include <metal_stdlib>
using namespace metal;
struct PostVOut {
float4 pos [[position]];
float2 uv;
};
struct CrtPiUniforms {
float scanline_weight;
float scanline_gap_brightness;
float bloom_factor;
float input_gamma;
float output_gamma;
float mask_brightness;
float curvature_x;
float curvature_y;
int mask_type;
int enable_scanlines;
int enable_multisample;
int enable_gamma;
int enable_curvature;
int enable_sharper;
float texture_width;
float texture_height;
};
static float2 crtpi_distort(float2 coord, float2 screen_scale, float cx, float cy) {
float2 curvature = float2(cx, cy);
float2 barrel_scale = 1.0f - (0.23f * curvature);
coord *= screen_scale;
coord -= 0.5f;
float rsq = coord.x * coord.x + coord.y * coord.y;
coord += coord * (curvature * rsq);
coord *= barrel_scale;
if (abs(coord.x) >= 0.5f || abs(coord.y) >= 0.5f) { return float2(-1.0f); }
coord += 0.5f;
coord /= screen_scale;
return coord;
}
static float crtpi_scan_weight(float dist, float sw, float gap) {
return max(1.0f - dist * dist * sw, gap);
}
static float crtpi_scan_line(float dy, float filter_w, float sw, float gap, bool ms) {
float w = crtpi_scan_weight(dy, sw, gap);
if (ms) {
w += crtpi_scan_weight(dy - filter_w, sw, gap);
w += crtpi_scan_weight(dy + filter_w, sw, gap);
w *= 0.3333333f;
}
return w;
}
fragment float4 crtpi_fs(PostVOut in [[stage_in]],
texture2d<float> tex [[texture(0)]],
sampler samp [[sampler(0)]],
constant CrtPiUniforms& u [[buffer(0)]]) {
float2 tex_size = float2(u.texture_width, u.texture_height);
// Amplada del filtre de scanline analític. 768 = alçada de referència
// CRT a la qual es va tarar l'algoritme original; 3 = divisió per
// subpíxel (R/G/B) del multisample. El resultat escala amb la textura
// d'entrada, de manera que més alçada → filtre més fi.
const float CRT_REFERENCE_HEIGHT = 768.0f;
const float SUBPIXEL_DIV = 3.0f;
float filter_width = (CRT_REFERENCE_HEIGHT / u.texture_height) / SUBPIXEL_DIV;
float2 texcoord = in.uv;
if (u.enable_curvature != 0) {
texcoord = crtpi_distort(texcoord, float2(1.0f, 1.0f), u.curvature_x, u.curvature_y);
if (texcoord.x < 0.0f) { return float4(0.0f, 0.0f, 0.0f, 1.0f); }
}
float2 coord_in_pixels = texcoord * tex_size;
float2 tc;
float scan_weight;
if (u.enable_sharper != 0) {
float2 temp = floor(coord_in_pixels) + 0.5f;
tc = temp / tex_size;
float2 deltas = coord_in_pixels - temp;
scan_weight = crtpi_scan_line(deltas.y, filter_width, u.scanline_weight, u.scanline_gap_brightness, u.enable_multisample != 0);
float2 signs = sign(deltas);
deltas.x *= 2.0f;
deltas = deltas * deltas;
deltas.y = deltas.y * deltas.y;
deltas.x *= 0.5f;
deltas.y *= 8.0f;
deltas /= tex_size;
deltas *= signs;
tc = tc + deltas;
} else {
float temp_y = floor(coord_in_pixels.y) + 0.5f;
float y_coord = temp_y / tex_size.y;
float dy = coord_in_pixels.y - temp_y;
scan_weight = crtpi_scan_line(dy, filter_width, u.scanline_weight, u.scanline_gap_brightness, u.enable_multisample != 0);
float sign_y = sign(dy);
dy = dy * dy;
dy = dy * dy;
dy *= 8.0f;
dy /= tex_size.y;
dy *= sign_y;
tc = float2(texcoord.x, y_coord + dy);
}
float3 colour = tex.sample(samp, tc).rgb;
if (u.enable_scanlines != 0) {
if (u.enable_gamma != 0) { colour = pow(colour, float3(u.input_gamma)); }
colour *= scan_weight * u.bloom_factor;
if (u.enable_gamma != 0) { colour = pow(colour, float3(1.0f / u.output_gamma)); }
}
if (u.mask_type == 1) {
float wm = fract(in.pos.x * 0.5f);
float3 mask = (wm < 0.5f) ? float3(u.mask_brightness, 1.0f, u.mask_brightness)
: float3(1.0f, u.mask_brightness, 1.0f);
colour *= mask;
} else if (u.mask_type == 2) {
float wm = fract(in.pos.x * 0.3333333f);
float3 mask = float3(u.mask_brightness);
if (wm < 0.3333333f) mask.x = 1.0f;
else if (wm < 0.6666666f) mask.y = 1.0f;
else mask.z = 1.0f;
colour *= mask;
}
return float4(colour, 1.0f);
}
)";
} // namespace Rendering::Msl
#endif // __APPLE__
@@ -0,0 +1,168 @@
#pragma once
#ifdef __APPLE__
// Fragment shader del shader "postfx": vignette, chroma, scanlines, mask,
// gamma, curvature, bleeding i flicker. Els paràmetres venen via uniforms.
//
// IMPORTANT: mantenir sincronitzat a mà amb data/shaders/postfx.frag. SDL3 GPU
// compila aquest string MSL en runtime; no hi ha generador automàtic. Qualsevol
// canvi a la struct d'uniforms o a la lògica del GLSL cal replicar-lo ací al
// mateix commit. Mida total = 64 bytes (4 × vec4).
namespace Rendering::Msl {
inline constexpr const char* kPostfxFrag = R"(
#include <metal_stdlib>
using namespace metal;
struct PostVOut {
float4 pos [[position]];
float2 uv;
};
struct PostFXUniforms {
float vignette_strength;
float chroma_min;
float scanline_strength;
float screen_height;
float mask_strength;
float gamma_strength;
float curvature;
float bleeding;
float pixel_scale;
float time;
float flicker;
float chroma_max;
// vec4 #3 — paràmetres de scanlines (exposats per preset YAML)
float scan_dark_ratio;
float scan_dark_floor;
float scan_edge_soft;
float pad3;
};
// Mostreig bilinear horitzontal d'un canal RGB. Evita el "tic-tac" del sampler
// NEAREST quan l'offset de chroma és subpíxel.
static float sampleBilinearX(float2 uv_target, int channel, texture2d<float> scene, sampler samp) {
float2 tex_size = float2(scene.get_width(), scene.get_height());
float px = uv_target.x * tex_size.x - 0.5f;
float p_floor = floor(px);
float f = px - p_floor;
float4 c0 = scene.sample(samp, float2((p_floor + 0.5f) / tex_size.x, uv_target.y));
float4 c1 = scene.sample(samp, float2((p_floor + 1.5f) / tex_size.x, uv_target.y));
return mix(c0[channel], c1[channel], f);
}
static float3 rgb_to_ycc(float3 rgb) {
return float3(
0.299f*rgb.r + 0.587f*rgb.g + 0.114f*rgb.b,
-0.169f*rgb.r - 0.331f*rgb.g + 0.500f*rgb.b + 0.5f,
0.500f*rgb.r - 0.419f*rgb.g - 0.081f*rgb.b + 0.5f
);
}
static float3 ycc_to_rgb(float3 ycc) {
float y = ycc.x;
float cb = ycc.y - 0.5f;
float cr = ycc.z - 0.5f;
return clamp(float3(
y + 1.402f*cr,
y - 0.344f*cb - 0.714f*cr,
y + 1.772f*cb
), 0.0f, 1.0f);
}
fragment float4 postfx_fs(PostVOut in [[stage_in]],
texture2d<float> scene [[texture(0)]],
sampler samp [[sampler(0)]],
constant PostFXUniforms& u [[buffer(0)]]) {
float2 uv = in.uv;
if (u.curvature > 0.0f) {
float2 c = uv - 0.5f;
float rsq = dot(c, c);
float2 dist = float2(0.05f, 0.1f) * u.curvature;
float2 barrelScale = 1.0f - 0.23f * dist;
c += c * (dist * rsq);
c *= barrelScale;
if (abs(c.x) >= 0.5f || abs(c.y) >= 0.5f) {
return float4(0.0f, 0.0f, 0.0f, 1.0f);
}
uv = c + 0.5f;
}
float3 base = scene.sample(samp, uv).rgb;
float3 colour;
if (u.bleeding > 0.0f) {
float tw = float(scene.get_width());
float step = 1.0f / tw;
float3 ycc = rgb_to_ycc(base);
float3 ycc_l2 = rgb_to_ycc(scene.sample(samp, uv - float2(2.0f*step, 0.0f)).rgb);
float3 ycc_l1 = rgb_to_ycc(scene.sample(samp, uv - float2(1.0f*step, 0.0f)).rgb);
float3 ycc_r1 = rgb_to_ycc(scene.sample(samp, uv + float2(1.0f*step, 0.0f)).rgb);
float3 ycc_r2 = rgb_to_ycc(scene.sample(samp, uv + float2(2.0f*step, 0.0f)).rgb);
ycc.yz = (ycc_l2.yz + ycc_l1.yz*2.0f + ycc.yz*2.0f + ycc_r1.yz*2.0f + ycc_r2.yz) / 8.0f;
colour = mix(base, ycc_to_rgb(ycc), u.bleeding);
} else {
colour = base;
}
// Chroma — varia entre chroma_min i chroma_max via sinusoidal; si min == max
// queda estàtic. Mostreig bilinear horitzontal per evitar el "tic-tac" del
// NEAREST sampler amb offsets subpíxel.
if (u.chroma_min > 0.0f || u.chroma_max > 0.0f) {
float ca = mix(u.chroma_min, u.chroma_max, 0.5f + 0.5f * sin(u.time * 7.3f)) * 0.005f;
colour.r = sampleBilinearX(uv + float2(ca, 0.0f), 0, scene, samp);
colour.b = sampleBilinearX(uv - float2(ca, 0.0f), 2, scene, samp);
}
if (u.gamma_strength > 0.0f) {
float3 lin = pow(colour, float3(2.4f));
colour = mix(colour, lin, u.gamma_strength);
}
// Scanlines — 3 subpíxels per fila lògica (2 brillants + 1 fosca). Transició
// suavitzada amb smoothstep d'ample ≈ 1 píxel físic (estil crtpi: filtratge
// analític continu). scan_edge_soft = 0 recupera el step dur de l'original.
if (u.scanline_strength > 0.0f) {
float ps = max(u.pixel_scale, 1.0f);
float sub = fract(uv.y * u.screen_height);
float dark_center = 1.0f - u.scan_dark_ratio * 0.5f;
float d = abs(sub - dark_center);
d = min(d, 1.0f - d);
float half_width = u.scan_dark_ratio * 0.5f;
float softness = u.scan_edge_soft * 0.5f / ps;
float band = 1.0f - smoothstep(half_width - softness, half_width + softness, d);
float scan = mix(1.0f, u.scan_dark_floor, band);
colour *= mix(1.0f, scan, u.scanline_strength);
}
if (u.gamma_strength > 0.0f) {
float3 enc = pow(colour, float3(1.0f/2.2f));
colour = mix(colour, enc, u.gamma_strength);
}
float2 d = uv - 0.5f;
float vignette = 1.0f - dot(d, d) * u.vignette_strength;
colour *= clamp(vignette, 0.0f, 1.0f);
if (u.mask_strength > 0.0f) {
float whichMask = fract(in.pos.x * 0.3333333f);
float3 mask = float3(0.80f);
if (whichMask < 0.3333333f) mask.x = 1.0f;
else if (whichMask < 0.6666667f) mask.y = 1.0f;
else mask.z = 1.0f;
colour = mix(colour, colour * mask, u.mask_strength);
}
if (u.flicker > 0.0f) {
float flicker_wave = sin(u.time * 100.0f) * 0.5f + 0.5f;
colour *= 1.0f - u.flicker * 0.04f * flicker_wave;
}
return float4(colour, 1.0f);
}
)";
} // namespace Rendering::Msl
#endif // __APPLE__
@@ -0,0 +1,30 @@
#pragma once
#ifdef __APPLE__
// Vertex shader compartit per tots els pipelines de post-procés:
// fullscreen-triangle que cobreix tota l'àrea del swapchain amb UVs a [0,1].
namespace Rendering::Msl {
inline constexpr const char* kPostfxVert = R"(
#include <metal_stdlib>
using namespace metal;
struct PostVOut {
float4 pos [[position]];
float2 uv;
};
vertex PostVOut postfx_vs(uint vid [[vertex_id]]) {
const float2 positions[3] = { {-1.0, -1.0}, {3.0, -1.0}, {-1.0, 3.0} };
const float2 uvs[3] = { { 0.0, 1.0}, {2.0, 1.0}, { 0.0,-1.0} };
PostVOut out;
out.pos = float4(positions[vid], 0.0, 1.0);
out.uv = uvs[vid];
return out;
}
)";
} // namespace Rendering::Msl
#endif // __APPLE__
File diff suppressed because it is too large Load Diff