shader postfx nou + spv regenerat + msl extret a headers
This commit is contained in:
@@ -0,0 +1,144 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef __APPLE__
|
||||
|
||||
// Fragment shader del shader "crtpi" (algoritme CRT-Pi): scanlines amb
|
||||
// pesos gaussians, multisample opcional, gamma i màscara de subpíxels.
|
||||
namespace Rendering::Msl {
|
||||
|
||||
inline constexpr const char* kCrtpiFrag = R"(
|
||||
#include <metal_stdlib>
|
||||
using namespace metal;
|
||||
|
||||
// Vertex-to-fragment payload that crtpi_fs receives through [[stage_in]].
struct PostVOut {
|
||||
// Fragment position; crtpi_fs reads pos.x to choose the mask column.
float4 pos [[position]];
|
||||
// Texture coordinate; crtpi_fs uses it as the base sampling coordinate.
float2 uv;
|
||||
};
|
||||
|
||||
// Parameter block read by crtpi_fs from buffer(0). Sixteen scalar members.
// NOTE(review): the CPU-side struct presumably mirrors this exact member
// order — confirm against the uploader.
struct CrtPiUniforms {
|
||||
// Falloff factor of the scanline weight: max(1 - dist^2 * scanline_weight, gap).
float scanline_weight;
|
||||
// Lower bound ("gap") of the scanline weight.
float scanline_gap_brightness;
|
||||
// Multiplied together with the scanline weight into the colour.
float bloom_factor;
|
||||
// Exponent applied to the sampled colour before scanline weighting.
float input_gamma;
|
||||
// The colour is raised to 1 / output_gamma after scanline weighting.
float output_gamma;
|
||||
// Multiplier used for the channels that the mask dims.
float mask_brightness;
|
||||
// Horizontal curvature handed to crtpi_distort.
float curvature_x;
|
||||
// Vertical curvature handed to crtpi_distort.
float curvature_y;
|
||||
// 1 = two-column mask, 2 = three-column mask, any other value = no mask.
int mask_type;
|
||||
// Non-zero enables the scanline/bloom stage (gamma only runs inside it).
int enable_scanlines;
|
||||
// Non-zero averages three scan-weight taps instead of one.
int enable_multisample;
|
||||
// Non-zero applies input/output gamma around the scanline weighting.
int enable_gamma;
|
||||
// Non-zero applies the barrel distortion.
int enable_curvature;
|
||||
// Non-zero also sharpens the horizontal coordinate (both axes are snapped).
int enable_sharper;
|
||||
// Width used to convert normalized coordinates to pixel coordinates.
float texture_width;
|
||||
// Height used for the same conversion; also scales the multisample filter width.
float texture_height;
|
||||
};
|
||||
|
||||
// Barrel-distorts a normalized coordinate. The coordinate is scaled by
// screen_scale, re-centred on the origin, pushed outward in proportion to its
// squared radius times the per-axis curvature (cx, cy), shrunk by
// 1 - 0.23 * curvature and finally mapped back to the input space.
// Returns float2(-1.0f) as an "off-screen" sentinel when the distorted point
// lands on or outside the +/-0.5 box; otherwise returns the distorted coordinate.
static float2 crtpi_distort(float2 coord, float2 screen_scale, float cx, float cy) {
|
||||
float2 curvature = float2(cx, cy);
|
||||
// Compensating shrink so the bulged image still fits the unit square.
float2 barrel_scale = 1.0f - (0.23f * curvature);
|
||||
coord *= screen_scale;
|
||||
// Work relative to the screen centre.
coord -= 0.5f;
|
||||
float rsq = coord.x * coord.x + coord.y * coord.y;
|
||||
// Displacement grows with the squared distance from the centre.
coord += coord * (curvature * rsq);
|
||||
coord *= barrel_scale;
|
||||
// Outside the visible area: report the sentinel instead of a coordinate.
if (abs(coord.x) >= 0.5f || abs(coord.y) >= 0.5f) { return float2(-1.0f); }
|
||||
coord += 0.5f;
|
||||
coord /= screen_scale;
|
||||
return coord;
|
||||
}
|
||||
|
||||
// Brightness weight of a scanline at distance `dist` from its centre:
// a parabola 1 - dist^2 * sw that is never allowed to drop below `gap`.
static float crtpi_scan_weight(float dist, float sw, float gap) {
|
||||
return max(1.0f - dist * dist * sw, gap);
|
||||
}
|
||||
|
||||
// Scanline weight at vertical offset `dy`. When `ms` is true the weight is the
// mean of three taps taken at dy, dy - filter_w and dy + filter_w; otherwise
// it is the single tap at dy. `sw` and `gap` are forwarded to crtpi_scan_weight.
static float crtpi_scan_line(float dy, float filter_w, float sw, float gap, bool ms) {
|
||||
float w = crtpi_scan_weight(dy, sw, gap);
|
||||
if (ms) {
|
||||
w += crtpi_scan_weight(dy - filter_w, sw, gap);
|
||||
w += crtpi_scan_weight(dy + filter_w, sw, gap);
|
||||
// Average of the three taps.
w *= 0.3333333f;
|
||||
}
|
||||
return w;
|
||||
}
|
||||
|
||||
// Fragment entry point of the "crtpi" pipeline. Stages, in order:
//   1. optional barrel curvature (black outside the distorted screen),
//   2. sampling coordinate snapped toward texel centres (both axes when
//      enable_sharper is set, vertical axis only otherwise),
//   3. texture sample,
//   4. optional scanline weighting with bloom, wrapped in optional gamma,
//   5. optional column mask selected by mask_type.
// Always returns alpha = 1.
fragment float4 crtpi_fs(PostVOut in [[stage_in]],
|
||||
texture2d<float> tex [[texture(0)]],
|
||||
sampler samp [[sampler(0)]],
|
||||
constant CrtPiUniforms& u [[buffer(0)]]) {
|
||||
float2 tex_size = float2(u.texture_width, u.texture_height);
|
||||
// Width of the analytic scanline filter. 768 = reference CRT height
|
||||
// the original algorithm was tuned for; 3 = per-subpixel (R/G/B)
|
||||
// division used by the multisample. The result scales with the input
|
||||
// texture, so a taller texture yields a finer filter.
|
||||
const float CRT_REFERENCE_HEIGHT = 768.0f;
|
||||
const float SUBPIXEL_DIV = 3.0f;
|
||||
float filter_width = (CRT_REFERENCE_HEIGHT / u.texture_height) / SUBPIXEL_DIV;
|
||||
float2 texcoord = in.uv;
|
||||
|
||||
// Curvature: crtpi_distort returns (-1, -1) for points off the screen,
// which are painted opaque black.
if (u.enable_curvature != 0) {
|
||||
texcoord = crtpi_distort(texcoord, float2(1.0f, 1.0f), u.curvature_x, u.curvature_y);
|
||||
if (texcoord.x < 0.0f) { return float4(0.0f, 0.0f, 0.0f, 1.0f); }
|
||||
}
|
||||
|
||||
float2 coord_in_pixels = texcoord * tex_size;
|
||||
float2 tc;
|
||||
float scan_weight;
|
||||
|
||||
if (u.enable_sharper != 0) {
|
||||
// Centre of the texel that contains the sample.
float2 temp = floor(coord_in_pixels) + 0.5f;
|
||||
tc = temp / tex_size;
|
||||
// Offset from that centre, in pixels.
float2 deltas = coord_in_pixels - temp;
|
||||
scan_weight = crtpi_scan_line(deltas.y, filter_width, u.scanline_weight, u.scanline_gap_brightness, u.enable_multisample != 0);
|
||||
float2 signs = sign(deltas);
|
||||
// The next five statements reshape the offset to 2*dx^2 horizontally
// and 8*dy^4 vertically: almost zero near the texel centre, 0.5 at the
// texel edge.
deltas.x *= 2.0f;
|
||||
deltas = deltas * deltas;
|
||||
deltas.y = deltas.y * deltas.y;
|
||||
deltas.x *= 0.5f;
|
||||
deltas.y *= 8.0f;
|
||||
// Back to normalized units, with the original direction restored.
deltas /= tex_size;
|
||||
deltas *= signs;
|
||||
tc = tc + deltas;
|
||||
} else {
|
||||
// Same reshaping as above, applied to the vertical axis only; the
// horizontal coordinate is used unchanged.
float temp_y = floor(coord_in_pixels.y) + 0.5f;
|
||||
float y_coord = temp_y / tex_size.y;
|
||||
float dy = coord_in_pixels.y - temp_y;
|
||||
scan_weight = crtpi_scan_line(dy, filter_width, u.scanline_weight, u.scanline_gap_brightness, u.enable_multisample != 0);
|
||||
float sign_y = sign(dy);
|
||||
dy = dy * dy;
|
||||
dy = dy * dy;
|
||||
dy *= 8.0f;
|
||||
dy /= tex_size.y;
|
||||
dy *= sign_y;
|
||||
tc = float2(texcoord.x, y_coord + dy);
|
||||
}
|
||||
|
||||
float3 colour = tex.sample(samp, tc).rgb;
|
||||
|
||||
// Gamma is only applied around the scanline weighting; with scanlines
// disabled the sampled colour passes through untouched.
if (u.enable_scanlines != 0) {
|
||||
if (u.enable_gamma != 0) { colour = pow(colour, float3(u.input_gamma)); }
|
||||
colour *= scan_weight * u.bloom_factor;
|
||||
if (u.enable_gamma != 0) { colour = pow(colour, float3(1.0f / u.output_gamma)); }
|
||||
}
|
||||
|
||||
// Mask type 1: columns alternate between keeping green (dimming red/blue)
// and keeping red/blue (dimming green).
if (u.mask_type == 1) {
|
||||
float wm = fract(in.pos.x * 0.5f);
|
||||
float3 mask = (wm < 0.5f) ? float3(u.mask_brightness, 1.0f, u.mask_brightness)
|
||||
: float3(1.0f, u.mask_brightness, 1.0f);
|
||||
colour *= mask;
|
||||
// Mask type 2: a three-column cycle keeping red, then green, then blue
// at full strength while the other two channels are dimmed.
} else if (u.mask_type == 2) {
|
||||
float wm = fract(in.pos.x * 0.3333333f);
|
||||
float3 mask = float3(u.mask_brightness);
|
||||
if (wm < 0.3333333f) mask.x = 1.0f;
|
||||
else if (wm < 0.6666666f) mask.y = 1.0f;
|
||||
else mask.z = 1.0f;
|
||||
colour *= mask;
|
||||
}
|
||||
|
||||
return float4(colour, 1.0f);
|
||||
}
|
||||
)";
|
||||
|
||||
} // namespace Rendering::Msl
|
||||
|
||||
#endif // __APPLE__
|
||||
@@ -0,0 +1,168 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef __APPLE__
|
||||
|
||||
// Fragment shader del shader "postfx": vignette, chroma, scanlines, mask,
|
||||
// gamma, curvature, bleeding i flicker. Els paràmetres venen via uniforms.
|
||||
//
|
||||
// IMPORTANT: mantenir sincronitzat a mà amb data/shaders/postfx.frag. SDL3 GPU
|
||||
// compila aquest string MSL en runtime; no hi ha generador automàtic. Qualsevol
|
||||
// canvi a la struct d'uniforms o a la lògica del GLSL cal replicar-lo ací al
|
||||
// mateix commit. Mida total = 64 bytes (4 × vec4).
|
||||
namespace Rendering::Msl {
|
||||
|
||||
inline constexpr const char* kPostfxFrag = R"(
|
||||
#include <metal_stdlib>
|
||||
using namespace metal;
|
||||
|
||||
// Vertex-to-fragment payload that postfx_fs receives through [[stage_in]].
struct PostVOut {
|
||||
// Fragment position; postfx_fs reads pos.x to choose the mask column.
float4 pos [[position]];
|
||||
// Texture coordinate; postfx_fs uses it as the base sampling coordinate.
float2 uv;
|
||||
};
|
||||
|
||||
// Parameter block read by postfx_fs from buffer(0): sixteen floats.
struct PostFXUniforms {
|
||||
// Vignette factor is 1 - |uv - 0.5|^2 * vignette_strength, clamped to [0, 1].
float vignette_strength;
|
||||
// Lower bound of the chromatic offset (the effect runs if min or max is > 0).
float chroma_min;
|
||||
// Blend amount of the scanline darkening; values > 0 enable it.
float scanline_strength;
|
||||
// Number of scanline rows across uv.y (row phase = fract(uv.y * screen_height)).
float screen_height;
|
||||
// Blend amount of the RGB column mask; values > 0 enable it.
float mask_strength;
|
||||
// Blend amount used by both gamma passes (pow 2.4 and pow 1/2.2).
float gamma_strength;
|
||||
// Barrel curvature amount; values > 0 enable the distortion.
float curvature;
|
||||
// Mix between the original colour and the chroma-blurred colour.
float bleeding;
|
||||
// Clamped to >= 1 by the shader; divides the scanline edge softness.
float pixel_scale;
|
||||
// Animation clock fed to sin() for chroma and flicker.
// NOTE(review): presumably seconds — confirm with the caller.
float time;
|
||||
// Flicker amount; brightness is reduced by up to flicker * 0.04.
float flicker;
|
||||
// Upper bound of the chromatic offset.
float chroma_max;
|
||||
// vec4 #3 — scanline parameters (exposed per YAML preset)
|
||||
// Fraction of each row covered by the dark band.
float scan_dark_ratio;
|
||||
// Brightness multiplier inside the dark band.
float scan_dark_floor;
|
||||
// Softness of the dark-band edge.
float scan_edge_soft;
|
||||
// Not read by the shader; fills the last slot of the struct.
float pad3;
|
||||
};
|
||||
|
||||
// Horizontal bilinear sampling of a single colour channel. Avoids the
|
||||
// "tick-tock" of the NEAREST sampler when the chroma offset is subpixel.
|
||||
// Samples the two texel centres that bracket uv_target.x on the same row and
// blends component `channel` of both samples by the fractional position
// between them. postfx_fs passes channel 0 (red) and 2 (blue).
static float sampleBilinearX(float2 uv_target, int channel, texture2d<float> scene, sampler samp) {
|
||||
float2 tex_size = float2(scene.get_width(), scene.get_height());
|
||||
// Position in texel units, shifted so integer values fall on texel centres.
float px = uv_target.x * tex_size.x - 0.5f;
|
||||
float p_floor = floor(px);
|
||||
// Fractional distance from the left texel centre; used as the blend weight.
float f = px - p_floor;
|
||||
float4 c0 = scene.sample(samp, float2((p_floor + 0.5f) / tex_size.x, uv_target.y));
|
||||
float4 c1 = scene.sample(samp, float2((p_floor + 1.5f) / tex_size.x, uv_target.y));
|
||||
return mix(c0[channel], c1[channel], f);
|
||||
}
|
||||
|
||||
// Converts RGB to (Y, Cb, Cr). Y is the weighted luma; Cb and Cr are the
// colour-difference components, each offset by +0.5. The result is not clamped.
static float3 rgb_to_ycc(float3 rgb) {
|
||||
return float3(
|
||||
0.299f*rgb.r + 0.587f*rgb.g + 0.114f*rgb.b,
|
||||
-0.169f*rgb.r - 0.331f*rgb.g + 0.500f*rgb.b + 0.5f,
|
||||
0.500f*rgb.r - 0.419f*rgb.g - 0.081f*rgb.b + 0.5f
|
||||
);
|
||||
}
|
||||
// Converts (Y, Cb, Cr) back to RGB. The 0.5 offset is removed from Cb and Cr
// first, and the resulting RGB is clamped to [0, 1].
static float3 ycc_to_rgb(float3 ycc) {
|
||||
float y = ycc.x;
|
||||
float cb = ycc.y - 0.5f;
|
||||
float cr = ycc.z - 0.5f;
|
||||
return clamp(float3(
|
||||
y + 1.402f*cr,
|
||||
y - 0.344f*cb - 0.714f*cr,
|
||||
y + 1.772f*cb
|
||||
), 0.0f, 1.0f);
|
||||
}
|
||||
|
||||
// Fragment entry point of the "postfx" pipeline. Every effect is skipped when
// its controlling uniform is <= 0 (the vignette always runs, but a strength of
// 0 leaves the colour unchanged). Stages, in order:
//   1. barrel curvature (opaque black outside the distorted screen),
//   2. base sample,
//   3. chroma bleeding (horizontal 5-tap blur of Cb/Cr only),
//   4. chromatic aberration (red/blue re-sampled at +/- a horizontal offset),
//   5. gamma decode blend (pow 2.4),
//   6. scanlines,
//   7. gamma encode blend (pow 1/2.2),
//   8. vignette,
//   9. RGB column mask,
//  10. flicker.
// Always returns alpha = 1.
fragment float4 postfx_fs(PostVOut in [[stage_in]],
                          texture2d<float> scene [[texture(0)]],
                          sampler samp [[sampler(0)]],
                          constant PostFXUniforms& u [[buffer(0)]]) {
    float2 uv = in.uv;

    // Barrel curvature around the screen centre; the vertical axis is bent
    // twice as much as the horizontal one (0.1 vs 0.05).
    if (u.curvature > 0.0f) {
        float2 c = uv - 0.5f;
        float rsq = dot(c, c);
        float2 dist = float2(0.05f, 0.1f) * u.curvature;
        float2 barrelScale = 1.0f - 0.23f * dist;
        c += c * (dist * rsq);
        c *= barrelScale;
        if (abs(c.x) >= 0.5f || abs(c.y) >= 0.5f) {
            return float4(0.0f, 0.0f, 0.0f, 1.0f);
        }
        uv = c + 0.5f;
    }

    float3 base = scene.sample(samp, uv).rgb;

    // Bleeding: keep the luma of the centre texel and replace its chroma with
    // a 1-2-2-2-1 weighted average of the five horizontal neighbours.
    float3 colour;
    if (u.bleeding > 0.0f) {
        float tw = float(scene.get_width());
        float step = 1.0f / tw;
        float3 ycc = rgb_to_ycc(base);
        float3 ycc_l2 = rgb_to_ycc(scene.sample(samp, uv - float2(2.0f*step, 0.0f)).rgb);
        float3 ycc_l1 = rgb_to_ycc(scene.sample(samp, uv - float2(1.0f*step, 0.0f)).rgb);
        float3 ycc_r1 = rgb_to_ycc(scene.sample(samp, uv + float2(1.0f*step, 0.0f)).rgb);
        float3 ycc_r2 = rgb_to_ycc(scene.sample(samp, uv + float2(2.0f*step, 0.0f)).rgb);
        ycc.yz = (ycc_l2.yz + ycc_l1.yz*2.0f + ycc.yz*2.0f + ycc_r1.yz*2.0f + ycc_r2.yz) / 8.0f;
        colour = mix(base, ycc_to_rgb(ycc), u.bleeding);
    } else {
        colour = base;
    }

    // Chroma — oscillates between chroma_min and chroma_max following a sine
    // of time; if min == max it stays static. Horizontal bilinear sampling
    // avoids the "tick-tock" of the NEAREST sampler with subpixel offsets.
    // Red and blue are re-sampled from the scene, replacing whatever the
    // bleeding stage produced for those two channels.
    if (u.chroma_min > 0.0f || u.chroma_max > 0.0f) {
        float ca = mix(u.chroma_min, u.chroma_max, 0.5f + 0.5f * sin(u.time * 7.3f)) * 0.005f;
        colour.r = sampleBilinearX(uv + float2(ca, 0.0f), 0, scene, samp);
        colour.b = sampleBilinearX(uv - float2(ca, 0.0f), 2, scene, samp);
    }

    if (u.gamma_strength > 0.0f) {
        float3 lin = pow(colour, float3(2.4f));
        colour = mix(colour, lin, u.gamma_strength);
    }

    // Scanlines — each row (one unit of uv.y * screen_height) ends with a dark
    // band covering the last scan_dark_ratio of the row; e.g. a ratio of 1/3
    // gives 2 bright + 1 dark sub-rows. Inside the band the colour is scaled
    // by scan_dark_floor. The band edge is smoothed over +/- softness.
    if (u.scanline_strength > 0.0f) {
        float ps = max(u.pixel_scale, 1.0f);
        float sub = fract(uv.y * u.screen_height);
        float dark_center = 1.0f - u.scan_dark_ratio * 0.5f;
        float d = abs(sub - dark_center);
        // Wrap-around distance, so the band is continuous across row borders.
        d = min(d, 1.0f - d);
        float half_width = u.scan_dark_ratio * 0.5f;
        float softness = u.scan_edge_soft * 0.5f / ps;
        // smoothstep() is undefined when edge0 >= edge1, which is exactly what
        // scan_edge_soft <= 0 would produce. Take the documented hard step
        // explicitly in that case instead of relying on undefined behaviour.
        // NOTE(review): data/shaders/postfx.frag should get the same guard
        // (GLSL smoothstep has the same restriction).
        float band = (softness > 0.0f)
            ? 1.0f - smoothstep(half_width - softness, half_width + softness, d)
            : ((d < half_width) ? 1.0f : 0.0f);
        float scan = mix(1.0f, u.scan_dark_floor, band);
        colour *= mix(1.0f, scan, u.scanline_strength);
    }

    if (u.gamma_strength > 0.0f) {
        float3 enc = pow(colour, float3(1.0f/2.2f));
        colour = mix(colour, enc, u.gamma_strength);
    }

    // Vignette: darkens with the squared distance from the screen centre.
    float2 d = uv - 0.5f;
    float vignette = 1.0f - dot(d, d) * u.vignette_strength;
    colour *= clamp(vignette, 0.0f, 1.0f);

    // RGB mask: a three-column cycle that keeps one channel at full strength
    // and scales the other two by 0.80, blended in by mask_strength.
    if (u.mask_strength > 0.0f) {
        float whichMask = fract(in.pos.x * 0.3333333f);
        float3 mask = float3(0.80f);
        if      (whichMask < 0.3333333f) mask.x = 1.0f;
        else if (whichMask < 0.6666667f) mask.y = 1.0f;
        else                             mask.z = 1.0f;
        colour = mix(colour, colour * mask, u.mask_strength);
    }

    // Flicker: brightness dips by at most flicker * 4 % following sin(time * 100).
    if (u.flicker > 0.0f) {
        float flicker_wave = sin(u.time * 100.0f) * 0.5f + 0.5f;
        colour *= 1.0f - u.flicker * 0.04f * flicker_wave;
    }

    return float4(colour, 1.0f);
}
|
||||
)";
|
||||
|
||||
} // namespace Rendering::Msl
|
||||
|
||||
#endif // __APPLE__
|
||||
@@ -0,0 +1,30 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef __APPLE__
|
||||
|
||||
// Vertex shader compartit per tots els pipelines de post-procés:
|
||||
// fullscreen-triangle que cobreix tota l'àrea del swapchain amb UVs a [0,1].
|
||||
namespace Rendering::Msl {
|
||||
|
||||
inline constexpr const char* kPostfxVert = R"(
|
||||
#include <metal_stdlib>
|
||||
using namespace metal;
|
||||
|
||||
// Output of postfx_vs, interpolated across the triangle for the fragment stage.
struct PostVOut {
|
||||
// Clip-space position of the vertex.
float4 pos [[position]];
|
||||
// Texture coordinate associated with the vertex.
float2 uv;
|
||||
};
|
||||
|
||||
// Vertex entry point that emits one oversized triangle from vertex_id alone
// (no vertex buffer is read). The triangle's corners lie at (-1,-1), (3,-1)
// and (-1,3), so it covers the whole [-1,1] clip-space square; the matching
// UVs interpolate to [0,1] inside that square with v flipped (clip y = -1
// maps to v = 1, clip y = +1 maps to v = 0).
// NOTE(review): the tables hold three entries, so this expects a 3-vertex
// draw — confirm at the call site.
vertex PostVOut postfx_vs(uint vid [[vertex_id]]) {
|
||||
const float2 positions[3] = { {-1.0, -1.0}, {3.0, -1.0}, {-1.0, 3.0} };
|
||||
const float2 uvs[3] = { { 0.0, 1.0}, {2.0, 1.0}, { 0.0,-1.0} };
|
||||
PostVOut out;
|
||||
// z = 0, w = 1: no depth or perspective is involved.
out.pos = float4(positions[vid], 0.0, 1.0);
|
||||
out.uv = uvs[vid];
|
||||
return out;
|
||||
}
|
||||
)";
|
||||
|
||||
} // namespace Rendering::Msl
|
||||
|
||||
#endif // __APPLE__
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user