#include "gpu_pipeline.hpp"

// NOTE(review): the name of this header was lost when the file was extracted;
// the SDL3 umbrella header is assumed here — confirm against the repository.
#include <SDL3/SDL.h>

#include <array>    // for std::array
#include <cstddef>  // offsetof
#include <cstring>  // strlen

#include "gpu_ball_buffer.hpp"   // for BallGPUData layout
#include "gpu_sprite_batch.hpp"  // for GpuVertex layout

#ifndef __APPLE__
// Generated at build time by CMake + glslc (see cmake/spv_to_header.cmake)
#include "ball_vert_spv.h"
#include "postfx_frag_spv.h"
#include "postfx_vert_spv.h"
#include "sprite_frag_spv.h"
#include "sprite_vert_spv.h"
#endif

// ============================================================================
// File-local helpers
// ============================================================================
namespace {

/// Releases whichever of the two shaders was actually created (non-null).
void releaseShaderPair(SDL_GPUDevice* device, SDL_GPUShader* vert, SDL_GPUShader* frag) {
    if (vert != nullptr) {
        SDL_ReleaseGPUShader(device, vert);
    }
    if (frag != nullptr) {
        SDL_ReleaseGPUShader(device, frag);
    }
}

/// Builds the three-attribute layout shared by GpuVertex and BallGPUData:
/// location 0 = float2, location 1 = float2, location 2 = float4, all read
/// from vertex buffer slot 0 at the given byte offsets.
auto makeFloat2Float2Float4Attributes(std::size_t offset0, std::size_t offset1,
                                      std::size_t offset2)
    -> std::array<SDL_GPUVertexAttribute, 3> {
    std::array<SDL_GPUVertexAttribute, 3> attrs = {};

    attrs[0].location = 0;
    attrs[0].buffer_slot = 0;
    attrs[0].format = SDL_GPU_VERTEXELEMENTFORMAT_FLOAT2;
    attrs[0].offset = static_cast<Uint32>(offset0);

    attrs[1].location = 1;
    attrs[1].buffer_slot = 0;
    attrs[1].format = SDL_GPU_VERTEXELEMENTFORMAT_FLOAT2;
    attrs[1].offset = static_cast<Uint32>(offset1);

    attrs[2].location = 2;
    attrs[2].buffer_slot = 0;
    attrs[2].format = SDL_GPU_VERTEXELEMENTFORMAT_FLOAT4;
    attrs[2].offset = static_cast<Uint32>(offset2);

    return attrs;
}

/// Creates a triangle-list pipeline with a single color target and then
/// releases both shaders, whether or not pipeline creation succeeded.
/// Returns nullptr on failure; SDL_GetError() describes the reason.
auto buildTrianglePipeline(SDL_GPUDevice* device, SDL_GPUShader* vert, SDL_GPUShader* frag,
                           const SDL_GPUVertexInputState& vertex_input,
                           const SDL_GPUColorTargetDescription& target)
    -> SDL_GPUGraphicsPipeline* {
    SDL_GPUGraphicsPipelineCreateInfo info = {};
    info.vertex_shader = vert;
    info.fragment_shader = frag;
    info.vertex_input_state = vertex_input;
    info.primitive_type = SDL_GPU_PRIMITIVETYPE_TRIANGLELIST;
    info.target_info.num_color_targets = 1;
    info.target_info.color_target_descriptions = &target;

    SDL_GPUGraphicsPipeline* pipeline = SDL_CreateGPUGraphicsPipeline(device, &info);

    // The shaders are not used again by this file once the pipeline call returns.
    SDL_ReleaseGPUShader(device, vert);
    SDL_ReleaseGPUShader(device, frag);
    return pipeline;
}

/// Fills the SDL_GPUShaderCreateInfo fields common to the MSL and SPIR-V paths.
/// Storage textures are always declared as 0.
auto makeShaderInfo(const Uint8* code, std::size_t code_size, const char* entrypoint,
                    SDL_GPUShaderFormat format, SDL_GPUShaderStage stage, Uint32 num_samplers,
                    Uint32 num_uniform_buffers, Uint32 num_storage_buffers)
    -> SDL_GPUShaderCreateInfo {
    SDL_GPUShaderCreateInfo info = {};
    info.code = code;
    info.code_size = code_size;
    info.entrypoint = entrypoint;
    info.format = format;
    info.stage = stage;
    info.num_samplers = num_samplers;
    info.num_storage_textures = 0;
    info.num_storage_buffers = num_storage_buffers;
    info.num_uniform_buffers = num_uniform_buffers;
    return info;
}

}  // namespace

#ifdef __APPLE__
// ============================================================================
// MSL Shaders (Metal Shading Language, macOS)
// ============================================================================

// ---------------------------------------------------------------------------
// Sprite vertex shader
// Input:  GpuVertex (pos=NDC float2, uv float2, col float4)
// Output: position, uv, col forwarded to fragment stage
// ---------------------------------------------------------------------------
static const char* kSpriteVertMSL = R"(
#include <metal_stdlib>
using namespace metal;

struct SpriteVIn {
    float2 pos [[attribute(0)]];
    float2 uv  [[attribute(1)]];
    float4 col [[attribute(2)]];
};

struct SpriteVOut {
    float4 pos [[position]];
    float2 uv;
    float4 col;
};

vertex SpriteVOut sprite_vs(SpriteVIn in [[stage_in]]) {
    SpriteVOut out;
    out.pos = float4(in.pos, 0.0, 1.0);
    out.uv  = in.uv;
    out.col = in.col;
    return out;
}
)";

// ---------------------------------------------------------------------------
// Sprite fragment shader
// Samples a texture and multiplies by vertex color (for tinting + alpha).
// ---------------------------------------------------------------------------
static const char* kSpriteFragMSL = R"(
#include <metal_stdlib>
using namespace metal;

struct SpriteVOut {
    float4 pos [[position]];
    float2 uv;
    float4 col;
};

fragment float4 sprite_fs(SpriteVOut in [[stage_in]],
                          texture2d<float> tex [[texture(0)]],
                          sampler samp [[sampler(0)]]) {
    float4 t = tex.sample(samp, in.uv);
    return float4(t.rgb * in.col.rgb, t.a * in.col.a);
}
)";

// ---------------------------------------------------------------------------
// PostFX vertex shader
// Generates a full-screen triangle from vertex_id (no vertex buffer needed).
// UV mapping: NDC(-1,-1)→UV(0,1)  NDC(3,-1)→UV(2,1)  NDC(-1,3)→UV(0,-1)
// ---------------------------------------------------------------------------
static const char* kPostFXVertMSL = R"(
#include <metal_stdlib>
using namespace metal;

struct PostVOut {
    float4 pos [[position]];
    float2 uv;
};

vertex PostVOut postfx_vs(uint vid [[vertex_id]]) {
    const float2 positions[3] = { {-1.0, -1.0}, {3.0, -1.0}, {-1.0, 3.0} };
    const float2 uvs[3]       = { { 0.0,  1.0}, {2.0,  1.0}, { 0.0,-1.0} };
    PostVOut out;
    out.pos = float4(positions[vid], 0.0, 1.0);
    out.uv  = uvs[vid];
    return out;
}
)";

// ---------------------------------------------------------------------------
// PostFX fragment shader
// Effects driven by PostFXUniforms (uniform buffer slot 0):
//   - Chromatic aberration: RGB channel UV offset
//   - Scanlines: sin-wave intensity modulation
//   - Vignette: radial edge darkening
// MSL binding for fragment uniform buffer 0 with 1 sampler, 0 storage:
//   constant PostFXUniforms& u [[buffer(0)]]
// ---------------------------------------------------------------------------
static const char* kPostFXFragMSL = R"(
#include <metal_stdlib>
using namespace metal;

struct PostVOut {
    float4 pos [[position]];
    float2 uv;
};

struct PostFXUniforms {
    float vignette_strength;
    float chroma_strength;
    float scanline_strength;
    float screen_height;
};

fragment float4 postfx_fs(PostVOut in [[stage_in]],
                          texture2d<float> scene [[texture(0)]],
                          sampler samp [[sampler(0)]],
                          constant PostFXUniforms& u [[buffer(0)]]) {
    // Chromatic aberration: offset R and B channels horizontally.
    // G and A come from the un-shifted sample, fetched once.
    float ca = u.chroma_strength * 0.005;
    float4 center = scene.sample(samp, in.uv);
    float4 color;
    color.r = scene.sample(samp, in.uv + float2(ca, 0.0)).r;
    color.g = center.g;
    color.b = scene.sample(samp, in.uv - float2(ca, 0.0)).b;
    color.a = center.a;

    // Scanlines: sine wave with one full period every 2 pixels of screen_height
    // (e.g. 360 bands at 720p).
    float scan = 0.85 + 0.15 * sin(in.uv.y * 3.14159265 * u.screen_height);
    color.rgb *= mix(1.0, scan, u.scanline_strength);

    // Vignette: radial edge darkening
    float2 d = in.uv - float2(0.5, 0.5);
    float vignette = 1.0 - dot(d, d) * u.vignette_strength;
    color.rgb *= clamp(vignette, 0.0, 1.0);

    return color;
}
)";

// ---------------------------------------------------------------------------
// Ball instanced vertex shader
// Reads BallGPUData as per-instance attributes (input_rate = INSTANCE).
// Generates a 6-vertex quad (2 triangles) per instance using vertex_id.
//
// BallGPUData layout:
//   float2 center   [[attribute(0)]]  — NDC center (cx, cy)
//   float2 halfsize [[attribute(1)]]  — NDC half-size (hw, hh), both positive
//   float4 col      [[attribute(2)]]  — RGBA [0,1]
//
// NDC convention (SDL / Metal): Y increases upward (+1=top, -1=bottom).
//   half.x = w/screen_w, half.y = h/screen_h  (positive; Y is not flipped)
// Vertex order: TL TR BL | TR BR BL. With Y up this order is clockwise for
// both triangles; the pipeline below leaves its rasterizer state
// zero-initialized, so winding is not relied on for culling here.
// ---------------------------------------------------------------------------
static const char* kBallInstancedVertMSL = R"(
#include <metal_stdlib>
using namespace metal;

struct BallInstance {
    float2 center   [[attribute(0)]];  // NDC center
    float2 halfsize [[attribute(1)]];  // NDC half-size (both positive); 'half' is reserved in MSL
    float4 col      [[attribute(2)]];
};

struct BallVOut {
    float4 pos [[position]];
    float2 uv;
    float4 col;
};

vertex BallVOut ball_instanced_vs(BallInstance inst [[stage_in]],
                                  uint vid [[vertex_id]]) {
    // Offset signs for each of the 6 vertices (TL TR BL | TR BR BL)
    const float2 offsets[6] = {
        {-1.0f,  1.0f},  // TL
        { 1.0f,  1.0f},  // TR
        {-1.0f, -1.0f},  // BL
        { 1.0f,  1.0f},  // TR (shared)
        { 1.0f, -1.0f},  // BR
        {-1.0f, -1.0f},  // BL (shared)
    };
    // UV: TL=(0,0) TR=(1,0) BL=(0,1) BR=(1,1)
    const float2 uvs[6] = {
        {0.0f, 0.0f},
        {1.0f, 0.0f},
        {0.0f, 1.0f},
        {1.0f, 0.0f},
        {1.0f, 1.0f},
        {0.0f, 1.0f},
    };
    float2 pos = inst.center + offsets[vid] * inst.halfsize;
    BallVOut out;
    out.pos = float4(pos.x, pos.y, 0.0f, 1.0f);
    out.uv  = uvs[vid];
    out.col = inst.col;
    return out;
}
)";
#endif  // __APPLE__

// ============================================================================
// GpuPipeline implementation
// ============================================================================

/// Creates the sprite, ball-instanced and postfx graphics pipelines.
///
/// @param device            GPU device used for every shader/pipeline call.
/// @param target_format     Color format of the postfx pipeline's target.
/// @param offscreen_format  Color format of the sprite and ball pipelines' target.
/// @return true when all three pipelines were created. On failure the reason
///         is logged via SDL_Log, any pipeline already created by this call is
///         released through destroy(), and false is returned.
auto GpuPipeline::init(SDL_GPUDevice* device, SDL_GPUTextureFormat target_format,
                       SDL_GPUTextureFormat offscreen_format) -> bool {
    const SDL_GPUShaderFormat supported = SDL_GetGPUShaderFormats(device);

#ifdef __APPLE__
    if ((supported & SDL_GPU_SHADERFORMAT_MSL) == 0u) {
        SDL_Log("GpuPipeline: MSL not supported (format mask=%u)", supported);
        return false;
    }
#else
    if ((supported & SDL_GPU_SHADERFORMAT_SPIRV) == 0u) {
        SDL_Log("GpuPipeline: SPIRV not supported (format mask=%u)", supported);
        return false;
    }
#endif

    // ----------------------------------------------------------------
    // Sprite pipeline
    // ----------------------------------------------------------------
#ifdef __APPLE__
    SDL_GPUShader* sprite_vert =
        createShader(device, kSpriteVertMSL, "sprite_vs", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
    SDL_GPUShader* sprite_frag =
        createShader(device, kSpriteFragMSL, "sprite_fs", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 0);
#else
    SDL_GPUShader* sprite_vert = createShaderSPIRV(device, ksprite_vert_spv, ksprite_vert_spv_size,
                                                   "main", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
    SDL_GPUShader* sprite_frag = createShaderSPIRV(device, ksprite_frag_spv, ksprite_frag_spv_size,
                                                   "main", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 0);
#endif

    if ((sprite_vert == nullptr) || (sprite_frag == nullptr)) {
        SDL_Log("GpuPipeline: failed to create sprite shaders");
        releaseShaderPair(device, sprite_vert, sprite_frag);
        return false;
    }

    // Vertex input: GpuVertex layout, advanced once per vertex.
    SDL_GPUVertexBufferDescription sprite_vb_desc = {};
    sprite_vb_desc.slot = 0;
    sprite_vb_desc.pitch = sizeof(GpuVertex);
    sprite_vb_desc.input_rate = SDL_GPU_VERTEXINPUTRATE_VERTEX;
    sprite_vb_desc.instance_step_rate = 0;

    const auto sprite_attrs = makeFloat2Float2Float4Attributes(
        offsetof(GpuVertex, x), offsetof(GpuVertex, u), offsetof(GpuVertex, r));

    SDL_GPUVertexInputState sprite_vertex_input = {};
    sprite_vertex_input.vertex_buffer_descriptions = &sprite_vb_desc;
    sprite_vertex_input.num_vertex_buffers = 1;
    sprite_vertex_input.vertex_attributes = sprite_attrs.data();
    sprite_vertex_input.num_vertex_attributes = 3;

    // Alpha blend state (SRC_ALPHA, ONE_MINUS_SRC_ALPHA)
    SDL_GPUColorTargetBlendState blend = {};
    blend.enable_blend = true;
    blend.src_color_blendfactor = SDL_GPU_BLENDFACTOR_SRC_ALPHA;
    blend.dst_color_blendfactor = SDL_GPU_BLENDFACTOR_ONE_MINUS_SRC_ALPHA;
    blend.color_blend_op = SDL_GPU_BLENDOP_ADD;
    blend.src_alpha_blendfactor = SDL_GPU_BLENDFACTOR_ONE;
    blend.dst_alpha_blendfactor = SDL_GPU_BLENDFACTOR_ONE_MINUS_SRC_ALPHA;
    blend.alpha_blend_op = SDL_GPU_BLENDOP_ADD;
    blend.enable_color_write_mask = false;  // write all channels

    // Shared by the sprite and ball pipelines: both draw into the offscreen target.
    SDL_GPUColorTargetDescription offscreen_target_desc = {};
    offscreen_target_desc.format = offscreen_format;
    offscreen_target_desc.blend_state = blend;

    sprite_pipeline_ = buildTrianglePipeline(device, sprite_vert, sprite_frag,
                                             sprite_vertex_input, offscreen_target_desc);
    if (sprite_pipeline_ == nullptr) {
        SDL_Log("GpuPipeline: sprite pipeline creation failed: %s", SDL_GetError());
        return false;
    }

    // ----------------------------------------------------------------
    // Ball instanced pipeline
    //   Vertex:   ball_instanced_vs (BallGPUData per-instance, no index buffer)
    //   Fragment: sprite_fs (same texture+color blend as sprite pipeline)
    //   Targets:  offscreen (same as sprite pipeline)
    // ----------------------------------------------------------------
#ifdef __APPLE__
    SDL_GPUShader* ball_vert = createShader(device, kBallInstancedVertMSL, "ball_instanced_vs",
                                            SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
    SDL_GPUShader* ball_frag =
        createShader(device, kSpriteFragMSL, "sprite_fs", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 0);
#else
    SDL_GPUShader* ball_vert = createShaderSPIRV(device, kball_vert_spv, kball_vert_spv_size,
                                                 "main", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
    SDL_GPUShader* ball_frag = createShaderSPIRV(device, ksprite_frag_spv, ksprite_frag_spv_size,
                                                 "main", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 0);
#endif

    if ((ball_vert == nullptr) || (ball_frag == nullptr)) {
        SDL_Log("GpuPipeline: failed to create ball instanced shaders");
        releaseShaderPair(device, ball_vert, ball_frag);
        destroy(device);  // do not leave the already-created sprite pipeline behind
        return false;
    }

    // Vertex input: BallGPUData as per-instance data.
    SDL_GPUVertexBufferDescription ball_vb_desc = {};
    ball_vb_desc.slot = 0;
    ball_vb_desc.pitch = sizeof(BallGPUData);
    ball_vb_desc.input_rate = SDL_GPU_VERTEXINPUTRATE_INSTANCE;
    // NOTE(review): newer SDL3 releases appear to document instance_step_rate as
    // reserved ("must be set to 0"); confirm against the SDL version this
    // project builds with before changing the value.
    ball_vb_desc.instance_step_rate = 1;

    // center (float2), half-size (float2), color (float4)
    const auto ball_attrs = makeFloat2Float2Float4Attributes(
        offsetof(BallGPUData, cx), offsetof(BallGPUData, hw), offsetof(BallGPUData, r));

    SDL_GPUVertexInputState ball_vertex_input = {};
    ball_vertex_input.vertex_buffer_descriptions = &ball_vb_desc;
    ball_vertex_input.num_vertex_buffers = 1;
    ball_vertex_input.vertex_attributes = ball_attrs.data();
    ball_vertex_input.num_vertex_attributes = 3;

    ball_pipeline_ = buildTrianglePipeline(device, ball_vert, ball_frag, ball_vertex_input,
                                           offscreen_target_desc);
    if (ball_pipeline_ == nullptr) {
        SDL_Log("GpuPipeline: ball instanced pipeline creation failed: %s", SDL_GetError());
        destroy(device);
        return false;
    }

    // ----------------------------------------------------------------
    // UI overlay: no dedicated pipeline is created in this function. The
    // sprite pipeline above targets offscreen_format only; a pipeline drawing
    // sprites directly into target_format would need its own create call.
    // ----------------------------------------------------------------

    // ----------------------------------------------------------------
    // PostFX pipeline
    // ----------------------------------------------------------------
#ifdef __APPLE__
    SDL_GPUShader* postfx_vert =
        createShader(device, kPostFXVertMSL, "postfx_vs", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
    SDL_GPUShader* postfx_frag =
        createShader(device, kPostFXFragMSL, "postfx_fs", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 1);
#else
    SDL_GPUShader* postfx_vert = createShaderSPIRV(device, kpostfx_vert_spv, kpostfx_vert_spv_size,
                                                   "main", SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
    SDL_GPUShader* postfx_frag = createShaderSPIRV(device, kpostfx_frag_spv, kpostfx_frag_spv_size,
                                                   "main", SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 1);
#endif

    if ((postfx_vert == nullptr) || (postfx_frag == nullptr)) {
        SDL_Log("GpuPipeline: failed to create postfx shaders");
        releaseShaderPair(device, postfx_vert, postfx_frag);
        destroy(device);
        return false;
    }

    // PostFX: no vertex input (uses vertex_id), no blend (replace output)
    SDL_GPUColorTargetBlendState no_blend = {};
    no_blend.enable_blend = false;
    no_blend.enable_color_write_mask = false;

    SDL_GPUColorTargetDescription postfx_target_desc = {};
    postfx_target_desc.format = target_format;
    postfx_target_desc.blend_state = no_blend;

    const SDL_GPUVertexInputState no_input = {};

    postfx_pipeline_ =
        buildTrianglePipeline(device, postfx_vert, postfx_frag, no_input, postfx_target_desc);
    if (postfx_pipeline_ == nullptr) {
        SDL_Log("GpuPipeline: postfx pipeline creation failed: %s", SDL_GetError());
        destroy(device);
        return false;
    }

    SDL_Log("GpuPipeline: all pipelines created successfully");
    return true;
}

/// Releases every pipeline this object currently holds and resets the
/// corresponding members to nullptr, so calling it again is a no-op.
void GpuPipeline::destroy(SDL_GPUDevice* device) {
    if (sprite_pipeline_ != nullptr) {
        SDL_ReleaseGPUGraphicsPipeline(device, sprite_pipeline_);
        sprite_pipeline_ = nullptr;
    }
    if (ball_pipeline_ != nullptr) {
        SDL_ReleaseGPUGraphicsPipeline(device, ball_pipeline_);
        ball_pipeline_ = nullptr;
    }
    if (postfx_pipeline_ != nullptr) {
        SDL_ReleaseGPUGraphicsPipeline(device, postfx_pipeline_);
        postfx_pipeline_ = nullptr;
    }
}

/// Creates a shader from a SPIR-V binary blob.
///
/// @param device               GPU device that will own the shader.
/// @param spv_code             Pointer to the SPIR-V bytes.
/// @param spv_size             Size of the blob in bytes.
/// @param entrypoint           Entry-point name inside the module.
/// @param stage                Vertex or fragment stage.
/// @param num_samplers         Number of samplers the shader declares.
/// @param num_uniform_buffers  Number of uniform buffers the shader declares.
/// @param num_storage_buffers  Number of storage buffers the shader declares.
/// @return The new shader, or nullptr after logging SDL_GetError().
auto GpuPipeline::createShaderSPIRV(SDL_GPUDevice* device, const uint8_t* spv_code,
                                    size_t spv_size, const char* entrypoint,
                                    SDL_GPUShaderStage stage, Uint32 num_samplers,
                                    Uint32 num_uniform_buffers, Uint32 num_storage_buffers)
    -> SDL_GPUShader* {
    const SDL_GPUShaderCreateInfo info =
        makeShaderInfo(spv_code, spv_size, entrypoint, SDL_GPU_SHADERFORMAT_SPIRV, stage,
                       num_samplers, num_uniform_buffers, num_storage_buffers);

    SDL_GPUShader* shader = SDL_CreateGPUShader(device, &info);
    if (shader == nullptr) {
        SDL_Log("GpuPipeline: SPIRV shader '%s' failed: %s", entrypoint, SDL_GetError());
    }
    return shader;
}

/// Creates a shader from NUL-terminated MSL source text.
///
/// @param device               GPU device that will own the shader.
/// @param msl_source           NUL-terminated Metal Shading Language source.
/// @param entrypoint           Name of the vertex/fragment function to use.
/// @param stage                Vertex or fragment stage.
/// @param num_samplers         Number of samplers the shader declares.
/// @param num_uniform_buffers  Number of uniform buffers the shader declares.
/// @param num_storage_buffers  Number of storage buffers the shader declares.
/// @return The new shader, or nullptr after logging SDL_GetError().
auto GpuPipeline::createShader(SDL_GPUDevice* device, const char* msl_source,
                               const char* entrypoint, SDL_GPUShaderStage stage,
                               Uint32 num_samplers, Uint32 num_uniform_buffers,
                               Uint32 num_storage_buffers) -> SDL_GPUShader* {
    // The reported size includes the terminating NUL, matching the original code.
    const SDL_GPUShaderCreateInfo info = makeShaderInfo(
        reinterpret_cast<const Uint8*>(msl_source), std::strlen(msl_source) + 1, entrypoint,
        SDL_GPU_SHADERFORMAT_MSL, stage, num_samplers, num_uniform_buffers, num_storage_buffers);

    SDL_GPUShader* shader = SDL_CreateGPUShader(device, &info);
    if (shader == nullptr) {
        SDL_Log("GpuPipeline: shader '%s' failed: %s", entrypoint, SDL_GetError());
    }
    return shader;
}