refactor(gpu): eliminar GPU compute boids (prevé crash macOS)
Elimina el kernel Metal O(N²) de boids en GPU que causava GPU timeout a macOS amb >50K boles, arrossegant WindowServer fins al crash. - Elimina gpu_boid_buffer.hpp/cpp (GpuBoidBuffer, BallComputeData, BoidParams) - Elimina kBoidComputeMSL i kBallComputeVertMSL de gpu_pipeline - Elimina boid_compute_pipeline_ i ball_compute_pipeline_ - Elimina use_gpu_boids_, boid_params_, ball_screen_uniforms_ de Engine - Elimina syncAndExitGpuBoids() i tot el compute dispatch de render() - Mode BOIDS ara usa sempre boid_manager_ (CPU, spatial hash O(N)) i renderitza via gpu_ball_buffer_ instanced (mateix path que PHYSICS) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,8 +1,10 @@
|
||||
#include "gpu_pipeline.hpp"
|
||||
#include "gpu_sprite_batch.hpp" // for GpuVertex layout
|
||||
#include "gpu_sprite_batch.hpp" // for GpuVertex layout
|
||||
#include "gpu_ball_buffer.hpp" // for BallGPUData layout
|
||||
|
||||
#include <SDL3/SDL_log.h>
|
||||
#include <cstddef> // offsetof
|
||||
#include <cstring> // strlen
|
||||
|
||||
// ============================================================================
|
||||
// MSL Shaders (Metal Shading Language, macOS)
|
||||
@@ -133,6 +135,60 @@ fragment float4 postfx_fs(PostVOut in [[stage_in]],
|
||||
}
|
||||
)";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Ball instanced vertex shader
|
||||
// Reads BallGPUData as per-instance attributes (input_rate = INSTANCE).
|
||||
// Generates a 6-vertex quad (2 triangles) per instance using vertex_id.
|
||||
//
|
||||
// BallGPUData layout:
|
||||
// float2 center [[attribute(0)]] — NDC center (cx, cy)
|
||||
// float2 half [[attribute(1)]] — NDC half-size (hw, hh), both positive
|
||||
// float4 col [[attribute(2)]] — RGBA [0,1]
|
||||
//
|
||||
// NDC convention (SDL / Metal): Y increases upward (+1=top, -1=bottom).
|
||||
// half.x = w/screen_w, half.y = h/screen_h (positive; Y is not flipped)
|
||||
// Vertex order: TL TR BL | TR BR BL (clockwise on screen with Y-up NDC; clockwise is Metal's default front-facing winding)
|
||||
// ---------------------------------------------------------------------------
|
||||
static const char* kBallInstancedVertMSL = R"(
|
||||
#include <metal_stdlib>
|
||||
using namespace metal;
|
||||
|
||||
struct BallInstance {
|
||||
float2 center [[attribute(0)]]; // NDC center
|
||||
float2 halfsize [[attribute(1)]]; // NDC half-size (both positive); 'half' is reserved in MSL
|
||||
float4 col [[attribute(2)]];
|
||||
};
|
||||
struct BallVOut {
|
||||
float4 pos [[position]];
|
||||
float2 uv;
|
||||
float4 col;
|
||||
};
|
||||
|
||||
vertex BallVOut ball_instanced_vs(BallInstance inst [[stage_in]],
|
||||
uint vid [[vertex_id]]) {
|
||||
// Offset signs for each of the 6 vertices (TL TR BL | TR BR BL)
|
||||
const float2 offsets[6] = {
|
||||
{-1.0f, 1.0f}, // TL
|
||||
{ 1.0f, 1.0f}, // TR
|
||||
{-1.0f, -1.0f}, // BL
|
||||
{ 1.0f, 1.0f}, // TR (shared)
|
||||
{ 1.0f, -1.0f}, // BR
|
||||
{-1.0f, -1.0f}, // BL (shared)
|
||||
};
|
||||
// UV: TL=(0,0) TR=(1,0) BL=(0,1) BR=(1,1)
|
||||
const float2 uvs[6] = {
|
||||
{0.0f, 0.0f}, {1.0f, 0.0f}, {0.0f, 1.0f},
|
||||
{1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f},
|
||||
};
|
||||
float2 pos = inst.center + offsets[vid] * inst.halfsize;
|
||||
BallVOut out;
|
||||
out.pos = float4(pos.x, pos.y, 0.0f, 1.0f);
|
||||
out.uv = uvs[vid];
|
||||
out.col = inst.col;
|
||||
return out;
|
||||
}
|
||||
)";
|
||||
|
||||
// ============================================================================
|
||||
// GpuPipeline implementation
|
||||
// ============================================================================
|
||||
@@ -222,6 +278,71 @@ bool GpuPipeline::init(SDL_GPUDevice* device,
|
||||
return false;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// Ball instanced pipeline
|
||||
// Vertex: ball_instanced_vs (BallGPUData per-instance, no index buffer)
|
||||
// Fragment: sprite_fs (same texture+color blend as sprite pipeline)
|
||||
// Targets: offscreen (same as sprite pipeline)
|
||||
// ----------------------------------------------------------------
|
||||
SDL_GPUShader* ball_vert = createShader(device, kBallInstancedVertMSL, "ball_instanced_vs",
|
||||
SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
|
||||
SDL_GPUShader* ball_frag = createShader(device, kSpriteFragMSL, "sprite_fs",
|
||||
SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 0);
|
||||
if (!ball_vert || !ball_frag) {
|
||||
SDL_Log("GpuPipeline: failed to create ball instanced shaders");
|
||||
if (ball_vert) SDL_ReleaseGPUShader(device, ball_vert);
|
||||
if (ball_frag) SDL_ReleaseGPUShader(device, ball_frag);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Vertex input: BallGPUData as per-instance data (step rate = 1 instance)
|
||||
SDL_GPUVertexBufferDescription ball_vb_desc = {};
|
||||
ball_vb_desc.slot = 0;
|
||||
ball_vb_desc.pitch = sizeof(BallGPUData);
|
||||
ball_vb_desc.input_rate = SDL_GPU_VERTEXINPUTRATE_INSTANCE;
|
||||
ball_vb_desc.instance_step_rate = 1;
|
||||
|
||||
SDL_GPUVertexAttribute ball_attrs[3] = {};
|
||||
// attr 0: center (float2) at offset 0
|
||||
ball_attrs[0].location = 0;
|
||||
ball_attrs[0].buffer_slot = 0;
|
||||
ball_attrs[0].format = SDL_GPU_VERTEXELEMENTFORMAT_FLOAT2;
|
||||
ball_attrs[0].offset = static_cast<Uint32>(offsetof(BallGPUData, cx));
|
||||
// attr 1: half-size (float2) at offset 8
|
||||
ball_attrs[1].location = 1;
|
||||
ball_attrs[1].buffer_slot = 0;
|
||||
ball_attrs[1].format = SDL_GPU_VERTEXELEMENTFORMAT_FLOAT2;
|
||||
ball_attrs[1].offset = static_cast<Uint32>(offsetof(BallGPUData, hw));
|
||||
// attr 2: color (float4) at offset 16
|
||||
ball_attrs[2].location = 2;
|
||||
ball_attrs[2].buffer_slot = 0;
|
||||
ball_attrs[2].format = SDL_GPU_VERTEXELEMENTFORMAT_FLOAT4;
|
||||
ball_attrs[2].offset = static_cast<Uint32>(offsetof(BallGPUData, r));
|
||||
|
||||
SDL_GPUVertexInputState ball_vertex_input = {};
|
||||
ball_vertex_input.vertex_buffer_descriptions = &ball_vb_desc;
|
||||
ball_vertex_input.num_vertex_buffers = 1;
|
||||
ball_vertex_input.vertex_attributes = ball_attrs;
|
||||
ball_vertex_input.num_vertex_attributes = 3;
|
||||
|
||||
SDL_GPUGraphicsPipelineCreateInfo ball_pipe_info = {};
|
||||
ball_pipe_info.vertex_shader = ball_vert;
|
||||
ball_pipe_info.fragment_shader = ball_frag;
|
||||
ball_pipe_info.vertex_input_state = ball_vertex_input;
|
||||
ball_pipe_info.primitive_type = SDL_GPU_PRIMITIVETYPE_TRIANGLELIST;
|
||||
ball_pipe_info.target_info.num_color_targets = 1;
|
||||
ball_pipe_info.target_info.color_target_descriptions = &color_target_desc;
|
||||
|
||||
ball_pipeline_ = SDL_CreateGPUGraphicsPipeline(device, &ball_pipe_info);
|
||||
|
||||
SDL_ReleaseGPUShader(device, ball_vert);
|
||||
SDL_ReleaseGPUShader(device, ball_frag);
|
||||
|
||||
if (!ball_pipeline_) {
|
||||
SDL_Log("GpuPipeline: ball instanced pipeline creation failed: %s", SDL_GetError());
|
||||
return false;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// UI overlay pipeline (same as sprite but renders to swapchain format)
|
||||
// Reuse sprite shaders with different target format.
|
||||
@@ -275,12 +396,13 @@ bool GpuPipeline::init(SDL_GPUDevice* device,
|
||||
return false;
|
||||
}
|
||||
|
||||
SDL_Log("GpuPipeline: sprite and postfx pipelines created successfully");
|
||||
SDL_Log("GpuPipeline: all pipelines created successfully");
|
||||
return true;
|
||||
}
|
||||
|
||||
/// @brief Releases every graphics pipeline currently held by this object.
///
/// Each pipeline handle (sprite, ball, postfx — in that order) is released
/// only if it is non-null and is then reset to nullptr, so a second call
/// finds nothing left to release.
///
/// @param device GPU device handed to SDL_ReleaseGPUGraphicsPipeline
///               (presumably the same device that was passed to init()).
void GpuPipeline::destroy(SDL_GPUDevice* device) {
    if (sprite_pipeline_ != nullptr) {
        SDL_ReleaseGPUGraphicsPipeline(device, sprite_pipeline_);
        sprite_pipeline_ = nullptr;
    }

    if (ball_pipeline_ != nullptr) {
        SDL_ReleaseGPUGraphicsPipeline(device, ball_pipeline_);
        ball_pipeline_ = nullptr;
    }

    if (postfx_pipeline_ != nullptr) {
        SDL_ReleaseGPUGraphicsPipeline(device, postfx_pipeline_);
        postfx_pipeline_ = nullptr;
    }
}
|
||||
|
||||
@@ -289,7 +411,8 @@ SDL_GPUShader* GpuPipeline::createShader(SDL_GPUDevice* device,
|
||||
const char* entrypoint,
|
||||
SDL_GPUShaderStage stage,
|
||||
Uint32 num_samplers,
|
||||
Uint32 num_uniform_buffers) {
|
||||
Uint32 num_uniform_buffers,
|
||||
Uint32 num_storage_buffers) {
|
||||
SDL_GPUShaderCreateInfo info = {};
|
||||
info.code = reinterpret_cast<const Uint8*>(msl_source);
|
||||
info.code_size = static_cast<size_t>(strlen(msl_source) + 1);
|
||||
@@ -298,7 +421,7 @@ SDL_GPUShader* GpuPipeline::createShader(SDL_GPUDevice* device,
|
||||
info.stage = stage;
|
||||
info.num_samplers = num_samplers;
|
||||
info.num_storage_textures = 0;
|
||||
info.num_storage_buffers = 0;
|
||||
info.num_storage_buffers = num_storage_buffers;
|
||||
info.num_uniform_buffers = num_uniform_buffers;
|
||||
|
||||
SDL_GPUShader* shader = SDL_CreateGPUShader(device, &info);
|
||||
|
||||
Reference in New Issue
Block a user