refactor(gpu): eliminar GPU compute boids (prevé crash macOS)

Elimina el kernel Metal O(N²) de boids en GPU que causava un GPU timeout
a macOS amb >50K boles i arrossegava WindowServer fins al crash.

- Elimina gpu_boid_buffer.hpp/cpp (GpuBoidBuffer, BallComputeData, BoidParams)
- Elimina kBoidComputeMSL i kBallComputeVertMSL de gpu_pipeline
- Elimina boid_compute_pipeline_ i ball_compute_pipeline_
- Elimina use_gpu_boids_, boid_params_, ball_screen_uniforms_ de Engine
- Elimina syncAndExitGpuBoids() i tot el compute dispatch de render()
- Mode BOIDS ara usa sempre boid_manager_ (CPU, spatial hash O(N))
  i renderitza via gpu_ball_buffer_ instanced (mateix path que PHYSICS)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-20 08:45:01 +01:00
parent badf92420b
commit d2e7f2ff86
8 changed files with 321 additions and 41 deletions

View File

@@ -237,6 +237,13 @@ bool Engine::initialize(int width, int height, int zoom, bool fullscreen, AppMod
success = false;
}
gpu_ball_buffer_ = std::make_unique<GpuBallBuffer>();
if (!gpu_ball_buffer_->init(gpu_ctx_->device())) {
std::cerr << "ERROR: No se pudo crear el ball buffer GPU" << std::endl;
success = false;
}
ball_gpu_data_.reserve(GpuBallBuffer::MAX_BALLS);
offscreen_tex_ = std::make_unique<GpuTexture>();
if (!offscreen_tex_->createRenderTarget(gpu_ctx_->device(),
current_screen_width_, current_screen_height_,
@@ -377,8 +384,9 @@ void Engine::shutdown() {
if (ui_tex_) { ui_tex_->destroy(gpu_ctx_->device()); ui_tex_.reset(); }
if (white_tex_) { white_tex_->destroy(gpu_ctx_->device()); white_tex_.reset(); }
if (offscreen_tex_) { offscreen_tex_->destroy(gpu_ctx_->device()); offscreen_tex_.reset(); }
if (sprite_batch_) { sprite_batch_->destroy(gpu_ctx_->device()); sprite_batch_.reset(); }
if (gpu_pipeline_) { gpu_pipeline_->destroy(gpu_ctx_->device()); gpu_pipeline_.reset(); }
if (sprite_batch_) { sprite_batch_->destroy(gpu_ctx_->device()); sprite_batch_.reset(); }
if (gpu_ball_buffer_) { gpu_ball_buffer_->destroy(gpu_ctx_->device()); gpu_ball_buffer_.reset(); }
if (gpu_pipeline_) { gpu_pipeline_->destroy(gpu_ctx_->device()); gpu_pipeline_.reset(); }
}
// Destroy software UI renderer and surface
@@ -437,7 +445,7 @@ void Engine::update() {
// Modo Figura 3D: actualizar figura polimórfica
updateShape();
} else if (current_mode_ == SimulationMode::BOIDS) {
// Modo Boids: actualizar comportamiento de enjambre (delegado a BoidManager)
// CPU boids: actualizar comportamiento de enjambre (delegado a BoidManager)
boid_manager_->update(delta_time_);
}
@@ -544,18 +552,17 @@ void Engine::toggleDepthZoom() {
// Boids (comportamiento de enjambre)
void Engine::toggleBoidsMode(bool force_gravity_on) {
if (current_mode_ == SimulationMode::BOIDS) {
// Salir del modo boids (velocidades ya son time-based, no requiere conversión)
// Salir del modo boids
current_mode_ = SimulationMode::PHYSICS;
boid_manager_->deactivateBoids(force_gravity_on); // Pasar parámetro para control preciso
boid_manager_->deactivateBoids(force_gravity_on);
} else {
// Entrar al modo boids (desde PHYSICS o SHAPE)
if (current_mode_ == SimulationMode::SHAPE) {
// Si estamos en modo shape, salir primero sin forzar gravedad
shape_manager_->toggleShapeMode(false);
current_mode_ = SimulationMode::PHYSICS;
}
// Activar modo boids
// Activar modo boids en CPU (configura gravedad OFF, inicializa velocidades)
current_mode_ = SimulationMode::BOIDS;
boid_manager_->activateBoids();
}
@@ -728,8 +735,12 @@ void Engine::render() {
// Sprites (balls)
const auto& balls = scene_manager_->getBalls();
const float sw = static_cast<float>(current_screen_width_);
const float sh = static_cast<float>(current_screen_height_);
if (current_mode_ == SimulationMode::SHAPE) {
// Bucket sort by depth Z (Painter's Algorithm)
// SHAPE mode: bucket sort by depth Z (Painter's Algorithm), with depth scale.
// Uses the sprite batch (supports per-sprite scale, needed for depth zoom).
for (size_t i = 0; i < balls.size(); i++) {
int b = static_cast<int>(balls[i]->getDepthBrightness() * (DEPTH_SORT_BUCKETS - 1));
depth_buckets_[std::clamp(b, 0, DEPTH_SORT_BUCKETS - 1)].push_back(i);
@@ -745,39 +756,48 @@ void Engine::render() {
color.r / 255.0f * bf,
color.g / 255.0f * bf,
color.b / 255.0f * bf,
1.0f, depth_scale,
static_cast<float>(current_screen_width_),
static_cast<float>(current_screen_height_));
1.0f, depth_scale, sw, sh);
}
depth_buckets_[b].clear();
}
} else {
size_t idx = 0;
for (const auto& ball : balls) {
SDL_FRect pos = ball->getPosition();
// PHYSICS / CPU-BOIDS mode: build instanced ball buffer (GPU instanced rendering).
// 32 bytes per ball instead of 4×32 bytes per quad — 4× less upload bandwidth.
ball_gpu_data_.clear();
for (size_t idx = 0; idx < balls.size(); idx++) {
SDL_FRect pos = balls[idx]->getPosition();
Color color = theme_manager_->getInterpolatedColor(idx);
sprite_batch_->addSprite(pos.x, pos.y, pos.w, pos.h,
color.r / 255.0f, color.g / 255.0f, color.b / 255.0f,
1.0f, 1.0f,
static_cast<float>(current_screen_width_),
static_cast<float>(current_screen_height_));
idx++;
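// Convert to NDC: center (cx, cy) in [-1, 1] plus half-size (hw, hh).
// NDC spans 2 units across the screen, so half-extent = (w / 2) * (2 / sw) = w / sw;
// "both positive" refers to the half-size components, not the center.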
float cx = ((pos.x + pos.w * 0.5f) / sw) * 2.0f - 1.0f;
float cy = 1.0f - ((pos.y + pos.h * 0.5f) / sh) * 2.0f;
float hw = pos.w / sw;
float hh = pos.h / sh;
ball_gpu_data_.push_back({cx, cy, hw, hh,
color.r / 255.0f, color.g / 255.0f,
color.b / 255.0f, 1.0f});
}
}
// UI overlay quad (drawn in Pass 2 over the postfx output)
sprite_batch_->addFullscreenOverlay();
// Upload batch to GPU buffers
// Upload sprite batch (background + SHAPE balls + UI overlay quad)
if (!sprite_batch_->uploadBatch(gpu_ctx_->device(), cmd)) {
gpu_ctx_->submit(cmd);
return;
}
// Upload instanced ball buffer (PHYSICS / CPU-BOIDS modes)
bool use_instanced_balls = (current_mode_ != SimulationMode::SHAPE) && !ball_gpu_data_.empty();
if (use_instanced_balls) {
gpu_ball_buffer_->upload(gpu_ctx_->device(), cmd,
ball_gpu_data_.data(), static_cast<int>(ball_gpu_data_.size()));
}
GpuTexture* sprite_tex = (!gpu_textures_.empty())
? gpu_textures_[current_texture_index_].get() : nullptr;
// === Pass 1: Render background + sprites to offscreen texture ===
// === Pass 1: Render background + balls to offscreen texture ===
if (offscreen_tex_ && offscreen_tex_->isValid() && sprite_tex && sprite_tex->isValid()) {
SDL_GPUColorTargetInfo ct = {};
ct.texture = offscreen_tex_->texture();
@@ -786,22 +806,36 @@ void Engine::render() {
ct.store_op = SDL_GPU_STOREOP_STORE;
SDL_GPURenderPass* pass1 = SDL_BeginGPURenderPass(cmd, &ct, 1, nullptr);
// Background (white texture tinted by vertex color, via sprite batch)
SDL_BindGPUGraphicsPipeline(pass1, gpu_pipeline_->spritePipeline());
SDL_GPUBufferBinding vb = {sprite_batch_->vertexBuffer(), 0};
SDL_GPUBufferBinding ib = {sprite_batch_->indexBuffer(), 0};
SDL_BindGPUVertexBuffers(pass1, 0, &vb, 1);
SDL_BindGPUIndexBuffer(pass1, &ib, SDL_GPU_INDEXELEMENTSIZE_32BIT);
// Background (white texture tinted by vertex color)
{
SDL_GPUBufferBinding vb = {sprite_batch_->vertexBuffer(), 0};
SDL_GPUBufferBinding ib = {sprite_batch_->indexBuffer(), 0};
SDL_BindGPUVertexBuffers(pass1, 0, &vb, 1);
SDL_BindGPUIndexBuffer(pass1, &ib, SDL_GPU_INDEXELEMENTSIZE_32BIT);
}
if (white_tex_ && white_tex_->isValid() && sprite_batch_->bgIndexCount() > 0) {
SDL_GPUTextureSamplerBinding tsb = {white_tex_->texture(), white_tex_->sampler()};
SDL_BindGPUFragmentSamplers(pass1, 0, &tsb, 1);
SDL_DrawGPUIndexedPrimitives(pass1, sprite_batch_->bgIndexCount(), 1, 0, 0, 0);
}
// Sprites
if (sprite_batch_->spriteIndexCount() > 0) {
if (use_instanced_balls && gpu_ball_buffer_->count() > 0) {
// PHYSICS / CPU-BOIDS: instanced rendering — 6 procedural vertices per instance
SDL_BindGPUGraphicsPipeline(pass1, gpu_pipeline_->ballPipeline());
SDL_GPUBufferBinding ball_vb = {gpu_ball_buffer_->buffer(), 0};
SDL_BindGPUVertexBuffers(pass1, 0, &ball_vb, 1);
SDL_GPUTextureSamplerBinding tsb = {sprite_tex->texture(), sprite_tex->sampler()};
SDL_BindGPUFragmentSamplers(pass1, 0, &tsb, 1);
SDL_DrawGPUPrimitives(pass1, 6, static_cast<Uint32>(gpu_ball_buffer_->count()), 0, 0);
} else if (!use_instanced_balls && sprite_batch_->spriteIndexCount() > 0) {
// SHAPE: sprite batch with depth sort (re-bind sprite pipeline + buffers)
SDL_BindGPUGraphicsPipeline(pass1, gpu_pipeline_->spritePipeline());
SDL_GPUBufferBinding vb = {sprite_batch_->vertexBuffer(), 0};
SDL_GPUBufferBinding ib = {sprite_batch_->indexBuffer(), 0};
SDL_BindGPUVertexBuffers(pass1, 0, &vb, 1);
SDL_BindGPUIndexBuffer(pass1, &ib, SDL_GPU_INDEXELEMENTSIZE_32BIT);
SDL_GPUTextureSamplerBinding tsb = {sprite_tex->texture(), sprite_tex->sampler()};
SDL_BindGPUFragmentSamplers(pass1, 0, &tsb, 1);
SDL_DrawGPUIndexedPrimitives(pass1, sprite_batch_->spriteIndexCount(), 1,