refactor(gpu): eliminar GPU compute boids (prevé crash macOS)

Elimina el kernel Metal O(N²) de boids en GPU que causava GPU timeout a macOS amb >50K boles, arrossegant WindowServer fins al crash.

- Elimina gpu_boid_buffer.hpp/cpp (GpuBoidBuffer, BallComputeData, BoidParams)
- Elimina kBoidComputeMSL i kBallComputeVertMSL de gpu_pipeline
- Elimina boid_compute_pipeline_ i ball_compute_pipeline_
- Elimina use_gpu_boids_, boid_params_, ball_screen_uniforms_ de Engine
- Elimina syncAndExitGpuBoids() i tot el compute dispatch de render()
- Mode BOIDS ara usa sempre boid_manager_ (CPU, spatial hash O(N)) i renderitza via gpu_ball_buffer_ instanced (mateix path que PHYSICS)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-20 08:45:01 +01:00
parent badf92420b
commit d2e7f2ff86
8 changed files with 321 additions and 41 deletions
--- a/source/engine.cpp
+++ b/source/engine.cpp
@@ -237,6 +237,13 @@ bool Engine::initialize(int width, int height, int zoom, bool fullscreen, AppMod
                success = false;
            }

+            gpu_ball_buffer_ = std::make_unique<GpuBallBuffer>();
+            if (!gpu_ball_buffer_->init(gpu_ctx_->device())) {
+                std::cerr << "ERROR: No se pudo crear el ball buffer GPU" << std::endl;
+                success = false;
+            }
+            ball_gpu_data_.reserve(GpuBallBuffer::MAX_BALLS);
+
            offscreen_tex_ = std::make_unique<GpuTexture>();
            if (!offscreen_tex_->createRenderTarget(gpu_ctx_->device(),
                                                    current_screen_width_, current_screen_height_,
@@ -377,8 +384,9 @@ void Engine::shutdown() {
        if (ui_tex_)        { ui_tex_->destroy(gpu_ctx_->device());        ui_tex_.reset(); }
        if (white_tex_)     { white_tex_->destroy(gpu_ctx_->device());     white_tex_.reset(); }
        if (offscreen_tex_) { offscreen_tex_->destroy(gpu_ctx_->device()); offscreen_tex_.reset(); }
-        if (sprite_batch_)  { sprite_batch_->destroy(gpu_ctx_->device());  sprite_batch_.reset(); }
-        if (gpu_pipeline_)  { gpu_pipeline_->destroy(gpu_ctx_->device());  gpu_pipeline_.reset(); }
+        if (sprite_batch_)    { sprite_batch_->destroy(gpu_ctx_->device());    sprite_batch_.reset(); }
+        if (gpu_ball_buffer_) { gpu_ball_buffer_->destroy(gpu_ctx_->device()); gpu_ball_buffer_.reset(); }
+        if (gpu_pipeline_)    { gpu_pipeline_->destroy(gpu_ctx_->device());    gpu_pipeline_.reset(); }
    }

    // Destroy software UI renderer and surface
@@ -437,7 +445,7 @@ void Engine::update() {
        // Modo Figura 3D: actualizar figura polimórfica
        updateShape();
    } else if (current_mode_ == SimulationMode::BOIDS) {
-        // Modo Boids: actualizar comportamiento de enjambre (delegado a BoidManager)
+        // CPU boids: actualizar comportamiento de enjambre (delegado a BoidManager)
        boid_manager_->update(delta_time_);
    }

@@ -544,18 +552,17 @@ void Engine::toggleDepthZoom() {
 // Boids (comportamiento de enjambre)
 void Engine::toggleBoidsMode(bool force_gravity_on) {
    if (current_mode_ == SimulationMode::BOIDS) {
-        // Salir del modo boids (velocidades ya son time-based, no requiere conversión)
+        // Salir del modo boids
        current_mode_ = SimulationMode::PHYSICS;
-        boid_manager_->deactivateBoids(force_gravity_on);  // Pasar parámetro para control preciso
+        boid_manager_->deactivateBoids(force_gravity_on);
    } else {
        // Entrar al modo boids (desde PHYSICS o SHAPE)
        if (current_mode_ == SimulationMode::SHAPE) {
-            // Si estamos en modo shape, salir primero sin forzar gravedad
            shape_manager_->toggleShapeMode(false);
            current_mode_ = SimulationMode::PHYSICS;
        }

-        // Activar modo boids
+        // Activar modo boids en CPU (configura gravedad OFF, inicializa velocidades)
        current_mode_ = SimulationMode::BOIDS;
        boid_manager_->activateBoids();
    }
@@ -728,8 +735,12 @@ void Engine::render() {

    // Sprites (balls)
    const auto& balls = scene_manager_->getBalls();
+    const float sw = static_cast<float>(current_screen_width_);
+    const float sh = static_cast<float>(current_screen_height_);
+
    if (current_mode_ == SimulationMode::SHAPE) {
-        // Bucket sort by depth Z (Painter's Algorithm)
+        // SHAPE mode: bucket sort by depth Z (Painter's Algorithm), with depth scale.
+        // Uses the sprite batch (supports per-sprite scale, needed for depth zoom).
        for (size_t i = 0; i < balls.size(); i++) {
            int b = static_cast<int>(balls[i]->getDepthBrightness() * (DEPTH_SORT_BUCKETS - 1));
            depth_buckets_[std::clamp(b, 0, DEPTH_SORT_BUCKETS - 1)].push_back(i);
@@ -745,39 +756,48 @@ void Engine::render() {
                                         color.r / 255.0f * bf,
                                         color.g / 255.0f * bf,
                                         color.b / 255.0f * bf,
-                                         1.0f, depth_scale,
-                                         static_cast<float>(current_screen_width_),
-                                         static_cast<float>(current_screen_height_));
+                                         1.0f, depth_scale, sw, sh);
            }
            depth_buckets_[b].clear();
        }
    } else {
-        size_t idx = 0;
-        for (const auto& ball : balls) {
-            SDL_FRect pos = ball->getPosition();
+        // PHYSICS / CPU-BOIDS mode: build instanced ball buffer (GPU instanced rendering).
+        // 32 bytes per ball instead of 4×32 bytes per quad — 4× less upload bandwidth.
+        ball_gpu_data_.clear();
+        for (size_t idx = 0; idx < balls.size(); idx++) {
+            SDL_FRect pos = balls[idx]->getPosition();
            Color color = theme_manager_->getInterpolatedColor(idx);
-            sprite_batch_->addSprite(pos.x, pos.y, pos.w, pos.h,
-                                     color.r / 255.0f, color.g / 255.0f, color.b / 255.0f,
-                                     1.0f, 1.0f,
-                                     static_cast<float>(current_screen_width_),
-                                     static_cast<float>(current_screen_height_));
-            idx++;
+            // Convert to NDC center + NDC half-size (both positive)
+            float cx = ((pos.x + pos.w * 0.5f) / sw) * 2.0f - 1.0f;
+            float cy = 1.0f - ((pos.y + pos.h * 0.5f) / sh) * 2.0f;
+            float hw = pos.w / sw;
+            float hh = pos.h / sh;
+            ball_gpu_data_.push_back({cx, cy, hw, hh,
+                                      color.r / 255.0f, color.g / 255.0f,
+                                      color.b / 255.0f, 1.0f});
        }
    }

    // UI overlay quad (drawn in Pass 2 over the postfx output)
    sprite_batch_->addFullscreenOverlay();

-    // Upload batch to GPU buffers
+    // Upload sprite batch (background + SHAPE balls + UI overlay quad)
    if (!sprite_batch_->uploadBatch(gpu_ctx_->device(), cmd)) {
        gpu_ctx_->submit(cmd);
        return;
    }

+    // Upload instanced ball buffer (PHYSICS / CPU-BOIDS modes)
+    bool use_instanced_balls = (current_mode_ != SimulationMode::SHAPE) && !ball_gpu_data_.empty();
+    if (use_instanced_balls) {
+        gpu_ball_buffer_->upload(gpu_ctx_->device(), cmd,
+                                  ball_gpu_data_.data(), static_cast<int>(ball_gpu_data_.size()));
+    }
+
    GpuTexture* sprite_tex = (!gpu_textures_.empty())
        ? gpu_textures_[current_texture_index_].get() : nullptr;

-    // === Pass 1: Render background + sprites to offscreen texture ===
+    // === Pass 1: Render background + balls to offscreen texture ===
    if (offscreen_tex_ && offscreen_tex_->isValid() && sprite_tex && sprite_tex->isValid()) {
        SDL_GPUColorTargetInfo ct = {};
        ct.texture     = offscreen_tex_->texture();
@@ -786,22 +806,36 @@ void Engine::render() {
        ct.store_op    = SDL_GPU_STOREOP_STORE;

        SDL_GPURenderPass* pass1 = SDL_BeginGPURenderPass(cmd, &ct, 1, nullptr);
+
+        // Background (white texture tinted by vertex color, via sprite batch)
        SDL_BindGPUGraphicsPipeline(pass1, gpu_pipeline_->spritePipeline());
-
-        SDL_GPUBufferBinding vb = {sprite_batch_->vertexBuffer(), 0};
-        SDL_GPUBufferBinding ib = {sprite_batch_->indexBuffer(), 0};
-        SDL_BindGPUVertexBuffers(pass1, 0, &vb, 1);
-        SDL_BindGPUIndexBuffer(pass1, &ib, SDL_GPU_INDEXELEMENTSIZE_32BIT);
-
-        // Background (white texture tinted by vertex color)
+        {
+            SDL_GPUBufferBinding vb = {sprite_batch_->vertexBuffer(), 0};
+            SDL_GPUBufferBinding ib = {sprite_batch_->indexBuffer(), 0};
+            SDL_BindGPUVertexBuffers(pass1, 0, &vb, 1);
+            SDL_BindGPUIndexBuffer(pass1, &ib, SDL_GPU_INDEXELEMENTSIZE_32BIT);
+        }
        if (white_tex_ && white_tex_->isValid() && sprite_batch_->bgIndexCount() > 0) {
            SDL_GPUTextureSamplerBinding tsb = {white_tex_->texture(), white_tex_->sampler()};
            SDL_BindGPUFragmentSamplers(pass1, 0, &tsb, 1);
            SDL_DrawGPUIndexedPrimitives(pass1, sprite_batch_->bgIndexCount(), 1, 0, 0, 0);
        }

-        // Sprites
-        if (sprite_batch_->spriteIndexCount() > 0) {
+        if (use_instanced_balls && gpu_ball_buffer_->count() > 0) {
+            // PHYSICS / CPU-BOIDS: instanced rendering — 6 procedural vertices per instance
+            SDL_BindGPUGraphicsPipeline(pass1, gpu_pipeline_->ballPipeline());
+            SDL_GPUBufferBinding ball_vb = {gpu_ball_buffer_->buffer(), 0};
+            SDL_BindGPUVertexBuffers(pass1, 0, &ball_vb, 1);
+            SDL_GPUTextureSamplerBinding tsb = {sprite_tex->texture(), sprite_tex->sampler()};
+            SDL_BindGPUFragmentSamplers(pass1, 0, &tsb, 1);
+            SDL_DrawGPUPrimitives(pass1, 6, static_cast<Uint32>(gpu_ball_buffer_->count()), 0, 0);
+        } else if (!use_instanced_balls && sprite_batch_->spriteIndexCount() > 0) {
+            // SHAPE: sprite batch with depth sort (re-bind sprite pipeline + buffers)
+            SDL_BindGPUGraphicsPipeline(pass1, gpu_pipeline_->spritePipeline());
+            SDL_GPUBufferBinding vb = {sprite_batch_->vertexBuffer(), 0};
+            SDL_GPUBufferBinding ib = {sprite_batch_->indexBuffer(), 0};
+            SDL_BindGPUVertexBuffers(pass1, 0, &vb, 1);
+            SDL_BindGPUIndexBuffer(pass1, &ib, SDL_GPU_INDEXELEMENTSIZE_32BIT);
            SDL_GPUTextureSamplerBinding tsb = {sprite_tex->texture(), sprite_tex->sampler()};
            SDL_BindGPUFragmentSamplers(pass1, 0, &tsb, 1);
            SDL_DrawGPUIndexedPrimitives(pass1, sprite_batch_->spriteIndexCount(), 1,