refactor(gpu): eliminar GPU compute boids (prevé crash macOS)
Elimina el kernel Metal O(N²) de boids en GPU que causava GPU timeout a macOS amb >50K boles, arrossegant WindowServer fins al crash.

- Elimina gpu_boid_buffer.hpp/cpp (GpuBoidBuffer, BallComputeData, BoidParams)
- Elimina kBoidComputeMSL i kBallComputeVertMSL de gpu_pipeline
- Elimina boid_compute_pipeline_ i ball_compute_pipeline_
- Elimina use_gpu_boids_, boid_params_, ball_screen_uniforms_ de Engine
- Elimina syncAndExitGpuBoids() i tot el compute dispatch de render()
- Mode BOIDS ara usa sempre boid_manager_ (CPU, spatial hash O(N)) i renderitza via gpu_ball_buffer_ instanced (mateix path que PHYSICS)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -237,6 +237,13 @@ bool Engine::initialize(int width, int height, int zoom, bool fullscreen, AppMod
|
||||
success = false;
|
||||
}
|
||||
|
||||
gpu_ball_buffer_ = std::make_unique<GpuBallBuffer>();
|
||||
if (!gpu_ball_buffer_->init(gpu_ctx_->device())) {
|
||||
std::cerr << "ERROR: No se pudo crear el ball buffer GPU" << std::endl;
|
||||
success = false;
|
||||
}
|
||||
ball_gpu_data_.reserve(GpuBallBuffer::MAX_BALLS);
|
||||
|
||||
offscreen_tex_ = std::make_unique<GpuTexture>();
|
||||
if (!offscreen_tex_->createRenderTarget(gpu_ctx_->device(),
|
||||
current_screen_width_, current_screen_height_,
|
||||
@@ -377,8 +384,9 @@ void Engine::shutdown() {
|
||||
if (ui_tex_) { ui_tex_->destroy(gpu_ctx_->device()); ui_tex_.reset(); }
|
||||
if (white_tex_) { white_tex_->destroy(gpu_ctx_->device()); white_tex_.reset(); }
|
||||
if (offscreen_tex_) { offscreen_tex_->destroy(gpu_ctx_->device()); offscreen_tex_.reset(); }
|
||||
if (sprite_batch_) { sprite_batch_->destroy(gpu_ctx_->device()); sprite_batch_.reset(); }
|
||||
if (gpu_pipeline_) { gpu_pipeline_->destroy(gpu_ctx_->device()); gpu_pipeline_.reset(); }
|
||||
if (sprite_batch_) { sprite_batch_->destroy(gpu_ctx_->device()); sprite_batch_.reset(); }
|
||||
if (gpu_ball_buffer_) { gpu_ball_buffer_->destroy(gpu_ctx_->device()); gpu_ball_buffer_.reset(); }
|
||||
if (gpu_pipeline_) { gpu_pipeline_->destroy(gpu_ctx_->device()); gpu_pipeline_.reset(); }
|
||||
}
|
||||
|
||||
// Destroy software UI renderer and surface
|
||||
@@ -437,7 +445,7 @@ void Engine::update() {
|
||||
// Modo Figura 3D: actualizar figura polimórfica
|
||||
updateShape();
|
||||
} else if (current_mode_ == SimulationMode::BOIDS) {
|
||||
// Modo Boids: actualizar comportamiento de enjambre (delegado a BoidManager)
|
||||
// CPU boids: actualizar comportamiento de enjambre (delegado a BoidManager)
|
||||
boid_manager_->update(delta_time_);
|
||||
}
|
||||
|
||||
@@ -544,18 +552,17 @@ void Engine::toggleDepthZoom() {
|
||||
// Boids (comportamiento de enjambre)
|
||||
void Engine::toggleBoidsMode(bool force_gravity_on) {
|
||||
if (current_mode_ == SimulationMode::BOIDS) {
|
||||
// Salir del modo boids (velocidades ya son time-based, no requiere conversión)
|
||||
// Salir del modo boids
|
||||
current_mode_ = SimulationMode::PHYSICS;
|
||||
boid_manager_->deactivateBoids(force_gravity_on); // Pasar parámetro para control preciso
|
||||
boid_manager_->deactivateBoids(force_gravity_on);
|
||||
} else {
|
||||
// Entrar al modo boids (desde PHYSICS o SHAPE)
|
||||
if (current_mode_ == SimulationMode::SHAPE) {
|
||||
// Si estamos en modo shape, salir primero sin forzar gravedad
|
||||
shape_manager_->toggleShapeMode(false);
|
||||
current_mode_ = SimulationMode::PHYSICS;
|
||||
}
|
||||
|
||||
// Activar modo boids
|
||||
// Activar modo boids en CPU (configura gravedad OFF, inicializa velocidades)
|
||||
current_mode_ = SimulationMode::BOIDS;
|
||||
boid_manager_->activateBoids();
|
||||
}
|
||||
@@ -728,8 +735,12 @@ void Engine::render() {
|
||||
|
||||
// Sprites (balls)
|
||||
const auto& balls = scene_manager_->getBalls();
|
||||
const float sw = static_cast<float>(current_screen_width_);
|
||||
const float sh = static_cast<float>(current_screen_height_);
|
||||
|
||||
if (current_mode_ == SimulationMode::SHAPE) {
|
||||
// Bucket sort by depth Z (Painter's Algorithm)
|
||||
// SHAPE mode: bucket sort by depth Z (Painter's Algorithm), with depth scale.
|
||||
// Uses the sprite batch (supports per-sprite scale, needed for depth zoom).
|
||||
for (size_t i = 0; i < balls.size(); i++) {
|
||||
int b = static_cast<int>(balls[i]->getDepthBrightness() * (DEPTH_SORT_BUCKETS - 1));
|
||||
depth_buckets_[std::clamp(b, 0, DEPTH_SORT_BUCKETS - 1)].push_back(i);
|
||||
@@ -745,39 +756,48 @@ void Engine::render() {
|
||||
color.r / 255.0f * bf,
|
||||
color.g / 255.0f * bf,
|
||||
color.b / 255.0f * bf,
|
||||
1.0f, depth_scale,
|
||||
static_cast<float>(current_screen_width_),
|
||||
static_cast<float>(current_screen_height_));
|
||||
1.0f, depth_scale, sw, sh);
|
||||
}
|
||||
depth_buckets_[b].clear();
|
||||
}
|
||||
} else {
|
||||
size_t idx = 0;
|
||||
for (const auto& ball : balls) {
|
||||
SDL_FRect pos = ball->getPosition();
|
||||
// PHYSICS / CPU-BOIDS mode: build instanced ball buffer (GPU instanced rendering).
|
||||
// 32 bytes per ball instead of 4×32 bytes per quad — 4× less upload bandwidth.
|
||||
ball_gpu_data_.clear();
|
||||
for (size_t idx = 0; idx < balls.size(); idx++) {
|
||||
SDL_FRect pos = balls[idx]->getPosition();
|
||||
Color color = theme_manager_->getInterpolatedColor(idx);
|
||||
sprite_batch_->addSprite(pos.x, pos.y, pos.w, pos.h,
|
||||
color.r / 255.0f, color.g / 255.0f, color.b / 255.0f,
|
||||
1.0f, 1.0f,
|
||||
static_cast<float>(current_screen_width_),
|
||||
static_cast<float>(current_screen_height_));
|
||||
idx++;
|
||||
// Convert to NDC center + NDC half-size (both positive)
|
||||
float cx = ((pos.x + pos.w * 0.5f) / sw) * 2.0f - 1.0f;
|
||||
float cy = 1.0f - ((pos.y + pos.h * 0.5f) / sh) * 2.0f;
|
||||
float hw = pos.w / sw;
|
||||
float hh = pos.h / sh;
|
||||
ball_gpu_data_.push_back({cx, cy, hw, hh,
|
||||
color.r / 255.0f, color.g / 255.0f,
|
||||
color.b / 255.0f, 1.0f});
|
||||
}
|
||||
}
|
||||
|
||||
// UI overlay quad (drawn in Pass 2 over the postfx output)
|
||||
sprite_batch_->addFullscreenOverlay();
|
||||
|
||||
// Upload batch to GPU buffers
|
||||
// Upload sprite batch (background + SHAPE balls + UI overlay quad)
|
||||
if (!sprite_batch_->uploadBatch(gpu_ctx_->device(), cmd)) {
|
||||
gpu_ctx_->submit(cmd);
|
||||
return;
|
||||
}
|
||||
|
||||
// Upload instanced ball buffer (PHYSICS / CPU-BOIDS modes)
|
||||
bool use_instanced_balls = (current_mode_ != SimulationMode::SHAPE) && !ball_gpu_data_.empty();
|
||||
if (use_instanced_balls) {
|
||||
gpu_ball_buffer_->upload(gpu_ctx_->device(), cmd,
|
||||
ball_gpu_data_.data(), static_cast<int>(ball_gpu_data_.size()));
|
||||
}
|
||||
|
||||
GpuTexture* sprite_tex = (!gpu_textures_.empty())
|
||||
? gpu_textures_[current_texture_index_].get() : nullptr;
|
||||
|
||||
// === Pass 1: Render background + sprites to offscreen texture ===
|
||||
// === Pass 1: Render background + balls to offscreen texture ===
|
||||
if (offscreen_tex_ && offscreen_tex_->isValid() && sprite_tex && sprite_tex->isValid()) {
|
||||
SDL_GPUColorTargetInfo ct = {};
|
||||
ct.texture = offscreen_tex_->texture();
|
||||
@@ -786,22 +806,36 @@ void Engine::render() {
|
||||
ct.store_op = SDL_GPU_STOREOP_STORE;
|
||||
|
||||
SDL_GPURenderPass* pass1 = SDL_BeginGPURenderPass(cmd, &ct, 1, nullptr);
|
||||
|
||||
// Background (white texture tinted by vertex color, via sprite batch)
|
||||
SDL_BindGPUGraphicsPipeline(pass1, gpu_pipeline_->spritePipeline());
|
||||
|
||||
SDL_GPUBufferBinding vb = {sprite_batch_->vertexBuffer(), 0};
|
||||
SDL_GPUBufferBinding ib = {sprite_batch_->indexBuffer(), 0};
|
||||
SDL_BindGPUVertexBuffers(pass1, 0, &vb, 1);
|
||||
SDL_BindGPUIndexBuffer(pass1, &ib, SDL_GPU_INDEXELEMENTSIZE_32BIT);
|
||||
|
||||
// Background (white texture tinted by vertex color)
|
||||
{
|
||||
SDL_GPUBufferBinding vb = {sprite_batch_->vertexBuffer(), 0};
|
||||
SDL_GPUBufferBinding ib = {sprite_batch_->indexBuffer(), 0};
|
||||
SDL_BindGPUVertexBuffers(pass1, 0, &vb, 1);
|
||||
SDL_BindGPUIndexBuffer(pass1, &ib, SDL_GPU_INDEXELEMENTSIZE_32BIT);
|
||||
}
|
||||
if (white_tex_ && white_tex_->isValid() && sprite_batch_->bgIndexCount() > 0) {
|
||||
SDL_GPUTextureSamplerBinding tsb = {white_tex_->texture(), white_tex_->sampler()};
|
||||
SDL_BindGPUFragmentSamplers(pass1, 0, &tsb, 1);
|
||||
SDL_DrawGPUIndexedPrimitives(pass1, sprite_batch_->bgIndexCount(), 1, 0, 0, 0);
|
||||
}
|
||||
|
||||
// Sprites
|
||||
if (sprite_batch_->spriteIndexCount() > 0) {
|
||||
if (use_instanced_balls && gpu_ball_buffer_->count() > 0) {
|
||||
// PHYSICS / CPU-BOIDS: instanced rendering — 6 procedural vertices per instance
|
||||
SDL_BindGPUGraphicsPipeline(pass1, gpu_pipeline_->ballPipeline());
|
||||
SDL_GPUBufferBinding ball_vb = {gpu_ball_buffer_->buffer(), 0};
|
||||
SDL_BindGPUVertexBuffers(pass1, 0, &ball_vb, 1);
|
||||
SDL_GPUTextureSamplerBinding tsb = {sprite_tex->texture(), sprite_tex->sampler()};
|
||||
SDL_BindGPUFragmentSamplers(pass1, 0, &tsb, 1);
|
||||
SDL_DrawGPUPrimitives(pass1, 6, static_cast<Uint32>(gpu_ball_buffer_->count()), 0, 0);
|
||||
} else if (!use_instanced_balls && sprite_batch_->spriteIndexCount() > 0) {
|
||||
// SHAPE: sprite batch with depth sort (re-bind sprite pipeline + buffers)
|
||||
SDL_BindGPUGraphicsPipeline(pass1, gpu_pipeline_->spritePipeline());
|
||||
SDL_GPUBufferBinding vb = {sprite_batch_->vertexBuffer(), 0};
|
||||
SDL_GPUBufferBinding ib = {sprite_batch_->indexBuffer(), 0};
|
||||
SDL_BindGPUVertexBuffers(pass1, 0, &vb, 1);
|
||||
SDL_BindGPUIndexBuffer(pass1, &ib, SDL_GPU_INDEXELEMENTSIZE_32BIT);
|
||||
SDL_GPUTextureSamplerBinding tsb = {sprite_tex->texture(), sprite_tex->sampler()};
|
||||
SDL_BindGPUFragmentSamplers(pass1, 0, &tsb, 1);
|
||||
SDL_DrawGPUIndexedPrimitives(pass1, sprite_batch_->spriteIndexCount(), 1,
|
||||
|
||||
Reference in New Issue
Block a user