refactor(gpu): eliminar GPU compute boids (prevé crash macOS)
Elimina el kernel Metal O(N²) de boids en GPU que causava GPU timeout a macOS amb >50K boles, arrossegant WindowServer fins al crash.

- Elimina gpu_boid_buffer.hpp/cpp (GpuBoidBuffer, BallComputeData, BoidParams)
- Elimina kBoidComputeMSL i kBallComputeVertMSL de gpu_pipeline
- Elimina boid_compute_pipeline_ i ball_compute_pipeline_
- Elimina use_gpu_boids_, boid_params_, ball_screen_uniforms_ de Engine
- Elimina syncAndExitGpuBoids() i tot el compute dispatch de render()
- Mode BOIDS ara usa sempre boid_manager_ (CPU, spatial hash O(N)) i renderitza via gpu_ball_buffer_ instanced (mateix path que PHYSICS)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -51,7 +51,8 @@ constexpr float GRAVITY_CHANGE_LATERAL_MAX = 0.08f; // Velocidad lateral máxim
|
|||||||
constexpr float BALL_SPAWN_MARGIN = 0.15f; // Margen lateral para spawn (0.25 = 25% a cada lado)
|
constexpr float BALL_SPAWN_MARGIN = 0.15f; // Margen lateral para spawn (0.25 = 25% a cada lado)
|
||||||
|
|
||||||
// Escenarios de número de pelotas (teclas 1-8)
|
// Escenarios de número de pelotas (teclas 1-8)
|
||||||
constexpr int BALL_COUNT_SCENARIOS[8] = {10, 50, 100, 500, 1000, 5000, 10000, 50000};
|
// Fase 1 (instanced rendering): límit pràctic ~100K a 60fps (physics bound)
|
||||||
|
constexpr int BALL_COUNT_SCENARIOS[8] = {10, 50, 100, 500, 1000, 5000, 10000, 100000};
|
||||||
|
|
||||||
// Límites de escenario para modos automáticos (índices en BALL_COUNT_SCENARIOS)
|
// Límites de escenario para modos automáticos (índices en BALL_COUNT_SCENARIOS)
|
||||||
// BALL_COUNT_SCENARIOS = {10, 50, 100, 500, 1000, 5000, 10000, 50000}
|
// BALL_COUNT_SCENARIOS = {10, 50, 100, 500, 1000, 5000, 10000, 100000}
|
||||||
|
|||||||
@@ -237,6 +237,13 @@ bool Engine::initialize(int width, int height, int zoom, bool fullscreen, AppMod
|
|||||||
success = false;
|
success = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
gpu_ball_buffer_ = std::make_unique<GpuBallBuffer>();
|
||||||
|
if (!gpu_ball_buffer_->init(gpu_ctx_->device())) {
|
||||||
|
std::cerr << "ERROR: No se pudo crear el ball buffer GPU" << std::endl;
|
||||||
|
success = false;
|
||||||
|
}
|
||||||
|
ball_gpu_data_.reserve(GpuBallBuffer::MAX_BALLS);
|
||||||
|
|
||||||
offscreen_tex_ = std::make_unique<GpuTexture>();
|
offscreen_tex_ = std::make_unique<GpuTexture>();
|
||||||
if (!offscreen_tex_->createRenderTarget(gpu_ctx_->device(),
|
if (!offscreen_tex_->createRenderTarget(gpu_ctx_->device(),
|
||||||
current_screen_width_, current_screen_height_,
|
current_screen_width_, current_screen_height_,
|
||||||
@@ -378,6 +385,7 @@ void Engine::shutdown() {
|
|||||||
if (white_tex_) { white_tex_->destroy(gpu_ctx_->device()); white_tex_.reset(); }
|
if (white_tex_) { white_tex_->destroy(gpu_ctx_->device()); white_tex_.reset(); }
|
||||||
if (offscreen_tex_) { offscreen_tex_->destroy(gpu_ctx_->device()); offscreen_tex_.reset(); }
|
if (offscreen_tex_) { offscreen_tex_->destroy(gpu_ctx_->device()); offscreen_tex_.reset(); }
|
||||||
if (sprite_batch_) { sprite_batch_->destroy(gpu_ctx_->device()); sprite_batch_.reset(); }
|
if (sprite_batch_) { sprite_batch_->destroy(gpu_ctx_->device()); sprite_batch_.reset(); }
|
||||||
|
if (gpu_ball_buffer_) { gpu_ball_buffer_->destroy(gpu_ctx_->device()); gpu_ball_buffer_.reset(); }
|
||||||
if (gpu_pipeline_) { gpu_pipeline_->destroy(gpu_ctx_->device()); gpu_pipeline_.reset(); }
|
if (gpu_pipeline_) { gpu_pipeline_->destroy(gpu_ctx_->device()); gpu_pipeline_.reset(); }
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -437,7 +445,7 @@ void Engine::update() {
|
|||||||
// Modo Figura 3D: actualizar figura polimórfica
|
// Modo Figura 3D: actualizar figura polimórfica
|
||||||
updateShape();
|
updateShape();
|
||||||
} else if (current_mode_ == SimulationMode::BOIDS) {
|
} else if (current_mode_ == SimulationMode::BOIDS) {
|
||||||
// Modo Boids: actualizar comportamiento de enjambre (delegado a BoidManager)
|
// CPU boids: actualizar comportamiento de enjambre (delegado a BoidManager)
|
||||||
boid_manager_->update(delta_time_);
|
boid_manager_->update(delta_time_);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -544,18 +552,17 @@ void Engine::toggleDepthZoom() {
|
|||||||
// Boids (comportamiento de enjambre)
|
// Boids (comportamiento de enjambre)
|
||||||
void Engine::toggleBoidsMode(bool force_gravity_on) {
|
void Engine::toggleBoidsMode(bool force_gravity_on) {
|
||||||
if (current_mode_ == SimulationMode::BOIDS) {
|
if (current_mode_ == SimulationMode::BOIDS) {
|
||||||
// Salir del modo boids (velocidades ya son time-based, no requiere conversión)
|
// Salir del modo boids
|
||||||
current_mode_ = SimulationMode::PHYSICS;
|
current_mode_ = SimulationMode::PHYSICS;
|
||||||
boid_manager_->deactivateBoids(force_gravity_on); // Pasar parámetro para control preciso
|
boid_manager_->deactivateBoids(force_gravity_on);
|
||||||
} else {
|
} else {
|
||||||
// Entrar al modo boids (desde PHYSICS o SHAPE)
|
// Entrar al modo boids (desde PHYSICS o SHAPE)
|
||||||
if (current_mode_ == SimulationMode::SHAPE) {
|
if (current_mode_ == SimulationMode::SHAPE) {
|
||||||
// Si estamos en modo shape, salir primero sin forzar gravedad
|
|
||||||
shape_manager_->toggleShapeMode(false);
|
shape_manager_->toggleShapeMode(false);
|
||||||
current_mode_ = SimulationMode::PHYSICS;
|
current_mode_ = SimulationMode::PHYSICS;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Activar modo boids
|
// Activar modo boids en CPU (configura gravedad OFF, inicializa velocidades)
|
||||||
current_mode_ = SimulationMode::BOIDS;
|
current_mode_ = SimulationMode::BOIDS;
|
||||||
boid_manager_->activateBoids();
|
boid_manager_->activateBoids();
|
||||||
}
|
}
|
||||||
@@ -728,8 +735,12 @@ void Engine::render() {
|
|||||||
|
|
||||||
// Sprites (balls)
|
// Sprites (balls)
|
||||||
const auto& balls = scene_manager_->getBalls();
|
const auto& balls = scene_manager_->getBalls();
|
||||||
|
const float sw = static_cast<float>(current_screen_width_);
|
||||||
|
const float sh = static_cast<float>(current_screen_height_);
|
||||||
|
|
||||||
if (current_mode_ == SimulationMode::SHAPE) {
|
if (current_mode_ == SimulationMode::SHAPE) {
|
||||||
// Bucket sort by depth Z (Painter's Algorithm)
|
// SHAPE mode: bucket sort by depth Z (Painter's Algorithm), with depth scale.
|
||||||
|
// Uses the sprite batch (supports per-sprite scale, needed for depth zoom).
|
||||||
for (size_t i = 0; i < balls.size(); i++) {
|
for (size_t i = 0; i < balls.size(); i++) {
|
||||||
int b = static_cast<int>(balls[i]->getDepthBrightness() * (DEPTH_SORT_BUCKETS - 1));
|
int b = static_cast<int>(balls[i]->getDepthBrightness() * (DEPTH_SORT_BUCKETS - 1));
|
||||||
depth_buckets_[std::clamp(b, 0, DEPTH_SORT_BUCKETS - 1)].push_back(i);
|
depth_buckets_[std::clamp(b, 0, DEPTH_SORT_BUCKETS - 1)].push_back(i);
|
||||||
@@ -745,39 +756,48 @@ void Engine::render() {
|
|||||||
color.r / 255.0f * bf,
|
color.r / 255.0f * bf,
|
||||||
color.g / 255.0f * bf,
|
color.g / 255.0f * bf,
|
||||||
color.b / 255.0f * bf,
|
color.b / 255.0f * bf,
|
||||||
1.0f, depth_scale,
|
1.0f, depth_scale, sw, sh);
|
||||||
static_cast<float>(current_screen_width_),
|
|
||||||
static_cast<float>(current_screen_height_));
|
|
||||||
}
|
}
|
||||||
depth_buckets_[b].clear();
|
depth_buckets_[b].clear();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
size_t idx = 0;
|
// PHYSICS / CPU-BOIDS mode: build instanced ball buffer (GPU instanced rendering).
|
||||||
for (const auto& ball : balls) {
|
// 32 bytes per ball instead of 4×32 bytes per quad — 4× less upload bandwidth.
|
||||||
SDL_FRect pos = ball->getPosition();
|
ball_gpu_data_.clear();
|
||||||
|
for (size_t idx = 0; idx < balls.size(); idx++) {
|
||||||
|
SDL_FRect pos = balls[idx]->getPosition();
|
||||||
Color color = theme_manager_->getInterpolatedColor(idx);
|
Color color = theme_manager_->getInterpolatedColor(idx);
|
||||||
sprite_batch_->addSprite(pos.x, pos.y, pos.w, pos.h,
|
// Convert to NDC center + NDC half-size (both positive)
|
||||||
color.r / 255.0f, color.g / 255.0f, color.b / 255.0f,
|
float cx = ((pos.x + pos.w * 0.5f) / sw) * 2.0f - 1.0f;
|
||||||
1.0f, 1.0f,
|
float cy = 1.0f - ((pos.y + pos.h * 0.5f) / sh) * 2.0f;
|
||||||
static_cast<float>(current_screen_width_),
|
float hw = pos.w / sw;
|
||||||
static_cast<float>(current_screen_height_));
|
float hh = pos.h / sh;
|
||||||
idx++;
|
ball_gpu_data_.push_back({cx, cy, hw, hh,
|
||||||
|
color.r / 255.0f, color.g / 255.0f,
|
||||||
|
color.b / 255.0f, 1.0f});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// UI overlay quad (drawn in Pass 2 over the postfx output)
|
// UI overlay quad (drawn in Pass 2 over the postfx output)
|
||||||
sprite_batch_->addFullscreenOverlay();
|
sprite_batch_->addFullscreenOverlay();
|
||||||
|
|
||||||
// Upload batch to GPU buffers
|
// Upload sprite batch (background + SHAPE balls + UI overlay quad)
|
||||||
if (!sprite_batch_->uploadBatch(gpu_ctx_->device(), cmd)) {
|
if (!sprite_batch_->uploadBatch(gpu_ctx_->device(), cmd)) {
|
||||||
gpu_ctx_->submit(cmd);
|
gpu_ctx_->submit(cmd);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Upload instanced ball buffer (PHYSICS / CPU-BOIDS modes)
|
||||||
|
bool use_instanced_balls = (current_mode_ != SimulationMode::SHAPE) && !ball_gpu_data_.empty();
|
||||||
|
if (use_instanced_balls) {
|
||||||
|
gpu_ball_buffer_->upload(gpu_ctx_->device(), cmd,
|
||||||
|
ball_gpu_data_.data(), static_cast<int>(ball_gpu_data_.size()));
|
||||||
|
}
|
||||||
|
|
||||||
GpuTexture* sprite_tex = (!gpu_textures_.empty())
|
GpuTexture* sprite_tex = (!gpu_textures_.empty())
|
||||||
? gpu_textures_[current_texture_index_].get() : nullptr;
|
? gpu_textures_[current_texture_index_].get() : nullptr;
|
||||||
|
|
||||||
// === Pass 1: Render background + sprites to offscreen texture ===
|
// === Pass 1: Render background + balls to offscreen texture ===
|
||||||
if (offscreen_tex_ && offscreen_tex_->isValid() && sprite_tex && sprite_tex->isValid()) {
|
if (offscreen_tex_ && offscreen_tex_->isValid() && sprite_tex && sprite_tex->isValid()) {
|
||||||
SDL_GPUColorTargetInfo ct = {};
|
SDL_GPUColorTargetInfo ct = {};
|
||||||
ct.texture = offscreen_tex_->texture();
|
ct.texture = offscreen_tex_->texture();
|
||||||
@@ -786,22 +806,36 @@ void Engine::render() {
|
|||||||
ct.store_op = SDL_GPU_STOREOP_STORE;
|
ct.store_op = SDL_GPU_STOREOP_STORE;
|
||||||
|
|
||||||
SDL_GPURenderPass* pass1 = SDL_BeginGPURenderPass(cmd, &ct, 1, nullptr);
|
SDL_GPURenderPass* pass1 = SDL_BeginGPURenderPass(cmd, &ct, 1, nullptr);
|
||||||
SDL_BindGPUGraphicsPipeline(pass1, gpu_pipeline_->spritePipeline());
|
|
||||||
|
|
||||||
|
// Background (white texture tinted by vertex color, via sprite batch)
|
||||||
|
SDL_BindGPUGraphicsPipeline(pass1, gpu_pipeline_->spritePipeline());
|
||||||
|
{
|
||||||
SDL_GPUBufferBinding vb = {sprite_batch_->vertexBuffer(), 0};
|
SDL_GPUBufferBinding vb = {sprite_batch_->vertexBuffer(), 0};
|
||||||
SDL_GPUBufferBinding ib = {sprite_batch_->indexBuffer(), 0};
|
SDL_GPUBufferBinding ib = {sprite_batch_->indexBuffer(), 0};
|
||||||
SDL_BindGPUVertexBuffers(pass1, 0, &vb, 1);
|
SDL_BindGPUVertexBuffers(pass1, 0, &vb, 1);
|
||||||
SDL_BindGPUIndexBuffer(pass1, &ib, SDL_GPU_INDEXELEMENTSIZE_32BIT);
|
SDL_BindGPUIndexBuffer(pass1, &ib, SDL_GPU_INDEXELEMENTSIZE_32BIT);
|
||||||
|
}
|
||||||
// Background (white texture tinted by vertex color)
|
|
||||||
if (white_tex_ && white_tex_->isValid() && sprite_batch_->bgIndexCount() > 0) {
|
if (white_tex_ && white_tex_->isValid() && sprite_batch_->bgIndexCount() > 0) {
|
||||||
SDL_GPUTextureSamplerBinding tsb = {white_tex_->texture(), white_tex_->sampler()};
|
SDL_GPUTextureSamplerBinding tsb = {white_tex_->texture(), white_tex_->sampler()};
|
||||||
SDL_BindGPUFragmentSamplers(pass1, 0, &tsb, 1);
|
SDL_BindGPUFragmentSamplers(pass1, 0, &tsb, 1);
|
||||||
SDL_DrawGPUIndexedPrimitives(pass1, sprite_batch_->bgIndexCount(), 1, 0, 0, 0);
|
SDL_DrawGPUIndexedPrimitives(pass1, sprite_batch_->bgIndexCount(), 1, 0, 0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sprites
|
if (use_instanced_balls && gpu_ball_buffer_->count() > 0) {
|
||||||
if (sprite_batch_->spriteIndexCount() > 0) {
|
// PHYSICS / CPU-BOIDS: instanced rendering — 6 procedural vertices per instance
|
||||||
|
SDL_BindGPUGraphicsPipeline(pass1, gpu_pipeline_->ballPipeline());
|
||||||
|
SDL_GPUBufferBinding ball_vb = {gpu_ball_buffer_->buffer(), 0};
|
||||||
|
SDL_BindGPUVertexBuffers(pass1, 0, &ball_vb, 1);
|
||||||
|
SDL_GPUTextureSamplerBinding tsb = {sprite_tex->texture(), sprite_tex->sampler()};
|
||||||
|
SDL_BindGPUFragmentSamplers(pass1, 0, &tsb, 1);
|
||||||
|
SDL_DrawGPUPrimitives(pass1, 6, static_cast<Uint32>(gpu_ball_buffer_->count()), 0, 0);
|
||||||
|
} else if (!use_instanced_balls && sprite_batch_->spriteIndexCount() > 0) {
|
||||||
|
// SHAPE: sprite batch with depth sort (re-bind sprite pipeline + buffers)
|
||||||
|
SDL_BindGPUGraphicsPipeline(pass1, gpu_pipeline_->spritePipeline());
|
||||||
|
SDL_GPUBufferBinding vb = {sprite_batch_->vertexBuffer(), 0};
|
||||||
|
SDL_GPUBufferBinding ib = {sprite_batch_->indexBuffer(), 0};
|
||||||
|
SDL_BindGPUVertexBuffers(pass1, 0, &vb, 1);
|
||||||
|
SDL_BindGPUIndexBuffer(pass1, &ib, SDL_GPU_INDEXELEMENTSIZE_32BIT);
|
||||||
SDL_GPUTextureSamplerBinding tsb = {sprite_tex->texture(), sprite_tex->sampler()};
|
SDL_GPUTextureSamplerBinding tsb = {sprite_tex->texture(), sprite_tex->sampler()};
|
||||||
SDL_BindGPUFragmentSamplers(pass1, 0, &tsb, 1);
|
SDL_BindGPUFragmentSamplers(pass1, 0, &tsb, 1);
|
||||||
SDL_DrawGPUIndexedPrimitives(pass1, sprite_batch_->spriteIndexCount(), 1,
|
SDL_DrawGPUIndexedPrimitives(pass1, sprite_batch_->spriteIndexCount(), 1,
|
||||||
|
|||||||
@@ -16,6 +16,7 @@
|
|||||||
#include "boids_mgr/boid_manager.hpp" // for BoidManager
|
#include "boids_mgr/boid_manager.hpp" // for BoidManager
|
||||||
#include "defines.hpp" // for GravityDirection, ColorTheme, ShapeType
|
#include "defines.hpp" // for GravityDirection, ColorTheme, ShapeType
|
||||||
#include "external/texture.hpp" // for Texture
|
#include "external/texture.hpp" // for Texture
|
||||||
|
#include "gpu/gpu_ball_buffer.hpp" // for GpuBallBuffer, BallGPUData
|
||||||
#include "gpu/gpu_context.hpp" // for GpuContext
|
#include "gpu/gpu_context.hpp" // for GpuContext
|
||||||
#include "gpu/gpu_pipeline.hpp" // for GpuPipeline
|
#include "gpu/gpu_pipeline.hpp" // for GpuPipeline
|
||||||
#include "gpu/gpu_sprite_batch.hpp" // for GpuSpriteBatch
|
#include "gpu/gpu_sprite_batch.hpp" // for GpuSpriteBatch
|
||||||
@@ -137,8 +138,10 @@ class Engine {
|
|||||||
|
|
||||||
// === SDL_GPU rendering pipeline ===
|
// === SDL_GPU rendering pipeline ===
|
||||||
std::unique_ptr<GpuContext> gpu_ctx_; // Device + swapchain
|
std::unique_ptr<GpuContext> gpu_ctx_; // Device + swapchain
|
||||||
std::unique_ptr<GpuPipeline> gpu_pipeline_; // Sprite + postfx pipelines
|
std::unique_ptr<GpuPipeline> gpu_pipeline_; // Sprite + ball + postfx pipelines
|
||||||
std::unique_ptr<GpuSpriteBatch> sprite_batch_; // Per-frame vertex/index batch
|
std::unique_ptr<GpuSpriteBatch> sprite_batch_; // Per-frame vertex/index batch (bg + shape + UI)
|
||||||
|
std::unique_ptr<GpuBallBuffer> gpu_ball_buffer_; // Instanced ball instance data (PHYSICS/BOIDS)
|
||||||
|
std::vector<BallGPUData> ball_gpu_data_; // CPU-side staging vector (reused each frame)
|
||||||
std::unique_ptr<GpuTexture> offscreen_tex_; // Offscreen render target (Pass 1)
|
std::unique_ptr<GpuTexture> offscreen_tex_; // Offscreen render target (Pass 1)
|
||||||
std::unique_ptr<GpuTexture> white_tex_; // 1×1 white (background gradient)
|
std::unique_ptr<GpuTexture> white_tex_; // 1×1 white (background gradient)
|
||||||
std::unique_ptr<GpuTexture> ui_tex_; // UI text overlay texture
|
std::unique_ptr<GpuTexture> ui_tex_; // UI text overlay texture
|
||||||
@@ -246,4 +249,5 @@ class Engine {
|
|||||||
void recreateOffscreenTexture(); // Recreate when resolution changes
|
void recreateOffscreenTexture(); // Recreate when resolution changes
|
||||||
void renderUIToSurface(); // Render text/UI to ui_surface_
|
void renderUIToSurface(); // Render text/UI to ui_surface_
|
||||||
void uploadUISurface(SDL_GPUCommandBuffer* cmd_buf); // Upload ui_surface_ → ui_tex_
|
void uploadUISurface(SDL_GPUCommandBuffer* cmd_buf); // Upload ui_surface_ → ui_tex_
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|||||||
65
source/gpu/gpu_ball_buffer.cpp
Normal file
65
source/gpu/gpu_ball_buffer.cpp
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
#include "gpu_ball_buffer.hpp"
|
||||||
|
|
||||||
|
#include <SDL3/SDL_log.h>
|
||||||
|
#include <algorithm> // std::min
|
||||||
|
#include <cstring> // memcpy
|
||||||
|
|
||||||
|
// Creates the two buffers used for instanced ball rendering, both sized for
// MAX_BALLS BallGPUData records:
//   - gpu_buf_      : vertex buffer holding the per-instance data
//   - transfer_buf_ : upload staging buffer filled by upload()
//
// device : SDL GPU device that will own the buffers; must not be null.
//
// Returns true when both buffers were created. Returns false (after logging
// through SDL_Log) on any failure; in that case nothing is left allocated, so
// the object is back in its default, empty state.
bool GpuBallBuffer::init(SDL_GPUDevice* device) {
    if (!device) {
        SDL_Log("GpuBallBuffer: init called with a null device");
        return false;
    }

    // Calling init() twice would otherwise overwrite (and leak) the handles
    // created by the first call. destroy() is a no-op on an empty object.
    destroy(device);

    // Explicit narrowing: the SDL create-info structs take a 32-bit size.
    const Uint32 buf_size =
        static_cast<Uint32>(static_cast<size_t>(MAX_BALLS) * sizeof(BallGPUData));

    // GPU vertex buffer (instance-rate data read by the ball instanced shader)
    SDL_GPUBufferCreateInfo buf_info = {};
    buf_info.usage = SDL_GPU_BUFFERUSAGE_VERTEX;
    buf_info.size  = buf_size;
    gpu_buf_ = SDL_CreateGPUBuffer(device, &buf_info);
    if (!gpu_buf_) {
        SDL_Log("GpuBallBuffer: GPU buffer creation failed: %s", SDL_GetError());
        return false;
    }

    // Transfer buffer (upload staging, cycled every frame)
    SDL_GPUTransferBufferCreateInfo tb_info = {};
    tb_info.usage = SDL_GPU_TRANSFERBUFFERUSAGE_UPLOAD;
    tb_info.size  = buf_size;
    transfer_buf_ = SDL_CreateGPUTransferBuffer(device, &tb_info);
    if (!transfer_buf_) {
        SDL_Log("GpuBallBuffer: transfer buffer creation failed: %s", SDL_GetError());
        // Do not keep a half-initialised object around: release the vertex
        // buffer created above so a failed init() owns nothing.
        SDL_ReleaseGPUBuffer(device, gpu_buf_);
        gpu_buf_ = nullptr;
        return false;
    }

    SDL_Log("GpuBallBuffer: initialized (capacity %d balls, %.1f MB VRAM)",
            MAX_BALLS, buf_size / (1024.0f * 1024.0f));
    return true;
}
|
||||||
|
|
||||||
|
// Releases both buffers through the given device and resets the instance
// count. Does nothing at all (handles and count are left untouched) when
// device is null. Safe to call on an object that holds no buffers.
void GpuBallBuffer::destroy(SDL_GPUDevice* device) {
    if (device == nullptr) {
        return;
    }

    // Staging buffer first, then the vertex buffer; each handle is cleared
    // right after its release so a repeated destroy() is harmless.
    if (transfer_buf_ != nullptr) {
        SDL_ReleaseGPUTransferBuffer(device, transfer_buf_);
        transfer_buf_ = nullptr;
    }

    if (gpu_buf_ != nullptr) {
        SDL_ReleaseGPUBuffer(device, gpu_buf_);
        gpu_buf_ = nullptr;
    }

    count_ = 0;
}
|
||||||
|
|
||||||
|
// Copies `count` BallGPUData records into the GPU vertex buffer.
//
// The data is first written into the mapped transfer buffer, then a copy pass
// recorded on `cmd` moves it into gpu_buf_. This function begins and ends its
// own copy pass.
//
// device : SDL GPU device that owns the buffers.
// cmd    : command buffer on which the copy pass is recorded.
// data   : pointer to at least `count` records.
// count  : number of records; values above MAX_BALLS are clamped.
//
// Returns true when the upload was recorded; count() then reports the
// (clamped) number of records. Returns false when:
//   - the input is empty, or init() has not succeeded / an argument is null:
//     count() is reset to 0;
//   - mapping the transfer buffer or beginning the copy pass fails: the error
//     is logged and count() keeps its previous value.
bool GpuBallBuffer::upload(SDL_GPUDevice* device, SDL_GPUCommandBuffer* cmd,
                           const BallGPUData* data, int count) {
    if (!data || count <= 0) {
        count_ = 0;
        return false;
    }

    // Without this guard a failed or missing init() would hand null handles
    // to the SDL map / upload calls below. No logging here: this can be hit
    // once per frame.
    if (!device || !cmd || !gpu_buf_ || !transfer_buf_) {
        count_ = 0;
        return false;
    }

    count = std::min(count, MAX_BALLS);

    const Uint32 upload_size =
        static_cast<Uint32>(static_cast<size_t>(count) * sizeof(BallGPUData));

    void* ptr = SDL_MapGPUTransferBuffer(device, transfer_buf_, true /* cycle */);
    if (!ptr) {
        SDL_Log("GpuBallBuffer: transfer buffer map failed: %s", SDL_GetError());
        return false;
    }
    std::memcpy(ptr, data, upload_size);
    SDL_UnmapGPUTransferBuffer(device, transfer_buf_);

    SDL_GPUCopyPass* copy = SDL_BeginGPUCopyPass(cmd);
    if (!copy) {
        SDL_Log("GpuBallBuffer: copy pass creation failed: %s", SDL_GetError());
        return false;
    }
    SDL_GPUTransferBufferLocation src = { transfer_buf_, 0 };
    SDL_GPUBufferRegion           dst = { gpu_buf_, 0, upload_size };
    SDL_UploadToGPUBuffer(copy, &src, &dst, true /* cycle */);
    SDL_EndGPUCopyPass(copy);

    count_ = count;
    return true;
}
|
||||||
47
source/gpu/gpu_ball_buffer.hpp
Normal file
47
source/gpu/gpu_ball_buffer.hpp
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <SDL3/SDL_gpu.h>
|
||||||
|
#include <cstdint>
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
// BallGPUData: one per-instance record (32 bytes) stored in VRAM.
//
// Every value is computed on the CPU and is already expressed in NDC space,
// so the vertex shader does not need a screen-dimension uniform.
//
//   cx, cy      NDC center:    cx = (x + w/2) / sw * 2 - 1
//                              cy = 1 - (y + h/2) / sh * 2
//   hw, hh      NDC half-size: hw = w / sw, hh = h / sh (both positive)
//   r, g, b, a  RGBA color, each channel in [0,1]
// ---------------------------------------------------------------------------
struct BallGPUData {
    float cx;  // NDC center, x
    float cy;  // NDC center, y
    float hw;  // NDC half-width  (positive)
    float hh;  // NDC half-height (positive)
    float r;   // red   [0,1]
    float g;   // green [0,1]
    float b;   // blue  [0,1]
    float a;   // alpha [0,1]
};

// Eight tightly packed floats: the size is fixed so it cannot drift silently.
static_assert(sizeof(BallGPUData) == 32, "BallGPUData must be 32 bytes");
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// GpuBallBuffer — owns the GPU vertex buffer used for instanced ball rendering.
|
||||||
|
//
|
||||||
|
// Usage per frame:
|
||||||
|
// buffer.upload(device, cmd, data, count); // inside a copy pass
|
||||||
|
// // Then in render pass: bind buffer, SDL_DrawGPUPrimitives(pass, 6, count, 0, 0)
|
||||||
|
// ============================================================================
|
||||||
|
class GpuBallBuffer {
|
||||||
|
public:
|
||||||
|
static constexpr int MAX_BALLS = 500000;
|
||||||
|
|
||||||
|
bool init(SDL_GPUDevice* device);
|
||||||
|
void destroy(SDL_GPUDevice* device);
|
||||||
|
|
||||||
|
// Upload ball array to GPU via an internal copy pass.
|
||||||
|
// count is clamped to MAX_BALLS. Returns false on error or empty input.
|
||||||
|
bool upload(SDL_GPUDevice* device, SDL_GPUCommandBuffer* cmd,
|
||||||
|
const BallGPUData* data, int count);
|
||||||
|
|
||||||
|
SDL_GPUBuffer* buffer() const { return gpu_buf_; }
|
||||||
|
int count() const { return count_; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
SDL_GPUBuffer* gpu_buf_ = nullptr;
|
||||||
|
SDL_GPUTransferBuffer* transfer_buf_ = nullptr;
|
||||||
|
int count_ = 0;
|
||||||
|
};
|
||||||
@@ -1,8 +1,10 @@
|
|||||||
#include "gpu_pipeline.hpp"
|
#include "gpu_pipeline.hpp"
|
||||||
#include "gpu_sprite_batch.hpp" // for GpuVertex layout
|
#include "gpu_sprite_batch.hpp" // for GpuVertex layout
|
||||||
|
#include "gpu_ball_buffer.hpp" // for BallGPUData layout
|
||||||
|
|
||||||
#include <SDL3/SDL_log.h>
|
#include <SDL3/SDL_log.h>
|
||||||
#include <cstddef> // offsetof
|
#include <cstddef> // offsetof
|
||||||
|
#include <cstring> // strlen
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// MSL Shaders (Metal Shading Language, macOS)
|
// MSL Shaders (Metal Shading Language, macOS)
|
||||||
@@ -133,6 +135,60 @@ fragment float4 postfx_fs(PostVOut in [[stage_in]],
|
|||||||
}
|
}
|
||||||
)";
|
)";
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Ball instanced vertex shader
|
||||||
|
// Reads BallGPUData as per-instance attributes (input_rate = INSTANCE).
|
||||||
|
// Generates a 6-vertex quad (2 triangles) per instance using vertex_id.
|
||||||
|
//
|
||||||
|
// BallGPUData layout:
|
||||||
|
// float2 center [[attribute(0)]] — NDC center (cx, cy)
|
||||||
|
// float2 half [[attribute(1)]] — NDC half-size (hw, hh), both positive
|
||||||
|
// float4 col [[attribute(2)]] — RGBA [0,1]
|
||||||
|
//
|
||||||
|
// NDC convention (SDL / Metal): Y increases upward (+1=top, -1=bottom).
|
||||||
|
// half.x = w/screen_w, half.y = h/screen_h (positive; Y is not flipped)
|
||||||
|
// Vertex order: TL TR BL | TR BR BL (clockwise in Y-up NDC; both triangles share the same winding)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
static const char* kBallInstancedVertMSL = R"(
|
||||||
|
#include <metal_stdlib>
|
||||||
|
using namespace metal;
|
||||||
|
|
||||||
|
struct BallInstance {
|
||||||
|
float2 center [[attribute(0)]]; // NDC center
|
||||||
|
float2 halfsize [[attribute(1)]]; // NDC half-size (both positive); 'half' is reserved in MSL
|
||||||
|
float4 col [[attribute(2)]];
|
||||||
|
};
|
||||||
|
struct BallVOut {
|
||||||
|
float4 pos [[position]];
|
||||||
|
float2 uv;
|
||||||
|
float4 col;
|
||||||
|
};
|
||||||
|
|
||||||
|
vertex BallVOut ball_instanced_vs(BallInstance inst [[stage_in]],
|
||||||
|
uint vid [[vertex_id]]) {
|
||||||
|
// Offset signs for each of the 6 vertices (TL TR BL | TR BR BL)
|
||||||
|
const float2 offsets[6] = {
|
||||||
|
{-1.0f, 1.0f}, // TL
|
||||||
|
{ 1.0f, 1.0f}, // TR
|
||||||
|
{-1.0f, -1.0f}, // BL
|
||||||
|
{ 1.0f, 1.0f}, // TR (shared)
|
||||||
|
{ 1.0f, -1.0f}, // BR
|
||||||
|
{-1.0f, -1.0f}, // BL (shared)
|
||||||
|
};
|
||||||
|
// UV: TL=(0,0) TR=(1,0) BL=(0,1) BR=(1,1)
|
||||||
|
const float2 uvs[6] = {
|
||||||
|
{0.0f, 0.0f}, {1.0f, 0.0f}, {0.0f, 1.0f},
|
||||||
|
{1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f},
|
||||||
|
};
|
||||||
|
float2 pos = inst.center + offsets[vid] * inst.halfsize;
|
||||||
|
BallVOut out;
|
||||||
|
out.pos = float4(pos.x, pos.y, 0.0f, 1.0f);
|
||||||
|
out.uv = uvs[vid];
|
||||||
|
out.col = inst.col;
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
)";
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// GpuPipeline implementation
|
// GpuPipeline implementation
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
@@ -222,6 +278,71 @@ bool GpuPipeline::init(SDL_GPUDevice* device,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------
|
||||||
|
// Ball instanced pipeline
|
||||||
|
// Vertex: ball_instanced_vs (BallGPUData per-instance, no index buffer)
|
||||||
|
// Fragment: sprite_fs (same texture+color blend as sprite pipeline)
|
||||||
|
// Targets: offscreen (same as sprite pipeline)
|
||||||
|
// ----------------------------------------------------------------
|
||||||
|
SDL_GPUShader* ball_vert = createShader(device, kBallInstancedVertMSL, "ball_instanced_vs",
|
||||||
|
SDL_GPU_SHADERSTAGE_VERTEX, 0, 0);
|
||||||
|
SDL_GPUShader* ball_frag = createShader(device, kSpriteFragMSL, "sprite_fs",
|
||||||
|
SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 0);
|
||||||
|
if (!ball_vert || !ball_frag) {
|
||||||
|
SDL_Log("GpuPipeline: failed to create ball instanced shaders");
|
||||||
|
if (ball_vert) SDL_ReleaseGPUShader(device, ball_vert);
|
||||||
|
if (ball_frag) SDL_ReleaseGPUShader(device, ball_frag);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Vertex input: BallGPUData as per-instance data (step rate = 1 instance)
|
||||||
|
SDL_GPUVertexBufferDescription ball_vb_desc = {};
|
||||||
|
ball_vb_desc.slot = 0;
|
||||||
|
ball_vb_desc.pitch = sizeof(BallGPUData);
|
||||||
|
ball_vb_desc.input_rate = SDL_GPU_VERTEXINPUTRATE_INSTANCE;
|
||||||
|
ball_vb_desc.instance_step_rate = 1;
|
||||||
|
|
||||||
|
SDL_GPUVertexAttribute ball_attrs[3] = {};
|
||||||
|
// attr 0: center (float2) at offset 0
|
||||||
|
ball_attrs[0].location = 0;
|
||||||
|
ball_attrs[0].buffer_slot = 0;
|
||||||
|
ball_attrs[0].format = SDL_GPU_VERTEXELEMENTFORMAT_FLOAT2;
|
||||||
|
ball_attrs[0].offset = static_cast<Uint32>(offsetof(BallGPUData, cx));
|
||||||
|
// attr 1: half-size (float2) at offset 8
|
||||||
|
ball_attrs[1].location = 1;
|
||||||
|
ball_attrs[1].buffer_slot = 0;
|
||||||
|
ball_attrs[1].format = SDL_GPU_VERTEXELEMENTFORMAT_FLOAT2;
|
||||||
|
ball_attrs[1].offset = static_cast<Uint32>(offsetof(BallGPUData, hw));
|
||||||
|
// attr 2: color (float4) at offset 16
|
||||||
|
ball_attrs[2].location = 2;
|
||||||
|
ball_attrs[2].buffer_slot = 0;
|
||||||
|
ball_attrs[2].format = SDL_GPU_VERTEXELEMENTFORMAT_FLOAT4;
|
||||||
|
ball_attrs[2].offset = static_cast<Uint32>(offsetof(BallGPUData, r));
|
||||||
|
|
||||||
|
SDL_GPUVertexInputState ball_vertex_input = {};
|
||||||
|
ball_vertex_input.vertex_buffer_descriptions = &ball_vb_desc;
|
||||||
|
ball_vertex_input.num_vertex_buffers = 1;
|
||||||
|
ball_vertex_input.vertex_attributes = ball_attrs;
|
||||||
|
ball_vertex_input.num_vertex_attributes = 3;
|
||||||
|
|
||||||
|
SDL_GPUGraphicsPipelineCreateInfo ball_pipe_info = {};
|
||||||
|
ball_pipe_info.vertex_shader = ball_vert;
|
||||||
|
ball_pipe_info.fragment_shader = ball_frag;
|
||||||
|
ball_pipe_info.vertex_input_state = ball_vertex_input;
|
||||||
|
ball_pipe_info.primitive_type = SDL_GPU_PRIMITIVETYPE_TRIANGLELIST;
|
||||||
|
ball_pipe_info.target_info.num_color_targets = 1;
|
||||||
|
ball_pipe_info.target_info.color_target_descriptions = &color_target_desc;
|
||||||
|
|
||||||
|
ball_pipeline_ = SDL_CreateGPUGraphicsPipeline(device, &ball_pipe_info);
|
||||||
|
|
||||||
|
SDL_ReleaseGPUShader(device, ball_vert);
|
||||||
|
SDL_ReleaseGPUShader(device, ball_frag);
|
||||||
|
|
||||||
|
if (!ball_pipeline_) {
|
||||||
|
SDL_Log("GpuPipeline: ball instanced pipeline creation failed: %s", SDL_GetError());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
// ----------------------------------------------------------------
|
// ----------------------------------------------------------------
|
||||||
// UI overlay pipeline (same as sprite but renders to swapchain format)
|
// UI overlay pipeline (same as sprite but renders to swapchain format)
|
||||||
// Reuse sprite shaders with different target format.
|
// Reuse sprite shaders with different target format.
|
||||||
@@ -275,12 +396,13 @@ bool GpuPipeline::init(SDL_GPUDevice* device,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
SDL_Log("GpuPipeline: sprite and postfx pipelines created successfully");
|
SDL_Log("GpuPipeline: all pipelines created successfully");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Releases every graphics pipeline this object holds (sprite, ball, postfx,
// in that order) and nulls each handle, so calling destroy() again is a no-op.
void GpuPipeline::destroy(SDL_GPUDevice* device) {
    SDL_GPUGraphicsPipeline** const owned[] = {
        &sprite_pipeline_,
        &ball_pipeline_,
        &postfx_pipeline_,
    };

    for (SDL_GPUGraphicsPipeline** slot : owned) {
        if (*slot != nullptr) {
            SDL_ReleaseGPUGraphicsPipeline(device, *slot);
            *slot = nullptr;
        }
    }
}
|
||||||
|
|
||||||
@@ -289,7 +411,8 @@ SDL_GPUShader* GpuPipeline::createShader(SDL_GPUDevice* device,
|
|||||||
const char* entrypoint,
|
const char* entrypoint,
|
||||||
SDL_GPUShaderStage stage,
|
SDL_GPUShaderStage stage,
|
||||||
Uint32 num_samplers,
|
Uint32 num_samplers,
|
||||||
Uint32 num_uniform_buffers) {
|
Uint32 num_uniform_buffers,
|
||||||
|
Uint32 num_storage_buffers) {
|
||||||
SDL_GPUShaderCreateInfo info = {};
|
SDL_GPUShaderCreateInfo info = {};
|
||||||
info.code = reinterpret_cast<const Uint8*>(msl_source);
|
info.code = reinterpret_cast<const Uint8*>(msl_source);
|
||||||
info.code_size = static_cast<size_t>(strlen(msl_source) + 1);
|
info.code_size = static_cast<size_t>(strlen(msl_source) + 1);
|
||||||
@@ -298,7 +421,7 @@ SDL_GPUShader* GpuPipeline::createShader(SDL_GPUDevice* device,
|
|||||||
info.stage = stage;
|
info.stage = stage;
|
||||||
info.num_samplers = num_samplers;
|
info.num_samplers = num_samplers;
|
||||||
info.num_storage_textures = 0;
|
info.num_storage_textures = 0;
|
||||||
info.num_storage_buffers = 0;
|
info.num_storage_buffers = num_storage_buffers;
|
||||||
info.num_uniform_buffers = num_uniform_buffers;
|
info.num_uniform_buffers = num_uniform_buffers;
|
||||||
|
|
||||||
SDL_GPUShader* shader = SDL_CreateGPUShader(device, &info);
|
SDL_GPUShader* shader = SDL_CreateGPUShader(device, &info);
|
||||||
|
|||||||
@@ -19,6 +19,9 @@ struct PostFXUniforms {
|
|||||||
//
|
//
|
||||||
// sprite_pipeline_ : textured quads, alpha blending.
|
// sprite_pipeline_ : textured quads, alpha blending.
|
||||||
// Vertex layout: GpuVertex (pos float2, uv float2, col float4).
|
// Vertex layout: GpuVertex (pos float2, uv float2, col float4).
|
||||||
|
// ball_pipeline_ : instanced ball rendering, alpha blending.
|
||||||
|
// Vertex layout: BallGPUData as per-instance data (input_rate=INSTANCE).
|
||||||
|
// 6 procedural vertices per instance (no index buffer).
|
||||||
// postfx_pipeline_ : full-screen triangle, no vertex buffer, no blend.
|
// postfx_pipeline_ : full-screen triangle, no vertex buffer, no blend.
|
||||||
// Reads offscreen texture, writes to swapchain.
|
// Reads offscreen texture, writes to swapchain.
|
||||||
// Accepts PostFXUniforms via fragment uniform buffer slot 0.
|
// Accepts PostFXUniforms via fragment uniform buffer slot 0.
|
||||||
@@ -33,6 +36,7 @@ public:
|
|||||||
void destroy(SDL_GPUDevice* device);
|
void destroy(SDL_GPUDevice* device);
|
||||||
|
|
||||||
SDL_GPUGraphicsPipeline* spritePipeline() const { return sprite_pipeline_; }
|
SDL_GPUGraphicsPipeline* spritePipeline() const { return sprite_pipeline_; }
|
||||||
|
SDL_GPUGraphicsPipeline* ballPipeline() const { return ball_pipeline_; }
|
||||||
SDL_GPUGraphicsPipeline* postfxPipeline() const { return postfx_pipeline_; }
|
SDL_GPUGraphicsPipeline* postfxPipeline() const { return postfx_pipeline_; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@@ -41,8 +45,10 @@ private:
|
|||||||
const char* entrypoint,
|
const char* entrypoint,
|
||||||
SDL_GPUShaderStage stage,
|
SDL_GPUShaderStage stage,
|
||||||
Uint32 num_samplers,
|
Uint32 num_samplers,
|
||||||
Uint32 num_uniform_buffers);
|
Uint32 num_uniform_buffers,
|
||||||
|
Uint32 num_storage_buffers = 0);
|
||||||
|
|
||||||
SDL_GPUGraphicsPipeline* sprite_pipeline_ = nullptr;
|
SDL_GPUGraphicsPipeline* sprite_pipeline_ = nullptr;
|
||||||
|
SDL_GPUGraphicsPipeline* ball_pipeline_ = nullptr;
|
||||||
SDL_GPUGraphicsPipeline* postfx_pipeline_ = nullptr;
|
SDL_GPUGraphicsPipeline* postfx_pipeline_ = nullptr;
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -105,7 +105,7 @@ bool InputHandler::processEvents(Engine& engine) {
|
|||||||
|
|
||||||
// Toggle Modo Boids (comportamiento de enjambre)
|
// Toggle Modo Boids (comportamiento de enjambre)
|
||||||
case SDLK_B:
|
case SDLK_B:
|
||||||
// engine.toggleBoidsMode();
|
engine.toggleBoidsMode();
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// Ciclar temas de color (movido de B a C)
|
// Ciclar temas de color (movido de B a C)
|
||||||
|
|||||||
Reference in New Issue
Block a user