diff --git a/source/defines.hpp b/source/defines.hpp index e03c0e8..a0809cb 100644 --- a/source/defines.hpp +++ b/source/defines.hpp @@ -51,7 +51,8 @@ constexpr float GRAVITY_CHANGE_LATERAL_MAX = 0.08f; // Velocidad lateral máxim constexpr float BALL_SPAWN_MARGIN = 0.15f; // Margen lateral para spawn (0.25 = 25% a cada lado) // Escenarios de número de pelotas (teclas 1-8) -constexpr int BALL_COUNT_SCENARIOS[8] = {10, 50, 100, 500, 1000, 5000, 10000, 50000}; +// Fase 1 (instanced rendering): límit pràctic ~100K a 60fps (physics bound) +constexpr int BALL_COUNT_SCENARIOS[8] = {10, 50, 100, 500, 1000, 5000, 10000, 100000}; // Límites de escenario para modos automáticos (índices en BALL_COUNT_SCENARIOS) // BALL_COUNT_SCENARIOS = {10, 50, 100, 500, 1000, 5000, 10000, 50000} diff --git a/source/engine.cpp b/source/engine.cpp index aca76a0..aa3605c 100644 --- a/source/engine.cpp +++ b/source/engine.cpp @@ -237,6 +237,13 @@ bool Engine::initialize(int width, int height, int zoom, bool fullscreen, AppMod success = false; } + gpu_ball_buffer_ = std::make_unique(); + if (!gpu_ball_buffer_->init(gpu_ctx_->device())) { + std::cerr << "ERROR: No se pudo crear el ball buffer GPU" << std::endl; + success = false; + } + ball_gpu_data_.reserve(GpuBallBuffer::MAX_BALLS); + offscreen_tex_ = std::make_unique(); if (!offscreen_tex_->createRenderTarget(gpu_ctx_->device(), current_screen_width_, current_screen_height_, @@ -377,8 +384,9 @@ void Engine::shutdown() { if (ui_tex_) { ui_tex_->destroy(gpu_ctx_->device()); ui_tex_.reset(); } if (white_tex_) { white_tex_->destroy(gpu_ctx_->device()); white_tex_.reset(); } if (offscreen_tex_) { offscreen_tex_->destroy(gpu_ctx_->device()); offscreen_tex_.reset(); } - if (sprite_batch_) { sprite_batch_->destroy(gpu_ctx_->device()); sprite_batch_.reset(); } - if (gpu_pipeline_) { gpu_pipeline_->destroy(gpu_ctx_->device()); gpu_pipeline_.reset(); } + if (sprite_batch_) { sprite_batch_->destroy(gpu_ctx_->device()); sprite_batch_.reset(); } + if 
(gpu_ball_buffer_) { gpu_ball_buffer_->destroy(gpu_ctx_->device()); gpu_ball_buffer_.reset(); } + if (gpu_pipeline_) { gpu_pipeline_->destroy(gpu_ctx_->device()); gpu_pipeline_.reset(); } } // Destroy software UI renderer and surface @@ -437,7 +445,7 @@ void Engine::update() { // Modo Figura 3D: actualizar figura polimórfica updateShape(); } else if (current_mode_ == SimulationMode::BOIDS) { - // Modo Boids: actualizar comportamiento de enjambre (delegado a BoidManager) + // CPU boids: actualizar comportamiento de enjambre (delegado a BoidManager) boid_manager_->update(delta_time_); } @@ -544,18 +552,17 @@ void Engine::toggleDepthZoom() { // Boids (comportamiento de enjambre) void Engine::toggleBoidsMode(bool force_gravity_on) { if (current_mode_ == SimulationMode::BOIDS) { - // Salir del modo boids (velocidades ya son time-based, no requiere conversión) + // Salir del modo boids current_mode_ = SimulationMode::PHYSICS; - boid_manager_->deactivateBoids(force_gravity_on); // Pasar parámetro para control preciso + boid_manager_->deactivateBoids(force_gravity_on); } else { // Entrar al modo boids (desde PHYSICS o SHAPE) if (current_mode_ == SimulationMode::SHAPE) { - // Si estamos en modo shape, salir primero sin forzar gravedad shape_manager_->toggleShapeMode(false); current_mode_ = SimulationMode::PHYSICS; } - // Activar modo boids + // Activar modo boids en CPU (configura gravedad OFF, inicializa velocidades) current_mode_ = SimulationMode::BOIDS; boid_manager_->activateBoids(); } @@ -728,8 +735,12 @@ void Engine::render() { // Sprites (balls) const auto& balls = scene_manager_->getBalls(); + const float sw = static_cast(current_screen_width_); + const float sh = static_cast(current_screen_height_); + if (current_mode_ == SimulationMode::SHAPE) { - // Bucket sort by depth Z (Painter's Algorithm) + // SHAPE mode: bucket sort by depth Z (Painter's Algorithm), with depth scale. + // Uses the sprite batch (supports per-sprite scale, needed for depth zoom). 
for (size_t i = 0; i < balls.size(); i++) { int b = static_cast(balls[i]->getDepthBrightness() * (DEPTH_SORT_BUCKETS - 1)); depth_buckets_[std::clamp(b, 0, DEPTH_SORT_BUCKETS - 1)].push_back(i); @@ -745,39 +756,48 @@ void Engine::render() { color.r / 255.0f * bf, color.g / 255.0f * bf, color.b / 255.0f * bf, - 1.0f, depth_scale, - static_cast(current_screen_width_), - static_cast(current_screen_height_)); + 1.0f, depth_scale, sw, sh); } depth_buckets_[b].clear(); } } else { - size_t idx = 0; - for (const auto& ball : balls) { - SDL_FRect pos = ball->getPosition(); + // PHYSICS / CPU-BOIDS mode: build instanced ball buffer (GPU instanced rendering). + // 32 bytes per ball instead of 4×32 bytes per quad — 4× less upload bandwidth. + ball_gpu_data_.clear(); + for (size_t idx = 0; idx < balls.size(); idx++) { + SDL_FRect pos = balls[idx]->getPosition(); Color color = theme_manager_->getInterpolatedColor(idx); - sprite_batch_->addSprite(pos.x, pos.y, pos.w, pos.h, - color.r / 255.0f, color.g / 255.0f, color.b / 255.0f, - 1.0f, 1.0f, - static_cast(current_screen_width_), - static_cast(current_screen_height_)); - idx++; + // Convert to NDC center + NDC half-size (both positive) + float cx = ((pos.x + pos.w * 0.5f) / sw) * 2.0f - 1.0f; + float cy = 1.0f - ((pos.y + pos.h * 0.5f) / sh) * 2.0f; + float hw = pos.w / sw; + float hh = pos.h / sh; + ball_gpu_data_.push_back({cx, cy, hw, hh, + color.r / 255.0f, color.g / 255.0f, + color.b / 255.0f, 1.0f}); } } // UI overlay quad (drawn in Pass 2 over the postfx output) sprite_batch_->addFullscreenOverlay(); - // Upload batch to GPU buffers + // Upload sprite batch (background + SHAPE balls + UI overlay quad) if (!sprite_batch_->uploadBatch(gpu_ctx_->device(), cmd)) { gpu_ctx_->submit(cmd); return; } + // Upload instanced ball buffer (PHYSICS / CPU-BOIDS modes) + bool use_instanced_balls = (current_mode_ != SimulationMode::SHAPE) && !ball_gpu_data_.empty(); + if (use_instanced_balls) { + 
gpu_ball_buffer_->upload(gpu_ctx_->device(), cmd, + ball_gpu_data_.data(), static_cast(ball_gpu_data_.size())); + } + GpuTexture* sprite_tex = (!gpu_textures_.empty()) ? gpu_textures_[current_texture_index_].get() : nullptr; - // === Pass 1: Render background + sprites to offscreen texture === + // === Pass 1: Render background + balls to offscreen texture === if (offscreen_tex_ && offscreen_tex_->isValid() && sprite_tex && sprite_tex->isValid()) { SDL_GPUColorTargetInfo ct = {}; ct.texture = offscreen_tex_->texture(); @@ -786,22 +806,36 @@ void Engine::render() { ct.store_op = SDL_GPU_STOREOP_STORE; SDL_GPURenderPass* pass1 = SDL_BeginGPURenderPass(cmd, &ct, 1, nullptr); + + // Background (white texture tinted by vertex color, via sprite batch) SDL_BindGPUGraphicsPipeline(pass1, gpu_pipeline_->spritePipeline()); - - SDL_GPUBufferBinding vb = {sprite_batch_->vertexBuffer(), 0}; - SDL_GPUBufferBinding ib = {sprite_batch_->indexBuffer(), 0}; - SDL_BindGPUVertexBuffers(pass1, 0, &vb, 1); - SDL_BindGPUIndexBuffer(pass1, &ib, SDL_GPU_INDEXELEMENTSIZE_32BIT); - - // Background (white texture tinted by vertex color) + { + SDL_GPUBufferBinding vb = {sprite_batch_->vertexBuffer(), 0}; + SDL_GPUBufferBinding ib = {sprite_batch_->indexBuffer(), 0}; + SDL_BindGPUVertexBuffers(pass1, 0, &vb, 1); + SDL_BindGPUIndexBuffer(pass1, &ib, SDL_GPU_INDEXELEMENTSIZE_32BIT); + } if (white_tex_ && white_tex_->isValid() && sprite_batch_->bgIndexCount() > 0) { SDL_GPUTextureSamplerBinding tsb = {white_tex_->texture(), white_tex_->sampler()}; SDL_BindGPUFragmentSamplers(pass1, 0, &tsb, 1); SDL_DrawGPUIndexedPrimitives(pass1, sprite_batch_->bgIndexCount(), 1, 0, 0, 0); } - // Sprites - if (sprite_batch_->spriteIndexCount() > 0) { + if (use_instanced_balls && gpu_ball_buffer_->count() > 0) { + // PHYSICS / CPU-BOIDS: instanced rendering — 6 procedural vertices per instance + SDL_BindGPUGraphicsPipeline(pass1, gpu_pipeline_->ballPipeline()); + SDL_GPUBufferBinding ball_vb = 
{gpu_ball_buffer_->buffer(), 0}; + SDL_BindGPUVertexBuffers(pass1, 0, &ball_vb, 1); + SDL_GPUTextureSamplerBinding tsb = {sprite_tex->texture(), sprite_tex->sampler()}; + SDL_BindGPUFragmentSamplers(pass1, 0, &tsb, 1); + SDL_DrawGPUPrimitives(pass1, 6, static_cast(gpu_ball_buffer_->count()), 0, 0); + } else if (!use_instanced_balls && sprite_batch_->spriteIndexCount() > 0) { + // SHAPE: sprite batch with depth sort (re-bind sprite pipeline + buffers) + SDL_BindGPUGraphicsPipeline(pass1, gpu_pipeline_->spritePipeline()); + SDL_GPUBufferBinding vb = {sprite_batch_->vertexBuffer(), 0}; + SDL_GPUBufferBinding ib = {sprite_batch_->indexBuffer(), 0}; + SDL_BindGPUVertexBuffers(pass1, 0, &vb, 1); + SDL_BindGPUIndexBuffer(pass1, &ib, SDL_GPU_INDEXELEMENTSIZE_32BIT); SDL_GPUTextureSamplerBinding tsb = {sprite_tex->texture(), sprite_tex->sampler()}; SDL_BindGPUFragmentSamplers(pass1, 0, &tsb, 1); SDL_DrawGPUIndexedPrimitives(pass1, sprite_batch_->spriteIndexCount(), 1, diff --git a/source/engine.hpp b/source/engine.hpp index 97a3afe..1ef68e9 100644 --- a/source/engine.hpp +++ b/source/engine.hpp @@ -16,6 +16,7 @@ #include "boids_mgr/boid_manager.hpp" // for BoidManager #include "defines.hpp" // for GravityDirection, ColorTheme, ShapeType #include "external/texture.hpp" // for Texture +#include "gpu/gpu_ball_buffer.hpp" // for GpuBallBuffer, BallGPUData #include "gpu/gpu_context.hpp" // for GpuContext #include "gpu/gpu_pipeline.hpp" // for GpuPipeline #include "gpu/gpu_sprite_batch.hpp" // for GpuSpriteBatch @@ -137,8 +138,10 @@ class Engine { // === SDL_GPU rendering pipeline === std::unique_ptr gpu_ctx_; // Device + swapchain - std::unique_ptr gpu_pipeline_; // Sprite + postfx pipelines - std::unique_ptr sprite_batch_; // Per-frame vertex/index batch + std::unique_ptr gpu_pipeline_; // Sprite + ball + postfx pipelines + std::unique_ptr sprite_batch_; // Per-frame vertex/index batch (bg + shape + UI) + std::unique_ptr gpu_ball_buffer_; // Instanced ball instance data 
(PHYSICS/BOIDS) + std::vector ball_gpu_data_; // CPU-side staging vector (reused each frame) std::unique_ptr offscreen_tex_; // Offscreen render target (Pass 1) std::unique_ptr white_tex_; // 1×1 white (background gradient) std::unique_ptr ui_tex_; // UI text overlay texture @@ -246,4 +249,5 @@ class Engine { void recreateOffscreenTexture(); // Recreate when resolution changes void renderUIToSurface(); // Render text/UI to ui_surface_ void uploadUISurface(SDL_GPUCommandBuffer* cmd_buf); // Upload ui_surface_ → ui_tex_ + }; diff --git a/source/gpu/gpu_ball_buffer.cpp b/source/gpu/gpu_ball_buffer.cpp new file mode 100644 index 0000000..4949b92 --- /dev/null +++ b/source/gpu/gpu_ball_buffer.cpp @@ -0,0 +1,65 @@ +#include "gpu_ball_buffer.hpp" + +#include +#include // std::min +#include // memcpy + +bool GpuBallBuffer::init(SDL_GPUDevice* device) { + Uint32 buf_size = static_cast(MAX_BALLS) * sizeof(BallGPUData); + + // GPU vertex buffer (instance-rate data read by the ball instanced shader) + SDL_GPUBufferCreateInfo buf_info = {}; + buf_info.usage = SDL_GPU_BUFFERUSAGE_VERTEX; + buf_info.size = buf_size; + gpu_buf_ = SDL_CreateGPUBuffer(device, &buf_info); + if (!gpu_buf_) { + SDL_Log("GpuBallBuffer: GPU buffer creation failed: %s", SDL_GetError()); + return false; + } + + // Transfer buffer (upload staging, cycled every frame) + SDL_GPUTransferBufferCreateInfo tb_info = {}; + tb_info.usage = SDL_GPU_TRANSFERBUFFERUSAGE_UPLOAD; + tb_info.size = buf_size; + transfer_buf_ = SDL_CreateGPUTransferBuffer(device, &tb_info); + if (!transfer_buf_) { + SDL_Log("GpuBallBuffer: transfer buffer creation failed: %s", SDL_GetError()); + return false; + } + + SDL_Log("GpuBallBuffer: initialized (capacity %d balls, %.1f MB VRAM)", + MAX_BALLS, buf_size / (1024.0f * 1024.0f)); + return true; +} + +void GpuBallBuffer::destroy(SDL_GPUDevice* device) { + if (!device) return; + if (transfer_buf_) { SDL_ReleaseGPUTransferBuffer(device, transfer_buf_); transfer_buf_ = nullptr; } + if 
(gpu_buf_) { SDL_ReleaseGPUBuffer(device, gpu_buf_); gpu_buf_ = nullptr; } + count_ = 0; +} + +bool GpuBallBuffer::upload(SDL_GPUDevice* device, SDL_GPUCommandBuffer* cmd, + const BallGPUData* data, int count) { + if (!data || count <= 0) { count_ = 0; return false; } + count = std::min(count, MAX_BALLS); + + Uint32 upload_size = static_cast(count) * sizeof(BallGPUData); + + void* ptr = SDL_MapGPUTransferBuffer(device, transfer_buf_, true /* cycle */); + if (!ptr) { + SDL_Log("GpuBallBuffer: transfer buffer map failed: %s", SDL_GetError()); + return false; + } + memcpy(ptr, data, upload_size); + SDL_UnmapGPUTransferBuffer(device, transfer_buf_); + + SDL_GPUCopyPass* copy = SDL_BeginGPUCopyPass(cmd); + SDL_GPUTransferBufferLocation src = { transfer_buf_, 0 }; + SDL_GPUBufferRegion dst = { gpu_buf_, 0, upload_size }; + SDL_UploadToGPUBuffer(copy, &src, &dst, true /* cycle */); + SDL_EndGPUCopyPass(copy); + + count_ = count; + return true; +} diff --git a/source/gpu/gpu_ball_buffer.hpp b/source/gpu/gpu_ball_buffer.hpp new file mode 100644 index 0000000..07ed78f --- /dev/null +++ b/source/gpu/gpu_ball_buffer.hpp @@ -0,0 +1,47 @@ +#pragma once + +#include +#include + +// --------------------------------------------------------------------------- +// BallGPUData — 32-byte per-instance record stored in VRAM. +// Positions and sizes pre-converted to NDC space on CPU so the vertex shader +// needs no screen-dimension uniform. 
+// cx, cy : NDC center (cx = (x + w/2)/sw*2-1, cy = 1-(y+h/2)/sh*2) +// hw, hh : NDC half-size (hw = w/sw, hh = h/sh, both positive) +// r,g,b,a: RGBA in [0,1] +// --------------------------------------------------------------------------- +struct BallGPUData { + float cx, cy; // NDC center + float hw, hh; // NDC half-size (positive) + float r, g, b, a; // RGBA color [0,1] +}; +static_assert(sizeof(BallGPUData) == 32, "BallGPUData must be 32 bytes"); + +// ============================================================================ +// GpuBallBuffer — owns the GPU vertex buffer used for instanced ball rendering. +// +// Usage per frame: +// buffer.upload(device, cmd, data, count); // runs its own copy pass; call before the render pass begins +// // Then in render pass: bind buffer, SDL_DrawGPUPrimitives(pass, 6, count, 0, 0) +// ============================================================================ +class GpuBallBuffer { +public: + static constexpr int MAX_BALLS = 500000; + + bool init(SDL_GPUDevice* device); + void destroy(SDL_GPUDevice* device); + + // Upload ball array to GPU via an internal copy pass. + // count is clamped to MAX_BALLS. Returns false on error or empty input.
+ bool upload(SDL_GPUDevice* device, SDL_GPUCommandBuffer* cmd, + const BallGPUData* data, int count); + + SDL_GPUBuffer* buffer() const { return gpu_buf_; } + int count() const { return count_; } + +private: + SDL_GPUBuffer* gpu_buf_ = nullptr; + SDL_GPUTransferBuffer* transfer_buf_ = nullptr; + int count_ = 0; +}; diff --git a/source/gpu/gpu_pipeline.cpp b/source/gpu/gpu_pipeline.cpp index 1c91f6e..71295f4 100644 --- a/source/gpu/gpu_pipeline.cpp +++ b/source/gpu/gpu_pipeline.cpp @@ -1,8 +1,10 @@ #include "gpu_pipeline.hpp" -#include "gpu_sprite_batch.hpp" // for GpuVertex layout +#include "gpu_sprite_batch.hpp" // for GpuVertex layout +#include "gpu_ball_buffer.hpp" // for BallGPUData layout #include #include // offsetof +#include // strlen // ============================================================================ // MSL Shaders (Metal Shading Language, macOS) @@ -133,6 +135,60 @@ fragment float4 postfx_fs(PostVOut in [[stage_in]], } )"; +// --------------------------------------------------------------------------- +// Ball instanced vertex shader +// Reads BallGPUData as per-instance attributes (input_rate = INSTANCE). +// Generates a 6-vertex quad (2 triangles) per instance using vertex_id. +// +// BallGPUData layout: +// float2 center [[attribute(0)]] — NDC center (cx, cy) +// float2 half [[attribute(1)]] — NDC half-size (hw, hh), both positive +// float4 col [[attribute(2)]] — RGBA [0,1] +// +// NDC convention (SDL / Metal): Y increases upward (+1=top, -1=bottom). 
+// halfsize.x = w/screen_w, halfsize.y = h/screen_h (positive; Y is not flipped) +// Vertex order: TL TR BL | TR BR BL (clockwise as seen in Y-up NDC) +// --------------------------------------------------------------------------- +static const char* kBallInstancedVertMSL = R"( +#include +using namespace metal; + +struct BallInstance { + float2 center [[attribute(0)]]; // NDC center + float2 halfsize [[attribute(1)]]; // NDC half-size (both positive); 'half' is reserved in MSL + float4 col [[attribute(2)]]; +}; +struct BallVOut { + float4 pos [[position]]; + float2 uv; + float4 col; +}; + +vertex BallVOut ball_instanced_vs(BallInstance inst [[stage_in]], + uint vid [[vertex_id]]) { + // Offset signs for each of the 6 vertices (TL TR BL | TR BR BL) + const float2 offsets[6] = { + {-1.0f, 1.0f}, // TL + { 1.0f, 1.0f}, // TR + {-1.0f, -1.0f}, // BL + { 1.0f, 1.0f}, // TR (shared) + { 1.0f, -1.0f}, // BR + {-1.0f, -1.0f}, // BL (shared) + }; + // UV: TL=(0,0) TR=(1,0) BL=(0,1) BR=(1,1) + const float2 uvs[6] = { + {0.0f, 0.0f}, {1.0f, 0.0f}, {0.0f, 1.0f}, + {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}, + }; + float2 pos = inst.center + offsets[vid] * inst.halfsize; + BallVOut out; + out.pos = float4(pos.x, pos.y, 0.0f, 1.0f); + out.uv = uvs[vid]; + out.col = inst.col; + return out; +} +)"; + // ============================================================================ // GpuPipeline implementation // ============================================================================ @@ -222,6 +278,71 @@ bool GpuPipeline::init(SDL_GPUDevice* device, return false; } + // ---------------------------------------------------------------- + // Ball instanced pipeline + // Vertex: ball_instanced_vs (BallGPUData per-instance, no index buffer) + // Fragment: sprite_fs (same texture+color blend as sprite pipeline) + // Targets: offscreen (same as sprite pipeline) + // ---------------------------------------------------------------- + SDL_GPUShader* ball_vert = createShader(device,
kBallInstancedVertMSL, "ball_instanced_vs", + SDL_GPU_SHADERSTAGE_VERTEX, 0, 0); + SDL_GPUShader* ball_frag = createShader(device, kSpriteFragMSL, "sprite_fs", + SDL_GPU_SHADERSTAGE_FRAGMENT, 1, 0); + if (!ball_vert || !ball_frag) { + SDL_Log("GpuPipeline: failed to create ball instanced shaders"); + if (ball_vert) SDL_ReleaseGPUShader(device, ball_vert); + if (ball_frag) SDL_ReleaseGPUShader(device, ball_frag); + return false; + } + + // Vertex input: BallGPUData as per-instance data (step rate = 1 instance) + SDL_GPUVertexBufferDescription ball_vb_desc = {}; + ball_vb_desc.slot = 0; + ball_vb_desc.pitch = sizeof(BallGPUData); + ball_vb_desc.input_rate = SDL_GPU_VERTEXINPUTRATE_INSTANCE; + ball_vb_desc.instance_step_rate = 1; + + SDL_GPUVertexAttribute ball_attrs[3] = {}; + // attr 0: center (float2) at offset 0 + ball_attrs[0].location = 0; + ball_attrs[0].buffer_slot = 0; + ball_attrs[0].format = SDL_GPU_VERTEXELEMENTFORMAT_FLOAT2; + ball_attrs[0].offset = static_cast(offsetof(BallGPUData, cx)); + // attr 1: half-size (float2) at offset 8 + ball_attrs[1].location = 1; + ball_attrs[1].buffer_slot = 0; + ball_attrs[1].format = SDL_GPU_VERTEXELEMENTFORMAT_FLOAT2; + ball_attrs[1].offset = static_cast(offsetof(BallGPUData, hw)); + // attr 2: color (float4) at offset 16 + ball_attrs[2].location = 2; + ball_attrs[2].buffer_slot = 0; + ball_attrs[2].format = SDL_GPU_VERTEXELEMENTFORMAT_FLOAT4; + ball_attrs[2].offset = static_cast(offsetof(BallGPUData, r)); + + SDL_GPUVertexInputState ball_vertex_input = {}; + ball_vertex_input.vertex_buffer_descriptions = &ball_vb_desc; + ball_vertex_input.num_vertex_buffers = 1; + ball_vertex_input.vertex_attributes = ball_attrs; + ball_vertex_input.num_vertex_attributes = 3; + + SDL_GPUGraphicsPipelineCreateInfo ball_pipe_info = {}; + ball_pipe_info.vertex_shader = ball_vert; + ball_pipe_info.fragment_shader = ball_frag; + ball_pipe_info.vertex_input_state = ball_vertex_input; + ball_pipe_info.primitive_type = 
SDL_GPU_PRIMITIVETYPE_TRIANGLELIST; + ball_pipe_info.target_info.num_color_targets = 1; + ball_pipe_info.target_info.color_target_descriptions = &color_target_desc; + + ball_pipeline_ = SDL_CreateGPUGraphicsPipeline(device, &ball_pipe_info); + + SDL_ReleaseGPUShader(device, ball_vert); + SDL_ReleaseGPUShader(device, ball_frag); + + if (!ball_pipeline_) { + SDL_Log("GpuPipeline: ball instanced pipeline creation failed: %s", SDL_GetError()); + return false; + } + // ---------------------------------------------------------------- // UI overlay pipeline (same as sprite but renders to swapchain format) // Reuse sprite shaders with different target format. @@ -275,12 +396,13 @@ bool GpuPipeline::init(SDL_GPUDevice* device, return false; } - SDL_Log("GpuPipeline: sprite and postfx pipelines created successfully"); + SDL_Log("GpuPipeline: all pipelines created successfully"); return true; } void GpuPipeline::destroy(SDL_GPUDevice* device) { if (sprite_pipeline_) { SDL_ReleaseGPUGraphicsPipeline(device, sprite_pipeline_); sprite_pipeline_ = nullptr; } + if (ball_pipeline_) { SDL_ReleaseGPUGraphicsPipeline(device, ball_pipeline_); ball_pipeline_ = nullptr; } if (postfx_pipeline_) { SDL_ReleaseGPUGraphicsPipeline(device, postfx_pipeline_); postfx_pipeline_ = nullptr; } } @@ -289,7 +411,8 @@ SDL_GPUShader* GpuPipeline::createShader(SDL_GPUDevice* device, const char* entrypoint, SDL_GPUShaderStage stage, Uint32 num_samplers, - Uint32 num_uniform_buffers) { + Uint32 num_uniform_buffers, + Uint32 num_storage_buffers) { SDL_GPUShaderCreateInfo info = {}; info.code = reinterpret_cast(msl_source); info.code_size = static_cast(strlen(msl_source) + 1); @@ -298,7 +421,7 @@ SDL_GPUShader* GpuPipeline::createShader(SDL_GPUDevice* device, info.stage = stage; info.num_samplers = num_samplers; info.num_storage_textures = 0; - info.num_storage_buffers = 0; + info.num_storage_buffers = num_storage_buffers; info.num_uniform_buffers = num_uniform_buffers; SDL_GPUShader* shader = 
SDL_CreateGPUShader(device, &info); diff --git a/source/gpu/gpu_pipeline.hpp b/source/gpu/gpu_pipeline.hpp index 8f1e409..fc5e05b 100644 --- a/source/gpu/gpu_pipeline.hpp +++ b/source/gpu/gpu_pipeline.hpp @@ -19,6 +19,9 @@ struct PostFXUniforms { // // sprite_pipeline_ : textured quads, alpha blending. // Vertex layout: GpuVertex (pos float2, uv float2, col float4). +// ball_pipeline_ : instanced ball rendering, alpha blending. +// Vertex layout: BallGPUData as per-instance data (input_rate=INSTANCE). +// 6 procedural vertices per instance (no index buffer). // postfx_pipeline_ : full-screen triangle, no vertex buffer, no blend. // Reads offscreen texture, writes to swapchain. // Accepts PostFXUniforms via fragment uniform buffer slot 0. @@ -33,7 +36,8 @@ public: void destroy(SDL_GPUDevice* device); SDL_GPUGraphicsPipeline* spritePipeline() const { return sprite_pipeline_; } - SDL_GPUGraphicsPipeline* postfxPipeline() const { return postfx_pipeline_; } + SDL_GPUGraphicsPipeline* ballPipeline() const { return ball_pipeline_; } + SDL_GPUGraphicsPipeline* postfxPipeline() const { return postfx_pipeline_; } private: SDL_GPUShader* createShader(SDL_GPUDevice* device, @@ -41,8 +45,10 @@ private: const char* entrypoint, SDL_GPUShaderStage stage, Uint32 num_samplers, - Uint32 num_uniform_buffers); + Uint32 num_uniform_buffers, + Uint32 num_storage_buffers = 0); SDL_GPUGraphicsPipeline* sprite_pipeline_ = nullptr; + SDL_GPUGraphicsPipeline* ball_pipeline_ = nullptr; SDL_GPUGraphicsPipeline* postfx_pipeline_ = nullptr; }; diff --git a/source/input/input_handler.cpp b/source/input/input_handler.cpp index 55aff13..6730651 100644 --- a/source/input/input_handler.cpp +++ b/source/input/input_handler.cpp @@ -105,7 +105,7 @@ bool InputHandler::processEvents(Engine& engine) { // Toggle Modo Boids (comportamiento de enjambre) case SDLK_B: - // engine.toggleBoidsMode(); + engine.toggleBoidsMode(); break; // Ciclar temas de color (movido de B a C)