From 87aa4406910bdd782ab97011ce5735e9f4ecce88 Mon Sep 17 00:00:00 2001 From: SeanOMik Date: Thu, 11 Jul 2024 20:00:46 -0400 Subject: [PATCH] render: create a GpuSlotBuffer for stable indices in a gpu buffer --- Cargo.lock | 25 +++ lyra-game/Cargo.toml | 1 + lyra-game/src/render/graph/passes/shadows.rs | 152 ++++++++++++------- lyra-game/src/render/mod.rs | 5 +- lyra-game/src/render/slot_buffer.rs | 150 ++++++++++++++++++ 5 files changed, 273 insertions(+), 60 deletions(-) create mode 100644 lyra-game/src/render/slot_buffer.rs diff --git a/Cargo.lock b/Cargo.lock index b453320..c0ae382 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1882,6 +1882,7 @@ dependencies = [ "petgraph", "quote", "rectangle-pack", + "round_mult", "rustc-hash", "syn 2.0.51", "thiserror", @@ -2891,6 +2892,15 @@ dependencies = [ "winreg", ] +[[package]] +name = "round_mult" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74bc7d5286c4d36f09aa6ae93f76acf6aa068cd62bc02970a9deb24763655dee" +dependencies = [ + "rustc_version", +] + [[package]] name = "rustc-demangle" version = "0.1.23" @@ -2903,6 +2913,15 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver", +] + [[package]] name = "rustix" version = "0.37.27" @@ -3017,6 +3036,12 @@ dependencies = [ "libc", ] +[[package]] +name = "semver" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" + [[package]] name = "serde" version = "1.0.194" diff --git a/lyra-game/Cargo.toml b/lyra-game/Cargo.toml index 5420015..c5bd26f 100644 --- a/lyra-game/Cargo.toml +++ b/lyra-game/Cargo.toml @@ -39,6 +39,7 @@ rustc-hash = "1.1.0" petgraph = { version = "0.6.5", features = ["matrix_graph"] } bind_match = "0.1.2" rectangle-pack = "0.4.2" +round_mult = "0.1.3" [features] tracy = ["dep:tracing-tracy"] diff --git a/lyra-game/src/render/graph/passes/shadows.rs b/lyra-game/src/render/graph/passes/shadows.rs index 4dcdafa..9fe0e6f 100644 --- a/lyra-game/src/render/graph/passes/shadows.rs +++ b/lyra-game/src/render/graph/passes/shadows.rs @@ -1,7 +1,14 @@ -use std::{collections::VecDeque, mem, num::NonZeroU64, rc::Rc, sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard}}; +use std::{ + collections::VecDeque, + mem, + num::NonZeroU64, + rc::Rc, + sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard}, +}; use lyra_ecs::{ - query::{filter::Has, Entities}, AtomicRef, Component, Entity, ResourceData + query::{filter::Has, Entities}, + AtomicRef, Component, Entity, ResourceData, }; use lyra_game_derive::RenderGraphLabel; use lyra_math::Transform; @@ -10,7 +17,12 @@ use tracing::{debug, warn}; use wgpu::util::DeviceExt; use crate::render::{ - graph::{Node, NodeDesc, NodeType, SlotAttribute, SlotValue}, light::directional::DirectionalLight, resource::{RenderPipeline, RenderPipelineDescriptor, Shader, VertexState}, transform_buffer_storage::TransformBuffers, vertex::Vertex, AtlasViewport, TextureAtlas + graph::{Node, NodeDesc, NodeType, SlotAttribute, SlotValue}, + light::directional::DirectionalLight, + resource::{RenderPipeline, RenderPipelineDescriptor, Shader, VertexState}, + transform_buffer_storage::TransformBuffers, + vertex::Vertex, + AtlasViewport, GpuSlotBuffer, TextureAtlas, }; use super::{MeshBufferStorage, RenderAssets, RenderMeshes}; @@ -40,8 +52,7 @@ struct LightDepthMap { pub struct ShadowMapsPass { bgl: Arc, atlas_size_buffer: Arc, - light_uniforms_buffer: Arc, - light_uniforms_index: u64, + light_uniforms_buffer: GpuSlotBuffer, uniforms_bg: Arc, /// depth maps for a light owned by an entity. depth_maps: FxHashMap, @@ -63,20 +74,18 @@ impl ShadowMapsPass { let bgl = Arc::new( device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { label: Some("bgl_shadow_maps_lights"), - entries: &[ - wgpu::BindGroupLayoutEntry { - binding: 0, - visibility: wgpu::ShaderStages::VERTEX_FRAGMENT, - ty: wgpu::BindingType::Buffer { - ty: wgpu::BufferBindingType::Storage { read_only: true }, - has_dynamic_offset: true, - min_binding_size: Some( - NonZeroU64::new(mem::size_of::() as _).unwrap(), - ), - }, - count: None, - } - ], + entries: &[wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::VERTEX_FRAGMENT, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: true }, + has_dynamic_offset: true, + min_binding_size: Some( + NonZeroU64::new(mem::size_of::() as _).unwrap(), + ), + }, + count: None, + }], }), ); @@ -87,12 +96,11 @@ impl ShadowMapsPass { SHADOW_SIZE * 4, ); - let atlas_size_buffer = - device.create_buffer_init(&wgpu::util::BufferInitDescriptor { - label: Some("buffer_shadow_maps_atlas_size"), - usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST, - contents: bytemuck::bytes_of(&atlas.atlas_size()), - }); + let atlas_size_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { + label: Some("buffer_shadow_maps_atlas_size"), + usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST, + contents: bytemuck::bytes_of(&atlas.atlas_size()), + }); let sampler = device.create_sampler(&wgpu::SamplerDescriptor { label: Some("sampler_shadow_map_atlas"), @@ -106,13 +114,15 @@ impl ShadowMapsPass { ..Default::default() }); - let uniforms_buffer = - device.create_buffer(&wgpu::BufferDescriptor { - label: Some("buffer_shadow_maps_light"), - usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST, - size: device.limits().max_storage_buffer_binding_size as u64, - mapped_at_creation: false, - }); + let cap = device.limits().max_storage_buffer_binding_size as u64 + / mem::size_of::() as u64; + let uniforms_buffer = GpuSlotBuffer::new_aligned( + device, + Some("buffer_shadow_maps_light"), + wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST, + cap, + 256, + ); let uniforms_bg = device.create_bind_group(&wgpu::BindGroupDescriptor { label: Some("bind_group_shadows"), @@ -120,7 +130,7 @@ impl ShadowMapsPass { entries: &[wgpu::BindGroupEntry { binding: 0, resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding { - buffer: &uniforms_buffer, + buffer: uniforms_buffer.buffer(), offset: 0, size: Some(NonZeroU64::new(mem::size_of::() as _).unwrap()), }), @@ -129,8 +139,7 @@ impl ShadowMapsPass { Self { bgl, - light_uniforms_buffer: Arc::new(uniforms_buffer), - light_uniforms_index: 0, + light_uniforms_buffer: uniforms_buffer, uniforms_bg: Arc::new(uniforms_bg), atlas_size_buffer: Arc::new(atlas_size_buffer), depth_maps: Default::default(), @@ -145,12 +154,18 @@ impl ShadowMapsPass { } /// Create a depth map and return the id of the depth map in the texture atlas. - fn create_depth_map(&mut self, device: &wgpu::Device, queue: &wgpu::Queue, entity: Entity, light_pos: Transform) -> LightDepthMap { + fn create_depth_map( + &mut self, + queue: &wgpu::Queue, + entity: Entity, + light_pos: Transform, + ) -> LightDepthMap { const NEAR_PLANE: f32 = 0.1; const FAR_PLANE: f32 = 45.0; let mut atlas = self.atlas.get_mut(); - let atlas_index = atlas.pack_new_texture(SHADOW_SIZE.x as _, SHADOW_SIZE.y as _) + let atlas_index = atlas + .pack_new_texture(SHADOW_SIZE.x as _, SHADOW_SIZE.y as _) .expect("failed to pack new shadow map into texture atlas"); let atlas_frame = atlas.texture_viewport(atlas_index); @@ -166,21 +181,19 @@ impl ShadowMapsPass { atlas_frame, }; - let uniform_index = self.light_uniforms_index; + /* let uniform_index = self.light_uniforms_index; self.light_uniforms_index += 1; //self.light_uniforms_buffer let offset = uniform_index_offset(&device.limits(), uniform_index); - queue.write_buffer(&self.light_uniforms_buffer, offset as u64, bytemuck::bytes_of(&uniform)); + queue.write_buffer(&self.light_uniforms_buffer, offset as u64, bytemuck::bytes_of(&uniform)); */ + let uniform_index = self.light_uniforms_buffer.insert(queue, &uniform); let v = LightDepthMap { atlas_index, uniform_index, }; - self.depth_maps.insert( - entity, - v, - ); + self.depth_maps.insert(entity, v); v } @@ -228,7 +241,9 @@ impl Node for ShadowMapsPass { node.add_buffer_slot( ShadowMapsPassSlots::ShadowLightUniformsBuffer, SlotAttribute::Output, - Some(SlotValue::Buffer(self.light_uniforms_buffer.clone())), + Some(SlotValue::Buffer( + self.light_uniforms_buffer.buffer().clone(), + )), ); node.add_buffer_slot( @@ -257,21 +272,24 @@ impl Node for ShadowMapsPass { for (entity, pos, _) in world.view_iter::<(Entities, &Transform, Has)>() { if !self.depth_maps.contains_key(&entity) { - // TODO: dont pack the textures as they're added - let atlas_index = self.create_depth_map(graph.device(), &context.queue, entity, *pos); + let atlas_index = + self.create_depth_map(&context.queue, entity, *pos); index_components_queue.push_back((entity, atlas_index)); debug!("Created depth map for {:?} light entity", entity); } } - + // now consume from the queue adding the components to the entities while let Some((entity, depth)) = index_components_queue.pop_front() { - world.insert(entity, LightShadowMapId { - atlas_index: depth.atlas_index, - uniform_index: depth.uniform_index, - }); + world.insert( + entity, + LightShadowMapId { + atlas_index: depth.atlas_index, + uniform_index: depth.uniform_index, + }, + ); } if self.pipeline.is_none() { @@ -358,11 +376,26 @@ impl Node for ShadowMapsPass { }); pass.set_pipeline(&pipeline); let viewport = atlas.texture_viewport(dir_depth_map.atlas_index); - debug!("Rendering shadow map to viewport: {viewport:?}, uniform index: {}", dir_depth_map.uniform_index); + debug!( + "Rendering shadow map to viewport: {viewport:?}, uniform index: {}", + dir_depth_map.uniform_index + ); // only render to the light's map in the atlas - pass.set_viewport(viewport.offset.x as _, viewport.offset.y as _, viewport.size.x as _, viewport.size.y as _, 0.0, 1.0); + pass.set_viewport( + viewport.offset.x as _, + viewport.offset.y as _, + viewport.size.x as _, + viewport.size.y as _, + 0.0, + 1.0, + ); // only clear the light map in the atlas - pass.set_scissor_rect(viewport.offset.x, viewport.offset.y, viewport.size.x, viewport.size.y); + pass.set_scissor_rect( + viewport.offset.x, + viewport.offset.y, + viewport.size.x, + viewport.size.y, + ); for job in render_meshes.iter() { // get the mesh (containing vertices) and the buffers from storage @@ -373,8 +406,9 @@ impl Node for ShadowMapsPass { } let buffers = buffers.unwrap(); - let uniform_index = uniform_index_offset(&context.device.limits(), dir_depth_map.uniform_index); - //debug!("Uniform offset: {uniform_index}"); + let uniform_index = + self.light_uniforms_buffer + .offset_of(dir_depth_map.uniform_index) as u32; pass.set_bind_group(0, &self.uniforms_bg, &[uniform_index]); // Get the bindgroup for job's transform and bind to it using an offset. @@ -414,7 +448,7 @@ pub struct LightShadowUniform { } /// A component that stores the ID of a shadow map in the shadow map atlas for the entities. -/// +/// /// An entity owns a light. If that light casts shadows, this will contain the ID of the shadow /// map inside of the [`TextureAtlas`]. #[derive(Debug, Default, Copy, Clone, Component)] @@ -447,7 +481,7 @@ impl LightShadowMapAtlas { } } -fn uniform_index_offset(limits: &wgpu::Limits, uniform_idx: u64) -> u32 { +/* fn uniform_index_offset(limits: &wgpu::Limits, uniform_idx: u64) -> u32 { let t = uniform_idx as u32 % (limits.max_storage_buffer_binding_size / mem::size_of::() as u32); t * limits.min_uniform_buffer_offset_alignment -} \ No newline at end of file +} */ diff --git a/lyra-game/src/render/mod.rs b/lyra-game/src/render/mod.rs index 641a6e0..11341f9 100755 --- a/lyra-game/src/render/mod.rs +++ b/lyra-game/src/render/mod.rs @@ -17,4 +17,7 @@ pub mod avec; pub mod graph; mod texture_atlas; -pub use texture_atlas::*; \ No newline at end of file +pub use texture_atlas::*; + +mod slot_buffer; +pub use slot_buffer::*; \ No newline at end of file diff --git a/lyra-game/src/render/slot_buffer.rs b/lyra-game/src/render/slot_buffer.rs new file mode 100644 index 0000000..9bc1264 --- /dev/null +++ b/lyra-game/src/render/slot_buffer.rs @@ -0,0 +1,150 @@ +use std::{collections::VecDeque, marker::PhantomData, mem, num::NonZeroU64, sync::Arc}; + +/// A buffer on the GPU that has persistent indices. +/// +/// `GpuSlotBuffer` allocates a buffer on the GPU and keeps stable indices of elements and +/// reuses ones that were removed. It supports aligned buffers with [`GpuSlotBuffer::new_aligned`], +/// as well as unaligned buffers with [`GpuSlotBuffer::new`]. +pub struct GpuSlotBuffer { + /// The amount of elements that can fit in the buffer. + capacity: u64, + /// The ending point of the buffer elements. + len: u64, + /// The list of dead and reusable indices in the buffer. + dead_indices: VecDeque, + /// The optional alignment of elements in the buffer. + alignment: Option, + /// The actual gpu buffer + buffer: Arc, + _marker: PhantomData, +} + +impl GpuSlotBuffer { + /// Create a new GpuSlotBuffer with unaligned elements. + /// + /// See [`GpuSlotBuffer::new_aligned`]. + pub fn new(device: &wgpu::Device, label: Option<&str>, usage: wgpu::BufferUsages, capacity: u64) -> Self { + Self::new_impl(device, label, usage, capacity, None) + } + + /// Create a new buffer with **aligned** elements. + /// + /// See [`GpuSlotBuffer::new`]. + pub fn new_aligned(device: &wgpu::Device, label: Option<&str>, usage: wgpu::BufferUsages, capacity: u64, alignment: u64) -> Self { + Self::new_impl(device, label, usage, capacity, Some(alignment)) + } + + fn new_impl(device: &wgpu::Device, label: Option<&str>, usage: wgpu::BufferUsages, capacity: u64, alignment: Option) -> Self { + let buffer = Arc::new(device.create_buffer(&wgpu::BufferDescriptor { + label, + size: capacity * mem::size_of::() as u64, + usage, + mapped_at_creation: false, + })); + + Self { + capacity, + len: 0, + dead_indices: VecDeque::default(), + buffer, + alignment, + _marker: PhantomData + } + } + + /// Calculates the byte offset in the buffer of the element at `i`. + pub fn offset_of(&self, i: u64) -> u64 { + let offset = i * mem::size_of::() as u64; + + if let Some(align) = self.alignment { + round_mult::up(offset, NonZeroU64::new(align).unwrap()).unwrap() + } else { + offset + } + } + + /// Set an element at `i` in the buffer to `val`. + pub fn set_at(&self, queue: &wgpu::Queue, i: u64, val: &T) { + let offset = self.offset_of(i); + queue.write_buffer(&self.buffer, offset, bytemuck::bytes_of(val)); + } + + /// Attempt to insert an element to the GPU buffer, returning the index it was inserted at. + /// + /// Returns `None` when the buffer has no space to fit the element. + pub fn try_insert(&mut self, queue: &wgpu::Queue, val: &T) -> Option { + // reuse a dead index or get the next one + let i = match self.dead_indices.pop_front() { + Some(i) => i, + None => { + if self.len == self.capacity { + return None; + } + + let i = self.len; + self.len += 1; + i + } + }; + + self.set_at(queue, i, val); + + Some(i) + } + + /// Insert an element to the GPU buffer, returning the index it was inserted at. + /// + /// The index is not guaranteed to be the end of the buffer since this structure reuses + /// indices after they're removed. + /// + /// # Panics + /// Panics if the buffer does not have space to fit `val`, see [`GpuSlotBuffer::try_insert`]. + pub fn insert(&mut self, queue: &wgpu::Queue, val: &T) -> u64 { + self.try_insert(queue, val) + .expect("GPU slot buffer ran out of slots to push elements into") + } + + /// Remove the element at `i`, clearing the elements slot in the buffer. + /// + /// If you do not care that the slot in the buffer is emptied, use + /// [`GpuSlotBuffer::remove_quick`]. + pub fn remove(&mut self, queue: &wgpu::Queue, i: u64) { + let mut zeros = Vec::new(); + zeros.resize(mem::size_of::(), 0); + + let offset = self.offset_of(i); + queue.write_buffer(&self.buffer, offset, bytemuck::cast_slice(zeros.as_slice())); + self.dead_indices.push_back(i); + } + + /// Remove the element at `i` without clearing its space in the buffer. + /// + /// If you want to ensure that the slot in the buffer is emptied, use + /// [`GpuSlotBuffer::remove`]. + pub fn remove_quick(&mut self, i: u64) { + self.dead_indices.push_back(i); + } + + /// Returns the backing [`wgpu::Buffer`]. + pub fn buffer(&self) -> &Arc { + &self.buffer + } + + /// Return the length of the buffer. + /// + /// This value may not reflect the amount of elements that are actually alive in the buffer if + /// elements were removed and not re-added. + pub fn len(&self) -> u64 { + self.len + } + + /// Return the amount of inuse indices in the buffer. + pub fn inuse_len(&self) -> u64 { + self.len - self.dead_indices.len() as u64 + } + + /// Returns the amount of elements the buffer can fit. + pub fn capacity(&self) -> u64 { + self.capacity + } +} \ No newline at end of file