render: create a GpuSlotBuffer for stable indices in a gpu buffer

This commit is contained in:
SeanOMik 2024-07-11 20:00:46 -04:00
parent cc1c482c40
commit 87aa440691
Signed by: SeanOMik
GPG Key ID: FEC9E2FC15235964
5 changed files with 273 additions and 60 deletions

25
Cargo.lock generated
View File

@ -1882,6 +1882,7 @@ dependencies = [
"petgraph", "petgraph",
"quote", "quote",
"rectangle-pack", "rectangle-pack",
"round_mult",
"rustc-hash", "rustc-hash",
"syn 2.0.51", "syn 2.0.51",
"thiserror", "thiserror",
@ -2891,6 +2892,15 @@ dependencies = [
"winreg", "winreg",
] ]
[[package]]
name = "round_mult"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74bc7d5286c4d36f09aa6ae93f76acf6aa068cd62bc02970a9deb24763655dee"
dependencies = [
"rustc_version",
]
[[package]] [[package]]
name = "rustc-demangle" name = "rustc-demangle"
version = "0.1.23" version = "0.1.23"
@ -2903,6 +2913,15 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "rustc_version"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366"
dependencies = [
"semver",
]
[[package]] [[package]]
name = "rustix" name = "rustix"
version = "0.37.27" version = "0.37.27"
@ -3017,6 +3036,12 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "semver"
version = "1.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b"
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.194" version = "1.0.194"

View File

@ -39,6 +39,7 @@ rustc-hash = "1.1.0"
petgraph = { version = "0.6.5", features = ["matrix_graph"] } petgraph = { version = "0.6.5", features = ["matrix_graph"] }
bind_match = "0.1.2" bind_match = "0.1.2"
rectangle-pack = "0.4.2" rectangle-pack = "0.4.2"
round_mult = "0.1.3"
[features] [features]
tracy = ["dep:tracing-tracy"] tracy = ["dep:tracing-tracy"]

View File

@ -1,7 +1,14 @@
use std::{collections::VecDeque, mem, num::NonZeroU64, rc::Rc, sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard}}; use std::{
collections::VecDeque,
mem,
num::NonZeroU64,
rc::Rc,
sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard},
};
use lyra_ecs::{ use lyra_ecs::{
query::{filter::Has, Entities}, AtomicRef, Component, Entity, ResourceData query::{filter::Has, Entities},
AtomicRef, Component, Entity, ResourceData,
}; };
use lyra_game_derive::RenderGraphLabel; use lyra_game_derive::RenderGraphLabel;
use lyra_math::Transform; use lyra_math::Transform;
@ -10,7 +17,12 @@ use tracing::{debug, warn};
use wgpu::util::DeviceExt; use wgpu::util::DeviceExt;
use crate::render::{ use crate::render::{
graph::{Node, NodeDesc, NodeType, SlotAttribute, SlotValue}, light::directional::DirectionalLight, resource::{RenderPipeline, RenderPipelineDescriptor, Shader, VertexState}, transform_buffer_storage::TransformBuffers, vertex::Vertex, AtlasViewport, TextureAtlas graph::{Node, NodeDesc, NodeType, SlotAttribute, SlotValue},
light::directional::DirectionalLight,
resource::{RenderPipeline, RenderPipelineDescriptor, Shader, VertexState},
transform_buffer_storage::TransformBuffers,
vertex::Vertex,
AtlasViewport, GpuSlotBuffer, TextureAtlas,
}; };
use super::{MeshBufferStorage, RenderAssets, RenderMeshes}; use super::{MeshBufferStorage, RenderAssets, RenderMeshes};
@ -40,8 +52,7 @@ struct LightDepthMap {
pub struct ShadowMapsPass { pub struct ShadowMapsPass {
bgl: Arc<wgpu::BindGroupLayout>, bgl: Arc<wgpu::BindGroupLayout>,
atlas_size_buffer: Arc<wgpu::Buffer>, atlas_size_buffer: Arc<wgpu::Buffer>,
light_uniforms_buffer: Arc<wgpu::Buffer>, light_uniforms_buffer: GpuSlotBuffer<LightShadowUniform>,
light_uniforms_index: u64,
uniforms_bg: Arc<wgpu::BindGroup>, uniforms_bg: Arc<wgpu::BindGroup>,
/// depth maps for a light owned by an entity. /// depth maps for a light owned by an entity.
depth_maps: FxHashMap<Entity, LightDepthMap>, depth_maps: FxHashMap<Entity, LightDepthMap>,
@ -63,8 +74,7 @@ impl ShadowMapsPass {
let bgl = Arc::new( let bgl = Arc::new(
device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
label: Some("bgl_shadow_maps_lights"), label: Some("bgl_shadow_maps_lights"),
entries: &[ entries: &[wgpu::BindGroupLayoutEntry {
wgpu::BindGroupLayoutEntry {
binding: 0, binding: 0,
visibility: wgpu::ShaderStages::VERTEX_FRAGMENT, visibility: wgpu::ShaderStages::VERTEX_FRAGMENT,
ty: wgpu::BindingType::Buffer { ty: wgpu::BindingType::Buffer {
@ -75,8 +85,7 @@ impl ShadowMapsPass {
), ),
}, },
count: None, count: None,
} }],
],
}), }),
); );
@ -87,8 +96,7 @@ impl ShadowMapsPass {
SHADOW_SIZE * 4, SHADOW_SIZE * 4,
); );
let atlas_size_buffer = let atlas_size_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("buffer_shadow_maps_atlas_size"), label: Some("buffer_shadow_maps_atlas_size"),
usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST, usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
contents: bytemuck::bytes_of(&atlas.atlas_size()), contents: bytemuck::bytes_of(&atlas.atlas_size()),
@ -106,13 +114,15 @@ impl ShadowMapsPass {
..Default::default() ..Default::default()
}); });
let uniforms_buffer = let cap = device.limits().max_storage_buffer_binding_size as u64
device.create_buffer(&wgpu::BufferDescriptor { / mem::size_of::<LightShadowUniform>() as u64;
label: Some("buffer_shadow_maps_light"), let uniforms_buffer = GpuSlotBuffer::new_aligned(
usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST, device,
size: device.limits().max_storage_buffer_binding_size as u64, Some("buffer_shadow_maps_light"),
mapped_at_creation: false, wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
}); cap,
256,
);
let uniforms_bg = device.create_bind_group(&wgpu::BindGroupDescriptor { let uniforms_bg = device.create_bind_group(&wgpu::BindGroupDescriptor {
label: Some("bind_group_shadows"), label: Some("bind_group_shadows"),
@ -120,7 +130,7 @@ impl ShadowMapsPass {
entries: &[wgpu::BindGroupEntry { entries: &[wgpu::BindGroupEntry {
binding: 0, binding: 0,
resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding { resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding {
buffer: &uniforms_buffer, buffer: uniforms_buffer.buffer(),
offset: 0, offset: 0,
size: Some(NonZeroU64::new(mem::size_of::<LightShadowUniform>() as _).unwrap()), size: Some(NonZeroU64::new(mem::size_of::<LightShadowUniform>() as _).unwrap()),
}), }),
@ -129,8 +139,7 @@ impl ShadowMapsPass {
Self { Self {
bgl, bgl,
light_uniforms_buffer: Arc::new(uniforms_buffer), light_uniforms_buffer: uniforms_buffer,
light_uniforms_index: 0,
uniforms_bg: Arc::new(uniforms_bg), uniforms_bg: Arc::new(uniforms_bg),
atlas_size_buffer: Arc::new(atlas_size_buffer), atlas_size_buffer: Arc::new(atlas_size_buffer),
depth_maps: Default::default(), depth_maps: Default::default(),
@ -145,12 +154,18 @@ impl ShadowMapsPass {
} }
/// Create a depth map and return the id of the depth map in the texture atlas. /// Create a depth map and return the id of the depth map in the texture atlas.
fn create_depth_map(&mut self, device: &wgpu::Device, queue: &wgpu::Queue, entity: Entity, light_pos: Transform) -> LightDepthMap { fn create_depth_map(
&mut self,
queue: &wgpu::Queue,
entity: Entity,
light_pos: Transform,
) -> LightDepthMap {
const NEAR_PLANE: f32 = 0.1; const NEAR_PLANE: f32 = 0.1;
const FAR_PLANE: f32 = 45.0; const FAR_PLANE: f32 = 45.0;
let mut atlas = self.atlas.get_mut(); let mut atlas = self.atlas.get_mut();
let atlas_index = atlas.pack_new_texture(SHADOW_SIZE.x as _, SHADOW_SIZE.y as _) let atlas_index = atlas
.pack_new_texture(SHADOW_SIZE.x as _, SHADOW_SIZE.y as _)
.expect("failed to pack new shadow map into texture atlas"); .expect("failed to pack new shadow map into texture atlas");
let atlas_frame = atlas.texture_viewport(atlas_index); let atlas_frame = atlas.texture_viewport(atlas_index);
@ -166,21 +181,19 @@ impl ShadowMapsPass {
atlas_frame, atlas_frame,
}; };
let uniform_index = self.light_uniforms_index; /* let uniform_index = self.light_uniforms_index;
self.light_uniforms_index += 1; self.light_uniforms_index += 1;
//self.light_uniforms_buffer //self.light_uniforms_buffer
let offset = uniform_index_offset(&device.limits(), uniform_index); let offset = uniform_index_offset(&device.limits(), uniform_index);
queue.write_buffer(&self.light_uniforms_buffer, offset as u64, bytemuck::bytes_of(&uniform)); queue.write_buffer(&self.light_uniforms_buffer, offset as u64, bytemuck::bytes_of(&uniform)); */
let uniform_index = self.light_uniforms_buffer.insert(queue, &uniform);
let v = LightDepthMap { let v = LightDepthMap {
atlas_index, atlas_index,
uniform_index, uniform_index,
}; };
self.depth_maps.insert( self.depth_maps.insert(entity, v);
entity,
v,
);
v v
} }
@ -228,7 +241,9 @@ impl Node for ShadowMapsPass {
node.add_buffer_slot( node.add_buffer_slot(
ShadowMapsPassSlots::ShadowLightUniformsBuffer, ShadowMapsPassSlots::ShadowLightUniformsBuffer,
SlotAttribute::Output, SlotAttribute::Output,
Some(SlotValue::Buffer(self.light_uniforms_buffer.clone())), Some(SlotValue::Buffer(
self.light_uniforms_buffer.buffer().clone(),
)),
); );
node.add_buffer_slot( node.add_buffer_slot(
@ -257,9 +272,9 @@ impl Node for ShadowMapsPass {
for (entity, pos, _) in world.view_iter::<(Entities, &Transform, Has<DirectionalLight>)>() { for (entity, pos, _) in world.view_iter::<(Entities, &Transform, Has<DirectionalLight>)>() {
if !self.depth_maps.contains_key(&entity) { if !self.depth_maps.contains_key(&entity) {
// TODO: dont pack the textures as they're added // TODO: dont pack the textures as they're added
let atlas_index = self.create_depth_map(graph.device(), &context.queue, entity, *pos); let atlas_index =
self.create_depth_map(&context.queue, entity, *pos);
index_components_queue.push_back((entity, atlas_index)); index_components_queue.push_back((entity, atlas_index));
debug!("Created depth map for {:?} light entity", entity); debug!("Created depth map for {:?} light entity", entity);
@ -268,10 +283,13 @@ impl Node for ShadowMapsPass {
// now consume from the queue adding the components to the entities // now consume from the queue adding the components to the entities
while let Some((entity, depth)) = index_components_queue.pop_front() { while let Some((entity, depth)) = index_components_queue.pop_front() {
world.insert(entity, LightShadowMapId { world.insert(
entity,
LightShadowMapId {
atlas_index: depth.atlas_index, atlas_index: depth.atlas_index,
uniform_index: depth.uniform_index, uniform_index: depth.uniform_index,
}); },
);
} }
if self.pipeline.is_none() { if self.pipeline.is_none() {
@ -358,11 +376,26 @@ impl Node for ShadowMapsPass {
}); });
pass.set_pipeline(&pipeline); pass.set_pipeline(&pipeline);
let viewport = atlas.texture_viewport(dir_depth_map.atlas_index); let viewport = atlas.texture_viewport(dir_depth_map.atlas_index);
debug!("Rendering shadow map to viewport: {viewport:?}, uniform index: {}", dir_depth_map.uniform_index); debug!(
"Rendering shadow map to viewport: {viewport:?}, uniform index: {}",
dir_depth_map.uniform_index
);
// only render to the light's map in the atlas // only render to the light's map in the atlas
pass.set_viewport(viewport.offset.x as _, viewport.offset.y as _, viewport.size.x as _, viewport.size.y as _, 0.0, 1.0); pass.set_viewport(
viewport.offset.x as _,
viewport.offset.y as _,
viewport.size.x as _,
viewport.size.y as _,
0.0,
1.0,
);
// only clear the light map in the atlas // only clear the light map in the atlas
pass.set_scissor_rect(viewport.offset.x, viewport.offset.y, viewport.size.x, viewport.size.y); pass.set_scissor_rect(
viewport.offset.x,
viewport.offset.y,
viewport.size.x,
viewport.size.y,
);
for job in render_meshes.iter() { for job in render_meshes.iter() {
// get the mesh (containing vertices) and the buffers from storage // get the mesh (containing vertices) and the buffers from storage
@ -373,8 +406,9 @@ impl Node for ShadowMapsPass {
} }
let buffers = buffers.unwrap(); let buffers = buffers.unwrap();
let uniform_index = uniform_index_offset(&context.device.limits(), dir_depth_map.uniform_index); let uniform_index =
//debug!("Uniform offset: {uniform_index}"); self.light_uniforms_buffer
.offset_of(dir_depth_map.uniform_index) as u32;
pass.set_bind_group(0, &self.uniforms_bg, &[uniform_index]); pass.set_bind_group(0, &self.uniforms_bg, &[uniform_index]);
// Get the bindgroup for job's transform and bind to it using an offset. // Get the bindgroup for job's transform and bind to it using an offset.
@ -447,7 +481,7 @@ impl LightShadowMapAtlas {
} }
} }
fn uniform_index_offset(limits: &wgpu::Limits, uniform_idx: u64) -> u32 { /* fn uniform_index_offset(limits: &wgpu::Limits, uniform_idx: u64) -> u32 {
let t = uniform_idx as u32 % (limits.max_storage_buffer_binding_size / mem::size_of::<LightShadowUniform>() as u32); let t = uniform_idx as u32 % (limits.max_storage_buffer_binding_size / mem::size_of::<LightShadowUniform>() as u32);
t * limits.min_uniform_buffer_offset_alignment t * limits.min_uniform_buffer_offset_alignment
} } */

View File

@ -18,3 +18,6 @@ pub mod graph;
mod texture_atlas; mod texture_atlas;
pub use texture_atlas::*; pub use texture_atlas::*;
mod slot_buffer;
pub use slot_buffer::*;

View File

@ -0,0 +1,150 @@
use std::{collections::VecDeque, marker::PhantomData, mem, num::NonZeroU64, sync::Arc};
/// A GPU-resident buffer whose elements keep stable indices.
///
/// `GpuSlotBuffer` owns a single [`wgpu::Buffer`] and hands out element indices that stay
/// valid until the element is removed; indices of removed elements are recycled by later
/// insertions. Elements can either be tightly packed (see [`GpuSlotBuffer::new`]) or placed
/// at aligned offsets (see [`GpuSlotBuffer::new_aligned`]).
pub struct GpuSlotBuffer<T>
where
    T: bytemuck::Pod + bytemuck::Zeroable,
{
    /// The backing GPU buffer, shared so it can also be handed to other owners.
    buffer: Arc<wgpu::Buffer>,
    /// Optional byte alignment for element offsets; `None` means elements are tightly packed.
    alignment: Option<u64>,
    /// Number of element slots the buffer can hold.
    capacity: u64,
    /// Number of slots handed out so far. This is a high-water mark: it also counts slots
    /// that were removed and are waiting in `dead_indices` to be reused.
    len: u64,
    /// Indices of removed elements, reused (oldest first) before a fresh slot is taken.
    dead_indices: VecDeque<u64>,
    /// Ties the buffer to its element type without storing a `T` on the CPU side.
    _marker: PhantomData<T>,
}
impl<T: bytemuck::Pod + bytemuck::Zeroable> GpuSlotBuffer<T> {
    /// Create a new `GpuSlotBuffer` with unaligned (tightly packed) elements.
    ///
    /// The GPU allocation is `capacity * size_of::<T>()` bytes and holds exactly `capacity`
    /// elements.
    ///
    /// See [`GpuSlotBuffer::new_aligned`].
    pub fn new(
        device: &wgpu::Device,
        label: Option<&str>,
        usage: wgpu::BufferUsages,
        capacity: u64,
    ) -> Self {
        Self::new_impl(device, label, usage, capacity, None)
    }

    /// Create a new buffer with **aligned** elements.
    ///
    /// Every element starts at a multiple of `alignment` bytes; consecutive elements are
    /// [`GpuSlotBuffer::stride`] bytes apart. The GPU allocation is still
    /// `capacity * size_of::<T>()` bytes, so when `size_of::<T>()` is not a multiple of
    /// `alignment` the padding reduces the number of usable slots. Query
    /// [`GpuSlotBuffer::capacity`] for the number of elements that actually fit.
    ///
    /// # Panics
    /// Panics if `alignment` is zero.
    ///
    /// See [`GpuSlotBuffer::new`].
    pub fn new_aligned(
        device: &wgpu::Device,
        label: Option<&str>,
        usage: wgpu::BufferUsages,
        capacity: u64,
        alignment: u64,
    ) -> Self {
        Self::new_impl(device, label, usage, capacity, Some(alignment))
    }

    /// Shared constructor: allocates the GPU buffer and derives the usable slot count.
    fn new_impl(
        device: &wgpu::Device,
        label: Option<&str>,
        usage: wgpu::BufferUsages,
        capacity: u64,
        alignment: Option<u64>,
    ) -> Self {
        // Validates the alignment up front instead of on the first offset computation.
        let stride = Self::stride_for(alignment);
        let elem_size = mem::size_of::<T>() as u64;
        let byte_size = capacity
            .checked_mul(elem_size)
            .expect("GPU slot buffer byte size overflowed u64");

        let buffer = Arc::new(device.create_buffer(&wgpu::BufferDescriptor {
            label,
            size: byte_size,
            usage,
            mapped_at_creation: false,
        }));

        // Slot `i` occupies `[i * stride, i * stride + elem_size)`. Count how many such slots
        // lie entirely inside the allocation so inserts can never write past its end.
        let slots = if elem_size == 0 {
            // Zero-sized elements occupy no bytes; every requested slot "fits".
            capacity
        } else if byte_size < elem_size {
            0
        } else {
            (byte_size - elem_size) / stride + 1
        };

        Self {
            capacity: slots,
            len: 0,
            dead_indices: VecDeque::default(),
            buffer,
            alignment,
            _marker: PhantomData,
        }
    }

    /// Distance in bytes between consecutive elements for the given alignment.
    ///
    /// This is `size_of::<T>()` rounded up to a multiple of the alignment, or exactly
    /// `size_of::<T>()` when there is no alignment.
    fn stride_for(alignment: Option<u64>) -> u64 {
        let elem_size = mem::size_of::<T>() as u64;
        match alignment {
            Some(align) => {
                let align = NonZeroU64::new(align)
                    .expect("GPU slot buffer alignment must be non-zero");
                round_mult::up(elem_size, align)
                    .expect("GPU slot buffer element stride overflowed u64")
            }
            None => elem_size,
        }
    }

    /// Returns `true` if `i` refers to a slot that was handed out and not removed since.
    ///
    /// The dead-list lookup is linear in the number of currently dead slots.
    fn is_live(&self, i: u64) -> bool {
        i < self.len && !self.dead_indices.contains(&i)
    }

    /// Returns the distance in bytes between the starts of two consecutive elements.
    pub fn stride(&self) -> u64 {
        Self::stride_for(self.alignment)
    }

    /// Calculates the byte offset in the buffer of the element at `i`.
    ///
    /// Offsets are `i * stride`, so every index maps to its own, non-overlapping byte range
    /// and, for aligned buffers, every offset is a multiple of the alignment.
    pub fn offset_of(&self, i: u64) -> u64 {
        i.checked_mul(self.stride())
            .expect("GPU slot buffer offset overflowed u64")
    }

    /// Set an element at `i` in the buffer to `val`.
    ///
    /// # Panics
    /// Panics if `i` is not smaller than [`GpuSlotBuffer::capacity`].
    pub fn set_at(&self, queue: &wgpu::Queue, i: u64, val: &T) {
        assert!(
            i < self.capacity,
            "GPU slot buffer index {i} is out of bounds (capacity {})",
            self.capacity
        );

        let offset = self.offset_of(i);
        queue.write_buffer(&self.buffer, offset, bytemuck::bytes_of(val));
    }

    /// Attempt to insert an element to the GPU buffer, returning the index it was inserted at.
    ///
    /// Returns `None` when the buffer has no space to fit the element.
    pub fn try_insert(&mut self, queue: &wgpu::Queue, val: &T) -> Option<u64> {
        // Reuse the oldest dead index if there is one, otherwise take the next fresh slot.
        let i = match self.dead_indices.pop_front() {
            Some(i) => i,
            None => {
                if self.len >= self.capacity {
                    return None;
                }

                let i = self.len;
                self.len += 1;
                i
            }
        };

        self.set_at(queue, i, val);
        Some(i)
    }

    /// Insert an element to the GPU buffer, returning the index it was inserted at.
    ///
    /// The index is not guaranteed to be the end of the buffer since this structure reuses
    /// indices after they're removed.
    ///
    /// # Panics
    /// Panics if the buffer does not have space to fit `val`, see [`GpuSlotBuffer::try_insert`].
    pub fn insert(&mut self, queue: &wgpu::Queue, val: &T) -> u64 {
        self.try_insert(queue, val)
            .expect("GPU slot buffer ran out of slots to push elements into")
    }

    /// Remove the element at `i`, clearing the element's slot in the buffer.
    ///
    /// Indices that are not currently in use (never handed out, or already removed) are
    /// ignored, so a stale index cannot be queued for reuse twice.
    ///
    /// If you do not care that the slot in the buffer is emptied, use
    /// [`GpuSlotBuffer::remove_quick`].
    pub fn remove(&mut self, queue: &wgpu::Queue, i: u64) {
        if !self.is_live(i) {
            return;
        }

        // Write exactly one element's worth of zero bytes so neighbouring slots stay intact.
        let zeroed = <T as bytemuck::Zeroable>::zeroed();
        self.set_at(queue, i, &zeroed);

        self.dead_indices.push_back(i);
    }

    /// Remove the element at `i` without clearing its space in the buffer.
    ///
    /// Indices that are not currently in use (never handed out, or already removed) are
    /// ignored, so a stale index cannot be queued for reuse twice.
    ///
    /// If you want to ensure that the slot in the buffer is emptied, use
    /// [`GpuSlotBuffer::remove`].
    pub fn remove_quick(&mut self, i: u64) {
        if self.is_live(i) {
            self.dead_indices.push_back(i);
        }
    }

    /// Returns the backing [`wgpu::Buffer`].
    pub fn buffer(&self) -> &Arc<wgpu::Buffer> {
        &self.buffer
    }

    /// Return the length of the buffer.
    ///
    /// This value may not reflect the amount of elements that are actually alive in the buffer if
    /// elements were removed and not re-added.
    pub fn len(&self) -> u64 {
        self.len
    }

    /// Return the amount of in-use indices in the buffer.
    pub fn inuse_len(&self) -> u64 {
        // Only live indices below `len` ever enter `dead_indices`, so this cannot underflow.
        self.len - self.dead_indices.len() as u64
    }

    /// Returns the amount of elements the buffer can fit.
    ///
    /// For aligned buffers this accounts for the padding between elements and may be smaller
    /// than the `capacity` passed to [`GpuSlotBuffer::new_aligned`].
    pub fn capacity(&self) -> u64 {
        self.capacity
    }
}