render: create a GpuSlotBuffer for stable indices into a GPU buffer

SeanOMik 2024-07-11 20:00:46 -04:00
parent cc1c482c40
commit 87aa440691
Signed by: SeanOMik
GPG Key ID: FEC9E2FC15235964
5 changed files with 273 additions and 60 deletions
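For orientation before the diffs: a minimal sketch (not part of the commit) of how the new GpuSlotBuffer added below is meant to be used. `ExampleUniform`, `device`, and `queue` are placeholders assumed to be in scope; the calls mirror the API of the new slot_buffer module.

    // Illustrative only: insert() hands back a stable index, offset_of() turns it into a
    // byte offset, and remove() frees the index for reuse by a later insert().
    let mut slots: GpuSlotBuffer<ExampleUniform> = GpuSlotBuffer::new_aligned(
        &device,
        Some("example_slot_buffer"),
        wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
        64,  // capacity in elements
        256, // alignment, matching wgpu's default min offset alignment
    );
    let idx = slots.insert(&queue, &ExampleUniform::default());
    let _offset = slots.offset_of(idx); // usable as a dynamic offset when binding
    slots.remove(&queue, idx);          // idx will be handed out again later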

Cargo.lock (generated)

@@ -1882,6 +1882,7 @@ dependencies = [
"petgraph",
"quote",
"rectangle-pack",
"round_mult",
"rustc-hash",
"syn 2.0.51",
"thiserror",
@@ -2891,6 +2892,15 @@ dependencies = [
"winreg",
]
[[package]]
name = "round_mult"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74bc7d5286c4d36f09aa6ae93f76acf6aa068cd62bc02970a9deb24763655dee"
dependencies = [
"rustc_version",
]
[[package]]
name = "rustc-demangle"
version = "0.1.23"
@@ -2903,6 +2913,15 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "rustc_version"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366"
dependencies = [
"semver",
]
[[package]]
name = "rustix"
version = "0.37.27"
@@ -3017,6 +3036,12 @@ dependencies = [
"libc",
]
[[package]]
name = "semver"
version = "1.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b"
[[package]]
name = "serde"
version = "1.0.194"


@@ -39,6 +39,7 @@ rustc-hash = "1.1.0"
petgraph = { version = "0.6.5", features = ["matrix_graph"] }
bind_match = "0.1.2"
rectangle-pack = "0.4.2"
round_mult = "0.1.3"
[features]
tracy = ["dep:tracing-tracy"]
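The new round_mult dependency provides the rounding helper that the slot buffer added below uses to align byte offsets. A tiny illustration of the same call, with example values:

    use std::num::NonZeroU64;

    // Round 300 up to the next multiple of 256; `up` returns an Option (None on overflow).
    let aligned = round_mult::up(300u64, NonZeroU64::new(256).unwrap()).unwrap();
    assert_eq!(aligned, 512);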


@@ -1,7 +1,14 @@
use std::{collections::VecDeque, mem, num::NonZeroU64, rc::Rc, sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard}};
use std::{
collections::VecDeque,
mem,
num::NonZeroU64,
rc::Rc,
sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard},
};
use lyra_ecs::{
query::{filter::Has, Entities}, AtomicRef, Component, Entity, ResourceData
query::{filter::Has, Entities},
AtomicRef, Component, Entity, ResourceData,
};
use lyra_game_derive::RenderGraphLabel;
use lyra_math::Transform;
@@ -10,7 +17,12 @@ use tracing::{debug, warn};
use wgpu::util::DeviceExt;
use crate::render::{
graph::{Node, NodeDesc, NodeType, SlotAttribute, SlotValue}, light::directional::DirectionalLight, resource::{RenderPipeline, RenderPipelineDescriptor, Shader, VertexState}, transform_buffer_storage::TransformBuffers, vertex::Vertex, AtlasViewport, TextureAtlas
graph::{Node, NodeDesc, NodeType, SlotAttribute, SlotValue},
light::directional::DirectionalLight,
resource::{RenderPipeline, RenderPipelineDescriptor, Shader, VertexState},
transform_buffer_storage::TransformBuffers,
vertex::Vertex,
AtlasViewport, GpuSlotBuffer, TextureAtlas,
};
use super::{MeshBufferStorage, RenderAssets, RenderMeshes};
@@ -40,8 +52,7 @@ struct LightDepthMap {
pub struct ShadowMapsPass {
bgl: Arc<wgpu::BindGroupLayout>,
atlas_size_buffer: Arc<wgpu::Buffer>,
light_uniforms_buffer: Arc<wgpu::Buffer>,
light_uniforms_index: u64,
light_uniforms_buffer: GpuSlotBuffer<LightShadowUniform>,
uniforms_bg: Arc<wgpu::BindGroup>,
/// depth maps for a light owned by an entity.
depth_maps: FxHashMap<Entity, LightDepthMap>,
@@ -63,8 +74,7 @@ impl ShadowMapsPass {
let bgl = Arc::new(
device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
label: Some("bgl_shadow_maps_lights"),
entries: &[
wgpu::BindGroupLayoutEntry {
entries: &[wgpu::BindGroupLayoutEntry {
binding: 0,
visibility: wgpu::ShaderStages::VERTEX_FRAGMENT,
ty: wgpu::BindingType::Buffer {
@@ -75,8 +85,7 @@
),
},
count: None,
}
],
}],
}),
);
@@ -87,8 +96,7 @@ impl ShadowMapsPass {
SHADOW_SIZE * 4,
);
let atlas_size_buffer =
device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
let atlas_size_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("buffer_shadow_maps_atlas_size"),
usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
contents: bytemuck::bytes_of(&atlas.atlas_size()),
@@ -106,13 +114,15 @@ impl ShadowMapsPass {
..Default::default()
});
let uniforms_buffer =
device.create_buffer(&wgpu::BufferDescriptor {
label: Some("buffer_shadow_maps_light"),
usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
size: device.limits().max_storage_buffer_binding_size as u64,
mapped_at_creation: false,
});
let cap = device.limits().max_storage_buffer_binding_size as u64
/ mem::size_of::<LightShadowUniform>() as u64;
let uniforms_buffer = GpuSlotBuffer::new_aligned(
device,
Some("buffer_shadow_maps_light"),
wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
cap,
256,
);
let uniforms_bg = device.create_bind_group(&wgpu::BindGroupDescriptor {
label: Some("bind_group_shadows"),
@@ -120,7 +130,7 @@ impl ShadowMapsPass {
entries: &[wgpu::BindGroupEntry {
binding: 0,
resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding {
buffer: &uniforms_buffer,
buffer: uniforms_buffer.buffer(),
offset: 0,
size: Some(NonZeroU64::new(mem::size_of::<LightShadowUniform>() as _).unwrap()),
}),
@@ -129,8 +139,7 @@ impl ShadowMapsPass {
Self {
bgl,
light_uniforms_buffer: Arc::new(uniforms_buffer),
light_uniforms_index: 0,
light_uniforms_buffer: uniforms_buffer,
uniforms_bg: Arc::new(uniforms_bg),
atlas_size_buffer: Arc::new(atlas_size_buffer),
depth_maps: Default::default(),
@@ -145,12 +154,18 @@ impl ShadowMapsPass {
}
/// Create a depth map and return the id of the depth map in the texture atlas.
fn create_depth_map(&mut self, device: &wgpu::Device, queue: &wgpu::Queue, entity: Entity, light_pos: Transform) -> LightDepthMap {
fn create_depth_map(
&mut self,
queue: &wgpu::Queue,
entity: Entity,
light_pos: Transform,
) -> LightDepthMap {
const NEAR_PLANE: f32 = 0.1;
const FAR_PLANE: f32 = 45.0;
let mut atlas = self.atlas.get_mut();
let atlas_index = atlas.pack_new_texture(SHADOW_SIZE.x as _, SHADOW_SIZE.y as _)
let atlas_index = atlas
.pack_new_texture(SHADOW_SIZE.x as _, SHADOW_SIZE.y as _)
.expect("failed to pack new shadow map into texture atlas");
let atlas_frame = atlas.texture_viewport(atlas_index);
@@ -166,21 +181,19 @@ impl ShadowMapsPass {
atlas_frame,
};
let uniform_index = self.light_uniforms_index;
/* let uniform_index = self.light_uniforms_index;
self.light_uniforms_index += 1;
//self.light_uniforms_buffer
let offset = uniform_index_offset(&device.limits(), uniform_index);
queue.write_buffer(&self.light_uniforms_buffer, offset as u64, bytemuck::bytes_of(&uniform));
queue.write_buffer(&self.light_uniforms_buffer, offset as u64, bytemuck::bytes_of(&uniform)); */
let uniform_index = self.light_uniforms_buffer.insert(queue, &uniform);
let v = LightDepthMap {
atlas_index,
uniform_index,
};
self.depth_maps.insert(
entity,
v,
);
self.depth_maps.insert(entity, v);
v
}
@@ -228,7 +241,9 @@ impl Node for ShadowMapsPass {
node.add_buffer_slot(
ShadowMapsPassSlots::ShadowLightUniformsBuffer,
SlotAttribute::Output,
Some(SlotValue::Buffer(self.light_uniforms_buffer.clone())),
Some(SlotValue::Buffer(
self.light_uniforms_buffer.buffer().clone(),
)),
);
node.add_buffer_slot(
@@ -257,9 +272,9 @@ impl Node for ShadowMapsPass {
for (entity, pos, _) in world.view_iter::<(Entities, &Transform, Has<DirectionalLight>)>() {
if !self.depth_maps.contains_key(&entity) {
// TODO: dont pack the textures as they're added
let atlas_index = self.create_depth_map(graph.device(), &context.queue, entity, *pos);
let atlas_index =
self.create_depth_map(&context.queue, entity, *pos);
index_components_queue.push_back((entity, atlas_index));
debug!("Created depth map for {:?} light entity", entity);
@@ -268,10 +283,13 @@ impl Node for ShadowMapsPass {
// now consume from the queue adding the components to the entities
while let Some((entity, depth)) = index_components_queue.pop_front() {
world.insert(entity, LightShadowMapId {
world.insert(
entity,
LightShadowMapId {
atlas_index: depth.atlas_index,
uniform_index: depth.uniform_index,
});
},
);
}
if self.pipeline.is_none() {
@@ -358,11 +376,26 @@ impl Node for ShadowMapsPass {
});
pass.set_pipeline(&pipeline);
let viewport = atlas.texture_viewport(dir_depth_map.atlas_index);
debug!("Rendering shadow map to viewport: {viewport:?}, uniform index: {}", dir_depth_map.uniform_index);
debug!(
"Rendering shadow map to viewport: {viewport:?}, uniform index: {}",
dir_depth_map.uniform_index
);
// only render to the light's map in the atlas
pass.set_viewport(viewport.offset.x as _, viewport.offset.y as _, viewport.size.x as _, viewport.size.y as _, 0.0, 1.0);
pass.set_viewport(
viewport.offset.x as _,
viewport.offset.y as _,
viewport.size.x as _,
viewport.size.y as _,
0.0,
1.0,
);
// only clear the light map in the atlas
pass.set_scissor_rect(viewport.offset.x, viewport.offset.y, viewport.size.x, viewport.size.y);
pass.set_scissor_rect(
viewport.offset.x,
viewport.offset.y,
viewport.size.x,
viewport.size.y,
);
for job in render_meshes.iter() {
// get the mesh (containing vertices) and the buffers from storage
@@ -373,8 +406,9 @@ impl Node for ShadowMapsPass {
}
let buffers = buffers.unwrap();
let uniform_index = uniform_index_offset(&context.device.limits(), dir_depth_map.uniform_index);
//debug!("Uniform offset: {uniform_index}");
let uniform_index =
self.light_uniforms_buffer
.offset_of(dir_depth_map.uniform_index) as u32;
pass.set_bind_group(0, &self.uniforms_bg, &[uniform_index]);
// Get the bindgroup for job's transform and bind to it using an offset.
@@ -447,7 +481,7 @@ impl LightShadowMapAtlas {
}
}
fn uniform_index_offset(limits: &wgpu::Limits, uniform_idx: u64) -> u32 {
/* fn uniform_index_offset(limits: &wgpu::Limits, uniform_idx: u64) -> u32 {
let t = uniform_idx as u32 % (limits.max_storage_buffer_binding_size / mem::size_of::<LightShadowUniform>() as u32);
t * limits.min_uniform_buffer_offset_alignment
}
} */
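To tie the pass changes together: each light's shadow uniform now lives in a stable slot of the GpuSlotBuffer, and that slot's byte offset is passed to wgpu as a dynamic offset when the light's shadow map is rendered. A minimal sketch using the field names from the hunks above (the bind group layout's `has_dynamic_offset: true` flag is elided by the hunk but required for this to work):

    // Turn the light's stable slot index into a byte offset into the uniforms buffer.
    let offset = self.light_uniforms_buffer.offset_of(dir_depth_map.uniform_index) as u32;
    // Dynamic offsets must be multiples of the device's min offset alignment (256 by
    // default), which is why the buffer was created with new_aligned(.., 256).
    pass.set_bind_group(0, &self.uniforms_bg, &[offset]);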


@@ -18,3 +18,6 @@ pub mod graph;
mod texture_atlas;
pub use texture_atlas::*;
mod slot_buffer;
pub use slot_buffer::*;
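The private module plus glob re-export makes the new type reachable from the render module root, which is how the shadows pass above imports it:

    // e.g. from elsewhere in the crate, as in the shadows pass diff above
    use crate::render::GpuSlotBuffer;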


@@ -0,0 +1,150 @@
use std::{collections::VecDeque, marker::PhantomData, mem, num::NonZeroU64, sync::Arc};
/// A buffer on the GPU that has persistent indices.
///
/// `GpuSlotBuffer` allocates a buffer on the GPU, keeps stable indices for its elements, and
/// reuses the indices of elements that were removed. It supports aligned buffers with
/// [`GpuSlotBuffer::new_aligned`], as well as unaligned buffers with [`GpuSlotBuffer::new`].
pub struct GpuSlotBuffer<T: bytemuck::Pod + bytemuck::Zeroable> {
/// The number of elements that can fit in the buffer.
capacity: u64,
/// The high-water mark of allocated slots; marks the end of the used region of the buffer.
len: u64,
/// The list of dead and reusable indices in the buffer.
dead_indices: VecDeque<u64>,
/// The optional alignment of elements in the buffer.
alignment: Option<u64>,
/// The underlying GPU buffer.
buffer: Arc<wgpu::Buffer>,
_marker: PhantomData<T>,
}
impl<T: bytemuck::Pod + bytemuck::Zeroable> GpuSlotBuffer<T> {
/// Create a new GpuSlotBuffer with unaligned elements.
///
/// See [`GpuSlotBuffer::new_aligned`].
pub fn new(device: &wgpu::Device, label: Option<&str>, usage: wgpu::BufferUsages, capacity: u64) -> Self {
Self::new_impl(device, label, usage, capacity, None)
}
/// Create a new buffer with **aligned** elements.
///
/// See [`GpuSlotBuffer::new`].
pub fn new_aligned(device: &wgpu::Device, label: Option<&str>, usage: wgpu::BufferUsages, capacity: u64, alignment: u64) -> Self {
Self::new_impl(device, label, usage, capacity, Some(alignment))
}
fn new_impl(device: &wgpu::Device, label: Option<&str>, usage: wgpu::BufferUsages, capacity: u64, alignment: Option<u64>) -> Self {
let buffer = Arc::new(device.create_buffer(&wgpu::BufferDescriptor {
label,
size: capacity * mem::size_of::<T>() as u64,
usage,
mapped_at_creation: false,
}));
Self {
capacity,
len: 0,
dead_indices: VecDeque::default(),
buffer,
alignment,
_marker: PhantomData
}
}
/// Calculates the byte offset in the buffer of the element at `i`.
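///
/// For aligned buffers, the raw offset `i * size_of::<T>()` is rounded up to the next
/// multiple of the alignment; for example, with a 256-byte alignment a raw offset of
/// 300 bytes maps to 512.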
pub fn offset_of(&self, i: u64) -> u64 {
let offset = i * mem::size_of::<T>() as u64;
if let Some(align) = self.alignment {
round_mult::up(offset, NonZeroU64::new(align).unwrap()).unwrap()
} else {
offset
}
}
/// Set an element at `i` in the buffer to `val`.
pub fn set_at(&self, queue: &wgpu::Queue, i: u64, val: &T) {
let offset = self.offset_of(i);
queue.write_buffer(&self.buffer, offset, bytemuck::bytes_of(val));
}
/// Attempt to insert an element into the GPU buffer, returning the index it was inserted at.
///
/// Returns `None` when the buffer has no space to fit the element.
pub fn try_insert(&mut self, queue: &wgpu::Queue, val: &T) -> Option<u64> {
// reuse a dead index or get the next one
let i = match self.dead_indices.pop_front() {
Some(i) => i,
None => {
if self.len == self.capacity {
return None;
}
let i = self.len;
self.len += 1;
i
}
};
self.set_at(queue, i, val);
Some(i)
}
/// Insert an element into the GPU buffer, returning the index it was inserted at.
///
/// The index is not guaranteed to be the end of the buffer since this structure reuses
/// indices after they're removed.
///
/// # Panics
/// Panics if the buffer does not have space to fit `val`, see [`GpuSlotBuffer::try_insert`].
pub fn insert(&mut self, queue: &wgpu::Queue, val: &T) -> u64 {
self.try_insert(queue, val)
.expect("GPU slot buffer ran out of slots to push elements into")
}
/// Remove the element at `i`, clearing the element's slot in the buffer.
///
/// If you do not care that the slot in the buffer is emptied, use
/// [`GpuSlotBuffer::remove_quick`].
pub fn remove(&mut self, queue: &wgpu::Queue, i: u64) {
let zeros = vec![0u8; mem::size_of::<T>()];
let offset = self.offset_of(i);
queue.write_buffer(&self.buffer, offset, &zeros);
self.dead_indices.push_back(i);
}
/// Remove the element at `i` without clearing its space in the buffer.
///
/// If you want to ensure that the slot in the buffer is emptied, use
/// [`GpuSlotBuffer::remove`].
pub fn remove_quick(&mut self, i: u64) {
self.dead_indices.push_back(i);
}
/// Returns the backing [`wgpu::Buffer`].
pub fn buffer(&self) -> &Arc<wgpu::Buffer> {
&self.buffer
}
/// Returns the length of the used region of the buffer.
///
/// This value may not reflect the number of elements that are actually alive in the buffer
/// if elements were removed and their slots have not been reused.
pub fn len(&self) -> u64 {
self.len
}
/// Returns the number of in-use indices in the buffer.
pub fn inuse_len(&self) -> u64 {
self.len - self.dead_indices.len() as u64
}
/// Returns the number of elements the buffer can fit.
pub fn capacity(&self) -> u64 {
self.capacity
}
}
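A short usage sketch of the API above, illustrating that a removed index is handed back out by a later insert. `demo`, `ExampleElem`, and the `device`/`queue` parameters are placeholders, not part of this commit, and the derives assume bytemuck's `derive` feature:

    #[repr(C)]
    #[derive(Clone, Copy, bytemuck::Pod, bytemuck::Zeroable)]
    struct ExampleElem {
        value: u32,
        _pad: [u32; 3],
    }

    fn demo(device: &wgpu::Device, queue: &wgpu::Queue) {
        // Unaligned variant: the offset of index i is simply i * size_of::<ExampleElem>().
        let mut buf: GpuSlotBuffer<ExampleElem> = GpuSlotBuffer::new(
            device,
            Some("example_elements"),
            wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
            16,
        );

        let a = buf.insert(queue, &ExampleElem { value: 1, _pad: [0; 3] }); // index 0
        let _b = buf.insert(queue, &ExampleElem { value: 2, _pad: [0; 3] }); // index 1
        buf.remove(queue, a); // slot 0 is zeroed and its index marked dead
        let c = buf.insert(queue, &ExampleElem { value: 3, _pad: [0; 3] }); // reuses index 0

        assert_eq!(c, a);
        assert_eq!(buf.len(), 2);       // high-water mark is still 2
        assert_eq!(buf.inuse_len(), 2); // two elements are alive
    }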