render: create a GpuSlotBuffer for stable indices in a gpu buffer
This commit is contained in:
parent
cc1c482c40
commit
87aa440691
|
@ -1882,6 +1882,7 @@ dependencies = [
|
|||
"petgraph",
|
||||
"quote",
|
||||
"rectangle-pack",
|
||||
"round_mult",
|
||||
"rustc-hash",
|
||||
"syn 2.0.51",
|
||||
"thiserror",
|
||||
|
@ -2891,6 +2892,15 @@ dependencies = [
|
|||
"winreg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "round_mult"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "74bc7d5286c4d36f09aa6ae93f76acf6aa068cd62bc02970a9deb24763655dee"
|
||||
dependencies = [
|
||||
"rustc_version",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustc-demangle"
|
||||
version = "0.1.23"
|
||||
|
@ -2903,6 +2913,15 @@ version = "1.1.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
||||
|
||||
[[package]]
|
||||
name = "rustc_version"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366"
|
||||
dependencies = [
|
||||
"semver",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "0.37.27"
|
||||
|
@ -3017,6 +3036,12 @@ dependencies = [
|
|||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "semver"
|
||||
version = "1.0.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.194"
|
||||
|
|
|
@ -39,6 +39,7 @@ rustc-hash = "1.1.0"
|
|||
petgraph = { version = "0.6.5", features = ["matrix_graph"] }
|
||||
bind_match = "0.1.2"
|
||||
rectangle-pack = "0.4.2"
|
||||
round_mult = "0.1.3"
|
||||
|
||||
[features]
|
||||
tracy = ["dep:tracing-tracy"]
|
||||
|
|
|
@ -1,7 +1,14 @@
|
|||
use std::{collections::VecDeque, mem, num::NonZeroU64, rc::Rc, sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard}};
|
||||
use std::{
|
||||
collections::VecDeque,
|
||||
mem,
|
||||
num::NonZeroU64,
|
||||
rc::Rc,
|
||||
sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard},
|
||||
};
|
||||
|
||||
use lyra_ecs::{
|
||||
query::{filter::Has, Entities}, AtomicRef, Component, Entity, ResourceData
|
||||
query::{filter::Has, Entities},
|
||||
AtomicRef, Component, Entity, ResourceData,
|
||||
};
|
||||
use lyra_game_derive::RenderGraphLabel;
|
||||
use lyra_math::Transform;
|
||||
|
@ -10,7 +17,12 @@ use tracing::{debug, warn};
|
|||
use wgpu::util::DeviceExt;
|
||||
|
||||
use crate::render::{
|
||||
graph::{Node, NodeDesc, NodeType, SlotAttribute, SlotValue}, light::directional::DirectionalLight, resource::{RenderPipeline, RenderPipelineDescriptor, Shader, VertexState}, transform_buffer_storage::TransformBuffers, vertex::Vertex, AtlasViewport, TextureAtlas
|
||||
graph::{Node, NodeDesc, NodeType, SlotAttribute, SlotValue},
|
||||
light::directional::DirectionalLight,
|
||||
resource::{RenderPipeline, RenderPipelineDescriptor, Shader, VertexState},
|
||||
transform_buffer_storage::TransformBuffers,
|
||||
vertex::Vertex,
|
||||
AtlasViewport, GpuSlotBuffer, TextureAtlas,
|
||||
};
|
||||
|
||||
use super::{MeshBufferStorage, RenderAssets, RenderMeshes};
|
||||
|
@ -40,8 +52,7 @@ struct LightDepthMap {
|
|||
pub struct ShadowMapsPass {
|
||||
bgl: Arc<wgpu::BindGroupLayout>,
|
||||
atlas_size_buffer: Arc<wgpu::Buffer>,
|
||||
light_uniforms_buffer: Arc<wgpu::Buffer>,
|
||||
light_uniforms_index: u64,
|
||||
light_uniforms_buffer: GpuSlotBuffer<LightShadowUniform>,
|
||||
uniforms_bg: Arc<wgpu::BindGroup>,
|
||||
/// depth maps for a light owned by an entity.
|
||||
depth_maps: FxHashMap<Entity, LightDepthMap>,
|
||||
|
@ -63,20 +74,18 @@ impl ShadowMapsPass {
|
|||
let bgl = Arc::new(
|
||||
device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
|
||||
label: Some("bgl_shadow_maps_lights"),
|
||||
entries: &[
|
||||
wgpu::BindGroupLayoutEntry {
|
||||
binding: 0,
|
||||
visibility: wgpu::ShaderStages::VERTEX_FRAGMENT,
|
||||
ty: wgpu::BindingType::Buffer {
|
||||
ty: wgpu::BufferBindingType::Storage { read_only: true },
|
||||
has_dynamic_offset: true,
|
||||
min_binding_size: Some(
|
||||
NonZeroU64::new(mem::size_of::<LightShadowUniform>() as _).unwrap(),
|
||||
),
|
||||
},
|
||||
count: None,
|
||||
}
|
||||
],
|
||||
entries: &[wgpu::BindGroupLayoutEntry {
|
||||
binding: 0,
|
||||
visibility: wgpu::ShaderStages::VERTEX_FRAGMENT,
|
||||
ty: wgpu::BindingType::Buffer {
|
||||
ty: wgpu::BufferBindingType::Storage { read_only: true },
|
||||
has_dynamic_offset: true,
|
||||
min_binding_size: Some(
|
||||
NonZeroU64::new(mem::size_of::<LightShadowUniform>() as _).unwrap(),
|
||||
),
|
||||
},
|
||||
count: None,
|
||||
}],
|
||||
}),
|
||||
);
|
||||
|
||||
|
@ -87,12 +96,11 @@ impl ShadowMapsPass {
|
|||
SHADOW_SIZE * 4,
|
||||
);
|
||||
|
||||
let atlas_size_buffer =
|
||||
device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
|
||||
label: Some("buffer_shadow_maps_atlas_size"),
|
||||
usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
|
||||
contents: bytemuck::bytes_of(&atlas.atlas_size()),
|
||||
});
|
||||
let atlas_size_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
|
||||
label: Some("buffer_shadow_maps_atlas_size"),
|
||||
usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
|
||||
contents: bytemuck::bytes_of(&atlas.atlas_size()),
|
||||
});
|
||||
|
||||
let sampler = device.create_sampler(&wgpu::SamplerDescriptor {
|
||||
label: Some("sampler_shadow_map_atlas"),
|
||||
|
@ -106,13 +114,15 @@ impl ShadowMapsPass {
|
|||
..Default::default()
|
||||
});
|
||||
|
||||
let uniforms_buffer =
|
||||
device.create_buffer(&wgpu::BufferDescriptor {
|
||||
label: Some("buffer_shadow_maps_light"),
|
||||
usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
|
||||
size: device.limits().max_storage_buffer_binding_size as u64,
|
||||
mapped_at_creation: false,
|
||||
});
|
||||
let cap = device.limits().max_storage_buffer_binding_size as u64
|
||||
/ mem::size_of::<LightShadowUniform>() as u64;
|
||||
let uniforms_buffer = GpuSlotBuffer::new_aligned(
|
||||
device,
|
||||
Some("buffer_shadow_maps_light"),
|
||||
wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
|
||||
cap,
|
||||
256,
|
||||
);
|
||||
|
||||
let uniforms_bg = device.create_bind_group(&wgpu::BindGroupDescriptor {
|
||||
label: Some("bind_group_shadows"),
|
||||
|
@ -120,7 +130,7 @@ impl ShadowMapsPass {
|
|||
entries: &[wgpu::BindGroupEntry {
|
||||
binding: 0,
|
||||
resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding {
|
||||
buffer: &uniforms_buffer,
|
||||
buffer: uniforms_buffer.buffer(),
|
||||
offset: 0,
|
||||
size: Some(NonZeroU64::new(mem::size_of::<LightShadowUniform>() as _).unwrap()),
|
||||
}),
|
||||
|
@ -129,8 +139,7 @@ impl ShadowMapsPass {
|
|||
|
||||
Self {
|
||||
bgl,
|
||||
light_uniforms_buffer: Arc::new(uniforms_buffer),
|
||||
light_uniforms_index: 0,
|
||||
light_uniforms_buffer: uniforms_buffer,
|
||||
uniforms_bg: Arc::new(uniforms_bg),
|
||||
atlas_size_buffer: Arc::new(atlas_size_buffer),
|
||||
depth_maps: Default::default(),
|
||||
|
@ -145,12 +154,18 @@ impl ShadowMapsPass {
|
|||
}
|
||||
|
||||
/// Create a depth map and return the id of the depth map in the texture atlas.
|
||||
fn create_depth_map(&mut self, device: &wgpu::Device, queue: &wgpu::Queue, entity: Entity, light_pos: Transform) -> LightDepthMap {
|
||||
fn create_depth_map(
|
||||
&mut self,
|
||||
queue: &wgpu::Queue,
|
||||
entity: Entity,
|
||||
light_pos: Transform,
|
||||
) -> LightDepthMap {
|
||||
const NEAR_PLANE: f32 = 0.1;
|
||||
const FAR_PLANE: f32 = 45.0;
|
||||
|
||||
let mut atlas = self.atlas.get_mut();
|
||||
let atlas_index = atlas.pack_new_texture(SHADOW_SIZE.x as _, SHADOW_SIZE.y as _)
|
||||
let atlas_index = atlas
|
||||
.pack_new_texture(SHADOW_SIZE.x as _, SHADOW_SIZE.y as _)
|
||||
.expect("failed to pack new shadow map into texture atlas");
|
||||
let atlas_frame = atlas.texture_viewport(atlas_index);
|
||||
|
||||
|
@ -166,21 +181,19 @@ impl ShadowMapsPass {
|
|||
atlas_frame,
|
||||
};
|
||||
|
||||
let uniform_index = self.light_uniforms_index;
|
||||
/* let uniform_index = self.light_uniforms_index;
|
||||
self.light_uniforms_index += 1;
|
||||
|
||||
//self.light_uniforms_buffer
|
||||
let offset = uniform_index_offset(&device.limits(), uniform_index);
|
||||
queue.write_buffer(&self.light_uniforms_buffer, offset as u64, bytemuck::bytes_of(&uniform));
|
||||
queue.write_buffer(&self.light_uniforms_buffer, offset as u64, bytemuck::bytes_of(&uniform)); */
|
||||
let uniform_index = self.light_uniforms_buffer.insert(queue, &uniform);
|
||||
|
||||
let v = LightDepthMap {
|
||||
atlas_index,
|
||||
uniform_index,
|
||||
};
|
||||
self.depth_maps.insert(
|
||||
entity,
|
||||
v,
|
||||
);
|
||||
self.depth_maps.insert(entity, v);
|
||||
|
||||
v
|
||||
}
|
||||
|
@ -228,7 +241,9 @@ impl Node for ShadowMapsPass {
|
|||
node.add_buffer_slot(
|
||||
ShadowMapsPassSlots::ShadowLightUniformsBuffer,
|
||||
SlotAttribute::Output,
|
||||
Some(SlotValue::Buffer(self.light_uniforms_buffer.clone())),
|
||||
Some(SlotValue::Buffer(
|
||||
self.light_uniforms_buffer.buffer().clone(),
|
||||
)),
|
||||
);
|
||||
|
||||
node.add_buffer_slot(
|
||||
|
@ -257,9 +272,9 @@ impl Node for ShadowMapsPass {
|
|||
|
||||
for (entity, pos, _) in world.view_iter::<(Entities, &Transform, Has<DirectionalLight>)>() {
|
||||
if !self.depth_maps.contains_key(&entity) {
|
||||
|
||||
// TODO: dont pack the textures as they're added
|
||||
let atlas_index = self.create_depth_map(graph.device(), &context.queue, entity, *pos);
|
||||
let atlas_index =
|
||||
self.create_depth_map(&context.queue, entity, *pos);
|
||||
index_components_queue.push_back((entity, atlas_index));
|
||||
|
||||
debug!("Created depth map for {:?} light entity", entity);
|
||||
|
@ -268,10 +283,13 @@ impl Node for ShadowMapsPass {
|
|||
|
||||
// now consume from the queue adding the components to the entities
|
||||
while let Some((entity, depth)) = index_components_queue.pop_front() {
|
||||
world.insert(entity, LightShadowMapId {
|
||||
atlas_index: depth.atlas_index,
|
||||
uniform_index: depth.uniform_index,
|
||||
});
|
||||
world.insert(
|
||||
entity,
|
||||
LightShadowMapId {
|
||||
atlas_index: depth.atlas_index,
|
||||
uniform_index: depth.uniform_index,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
if self.pipeline.is_none() {
|
||||
|
@ -358,11 +376,26 @@ impl Node for ShadowMapsPass {
|
|||
});
|
||||
pass.set_pipeline(&pipeline);
|
||||
let viewport = atlas.texture_viewport(dir_depth_map.atlas_index);
|
||||
debug!("Rendering shadow map to viewport: {viewport:?}, uniform index: {}", dir_depth_map.uniform_index);
|
||||
debug!(
|
||||
"Rendering shadow map to viewport: {viewport:?}, uniform index: {}",
|
||||
dir_depth_map.uniform_index
|
||||
);
|
||||
// only render to the light's map in the atlas
|
||||
pass.set_viewport(viewport.offset.x as _, viewport.offset.y as _, viewport.size.x as _, viewport.size.y as _, 0.0, 1.0);
|
||||
pass.set_viewport(
|
||||
viewport.offset.x as _,
|
||||
viewport.offset.y as _,
|
||||
viewport.size.x as _,
|
||||
viewport.size.y as _,
|
||||
0.0,
|
||||
1.0,
|
||||
);
|
||||
// only clear the light map in the atlas
|
||||
pass.set_scissor_rect(viewport.offset.x, viewport.offset.y, viewport.size.x, viewport.size.y);
|
||||
pass.set_scissor_rect(
|
||||
viewport.offset.x,
|
||||
viewport.offset.y,
|
||||
viewport.size.x,
|
||||
viewport.size.y,
|
||||
);
|
||||
|
||||
for job in render_meshes.iter() {
|
||||
// get the mesh (containing vertices) and the buffers from storage
|
||||
|
@ -373,8 +406,9 @@ impl Node for ShadowMapsPass {
|
|||
}
|
||||
let buffers = buffers.unwrap();
|
||||
|
||||
let uniform_index = uniform_index_offset(&context.device.limits(), dir_depth_map.uniform_index);
|
||||
//debug!("Uniform offset: {uniform_index}");
|
||||
let uniform_index =
|
||||
self.light_uniforms_buffer
|
||||
.offset_of(dir_depth_map.uniform_index) as u32;
|
||||
pass.set_bind_group(0, &self.uniforms_bg, &[uniform_index]);
|
||||
|
||||
// Get the bindgroup for job's transform and bind to it using an offset.
|
||||
|
@ -447,7 +481,7 @@ impl LightShadowMapAtlas {
|
|||
}
|
||||
}
|
||||
|
||||
fn uniform_index_offset(limits: &wgpu::Limits, uniform_idx: u64) -> u32 {
|
||||
/* fn uniform_index_offset(limits: &wgpu::Limits, uniform_idx: u64) -> u32 {
|
||||
let t = uniform_idx as u32 % (limits.max_storage_buffer_binding_size / mem::size_of::<LightShadowUniform>() as u32);
|
||||
t * limits.min_uniform_buffer_offset_alignment
|
||||
}
|
||||
} */
|
||||
|
|
|
@ -18,3 +18,6 @@ pub mod graph;
|
|||
|
||||
mod texture_atlas;
|
||||
pub use texture_atlas::*;
|
||||
|
||||
mod slot_buffer;
|
||||
pub use slot_buffer::*;
|
|
@ -0,0 +1,150 @@
|
|||
use std::{collections::VecDeque, marker::PhantomData, mem, num::NonZeroU64, sync::Arc};
|
||||
|
||||
/// A buffer on the GPU that has persistent indices.
|
||||
///
|
||||
/// `GpuSlotBuffer` allocates a buffer on the GPU and keeps stable indices of elements and
|
||||
/// reuses ones that were removed. It supports aligned buffers with [`GpuSlotBuffer::new_aligned`],
|
||||
/// as well as unaligned buffers with [`GpuSlotBuffer::new`].
|
||||
pub struct GpuSlotBuffer<T: bytemuck::Pod + bytemuck::Zeroable> {
|
||||
/// The amount of elements that can fit in the buffer.
|
||||
capacity: u64,
|
||||
/// The ending point of the buffer elements.
|
||||
len: u64,
|
||||
/// The list of dead and reusable indices in the buffer.
|
||||
dead_indices: VecDeque<u64>,
|
||||
/// The optional alignment of elements in the buffer.
|
||||
alignment: Option<u64>,
|
||||
/// The actual gpu buffer
|
||||
buffer: Arc<wgpu::Buffer>,
|
||||
_marker: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl<T: bytemuck::Pod + bytemuck::Zeroable> GpuSlotBuffer<T> {
|
||||
/// Create a new GpuSlotBuffer with unaligned elements.
|
||||
///
|
||||
/// See [`GpuSlotBuffer::new_aligned`].
|
||||
pub fn new(device: &wgpu::Device, label: Option<&str>, usage: wgpu::BufferUsages, capacity: u64) -> Self {
|
||||
Self::new_impl(device, label, usage, capacity, None)
|
||||
}
|
||||
|
||||
/// Create a new buffer with **aligned** elements.
|
||||
///
|
||||
/// See [`GpuSlotBuffer::new`].
|
||||
pub fn new_aligned(device: &wgpu::Device, label: Option<&str>, usage: wgpu::BufferUsages, capacity: u64, alignment: u64) -> Self {
|
||||
Self::new_impl(device, label, usage, capacity, Some(alignment))
|
||||
}
|
||||
|
||||
fn new_impl(device: &wgpu::Device, label: Option<&str>, usage: wgpu::BufferUsages, capacity: u64, alignment: Option<u64>) -> Self {
|
||||
let buffer = Arc::new(device.create_buffer(&wgpu::BufferDescriptor {
|
||||
label,
|
||||
size: capacity * mem::size_of::<T>() as u64,
|
||||
usage,
|
||||
mapped_at_creation: false,
|
||||
}));
|
||||
|
||||
Self {
|
||||
capacity,
|
||||
len: 0,
|
||||
dead_indices: VecDeque::default(),
|
||||
buffer,
|
||||
alignment,
|
||||
_marker: PhantomData
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculates the byte offset in the buffer of the element at `i`.
|
||||
pub fn offset_of(&self, i: u64) -> u64 {
|
||||
let offset = i * mem::size_of::<T>() as u64;
|
||||
|
||||
if let Some(align) = self.alignment {
|
||||
round_mult::up(offset, NonZeroU64::new(align).unwrap()).unwrap()
|
||||
} else {
|
||||
offset
|
||||
}
|
||||
}
|
||||
|
||||
/// Set an element at `i` in the buffer to `val`.
|
||||
pub fn set_at(&self, queue: &wgpu::Queue, i: u64, val: &T) {
|
||||
let offset = self.offset_of(i);
|
||||
queue.write_buffer(&self.buffer, offset, bytemuck::bytes_of(val));
|
||||
}
|
||||
|
||||
/// Attempt to insert an element to the GPU buffer, returning the index it was inserted at.
|
||||
///
|
||||
/// Returns `None` when the buffer has no space to fit the element.
|
||||
pub fn try_insert(&mut self, queue: &wgpu::Queue, val: &T) -> Option<u64> {
|
||||
// reuse a dead index or get the next one
|
||||
let i = match self.dead_indices.pop_front() {
|
||||
Some(i) => i,
|
||||
None => {
|
||||
if self.len == self.capacity {
|
||||
return None;
|
||||
}
|
||||
|
||||
let i = self.len;
|
||||
self.len += 1;
|
||||
i
|
||||
}
|
||||
};
|
||||
|
||||
self.set_at(queue, i, val);
|
||||
|
||||
Some(i)
|
||||
}
|
||||
|
||||
/// Insert an element to the GPU buffer, returning the index it was inserted at.
|
||||
///
|
||||
/// The index is not guaranteed to be the end of the buffer since this structure reuses
|
||||
/// indices after they're removed.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if the buffer does not have space to fit `val`, see [`GpuSlotBuffer::try_insert`].
|
||||
pub fn insert(&mut self, queue: &wgpu::Queue, val: &T) -> u64 {
|
||||
self.try_insert(queue, val)
|
||||
.expect("GPU slot buffer ran out of slots to push elements into")
|
||||
}
|
||||
|
||||
/// Remove the element at `i`, clearing the elements slot in the buffer.
|
||||
///
|
||||
/// If you do not care that the slot in the buffer is emptied, use
|
||||
/// [`GpuSlotBuffer::remove_quick`].
|
||||
pub fn remove(&mut self, queue: &wgpu::Queue, i: u64) {
|
||||
let mut zeros = Vec::new();
|
||||
zeros.resize(mem::size_of::<T>(), 0);
|
||||
|
||||
let offset = self.offset_of(i);
|
||||
queue.write_buffer(&self.buffer, offset, bytemuck::cast_slice(zeros.as_slice()));
|
||||
self.dead_indices.push_back(i);
|
||||
}
|
||||
|
||||
/// Remove the element at `i` without clearing its space in the buffer.
|
||||
///
|
||||
/// If you want to ensure that the slot in the buffer is emptied, use
|
||||
/// [`GpuSlotBuffer::remove`].
|
||||
pub fn remove_quick(&mut self, i: u64) {
|
||||
self.dead_indices.push_back(i);
|
||||
}
|
||||
|
||||
/// Returns the backing [`wgpu::Buffer`].
|
||||
pub fn buffer(&self) -> &Arc<wgpu::Buffer> {
|
||||
&self.buffer
|
||||
}
|
||||
|
||||
/// Return the length of the buffer.
|
||||
///
|
||||
/// This value may not reflect the amount of elements that are actually alive in the buffer if
|
||||
/// elements were removed and not re-added.
|
||||
pub fn len(&self) -> u64 {
|
||||
self.len
|
||||
}
|
||||
|
||||
/// Return the amount of inuse indices in the buffer.
|
||||
pub fn inuse_len(&self) -> u64 {
|
||||
self.len - self.dead_indices.len() as u64
|
||||
}
|
||||
|
||||
/// Returns the amount of elements the buffer can fit.
|
||||
pub fn capacity(&self) -> u64 {
|
||||
self.capacity
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue