Improve Performance in Scenes With Many Lights #14

Merged
SeanOMik merged 8 commits from bugfix/many-lights-poor-performance into main 2024-04-24 23:55:16 +00:00
9 changed files with 410 additions and 133 deletions
Showing only changes of commit 8eac563229 - Show all commits

7
Cargo.lock generated
View File

@ -1849,6 +1849,7 @@ dependencies = [
"tracing-log 0.1.4", "tracing-log 0.1.4",
"tracing-subscriber", "tracing-subscriber",
"tracing-tracy", "tracing-tracy",
"unique",
"uuid", "uuid",
"wgpu", "wgpu",
"winit", "winit",
@ -3565,6 +3566,12 @@ version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c"
[[package]]
name = "unique"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d360722e1f3884f5b14d332185f02ff111f771f0c76a313268fe6af1409aba96"
[[package]] [[package]]
name = "url" name = "url"
version = "2.5.0" version = "2.5.0"

View File

@ -15,5 +15,8 @@ fps_counter = "3.0.0"
linker = "/usr/bin/clang" linker = "/usr/bin/clang"
rustflags = ["-Clink-arg=-fuse-ld=lld", "-Clink-arg=-Wl,--no-rosegment"] rustflags = ["-Clink-arg=-fuse-ld=lld", "-Clink-arg=-Wl,--no-rosegment"]
[profile.dev]
opt-level = 1
[profile.release] [profile.release]
debug = true debug = true

View File

@ -1,6 +1,6 @@
use std::{collections::{HashMap, VecDeque, HashSet}, ptr::NonNull}; use std::{collections::{HashMap, VecDeque, HashSet}, ptr::NonNull};
use tracing::{debug_span, info_span}; use tracing::{debug_span, info_span, instrument};
use super::System; use super::System;
@ -60,6 +60,7 @@ impl GraphExecutor {
} }
/// Executes the systems in the graph /// Executes the systems in the graph
#[instrument(skip(self, world_ptr, stop_on_error))]
pub fn execute(&mut self, mut world_ptr: NonNull<World>, stop_on_error: bool) pub fn execute(&mut self, mut world_ptr: NonNull<World>, stop_on_error: bool)
-> Result<Vec<GraphExecutorError>, GraphExecutorError> { -> Result<Vec<GraphExecutorError>, GraphExecutorError> {
let mut stack = VecDeque::new(); let mut stack = VecDeque::new();
@ -71,13 +72,11 @@ impl GraphExecutor {
let mut possible_errors = Vec::new(); let mut possible_errors = Vec::new();
let sys_span = info_span!("graph_exec", system=tracing::field::Empty);
while let Some(node) = stack.pop_front() { while let Some(node) = stack.pop_front() {
let system = self.systems.get_mut(node.as_str()).unwrap(); let system = self.systems.get_mut(node.as_str()).unwrap();
sys_span.record("system", system.name.clone()); let span = info_span!("graph_exec", system=system.name.clone());
let _e = sys_span.enter(); let _e = span.enter();
if let Err(e) = system.system.execute(world_ptr) if let Err(e) = system.system.execute(world_ptr)
.map_err(|e| GraphExecutorError::SystemError(node, e)) { .map_err(|e| GraphExecutorError::SystemError(node, e)) {

View File

@ -21,7 +21,7 @@ tracing-tracy = { version = "0.11.0", optional = true }
async-std = { version = "1.12.0", features = [ "unstable", "attributes" ] } async-std = { version = "1.12.0", features = [ "unstable", "attributes" ] }
cfg-if = "1" cfg-if = "1"
bytemuck = { version = "1.12", features = [ "derive" ] } bytemuck = { version = "1.12", features = [ "derive", "min_const_generics" ] }
image = { version = "0.24", default-features = false, features = ["png", "jpeg"] } image = { version = "0.24", default-features = false, features = ["png", "jpeg"] }
anyhow = "1.0" anyhow = "1.0"
instant = "0.1" instant = "0.1"
@ -33,6 +33,7 @@ quote = "1.0.29"
uuid = { version = "1.5.0", features = ["v4", "fast-rng"] } uuid = { version = "1.5.0", features = ["v4", "fast-rng"] }
itertools = "0.11.0" itertools = "0.11.0"
thiserror = "1.0.56" thiserror = "1.0.56"
unique = "0.9.1"
[features] [features]
tracy = ["dep:tracing-tracy"] tracy = ["dep:tracing-tracy"]

View File

@ -0,0 +1,292 @@
use std::{alloc::Layout, cmp, marker::PhantomData, mem};
use std::{alloc, ptr};
use unique::Unique;
/// A [`Vec`] with its elements aligned to a runtime alignment value.
///
/// Elements are stored in a raw byte buffer. Each element occupies one "slot" whose
/// size is a multiple of `align`, so every element starts at an address that is a
/// multiple of the alignment chosen when the vector was created.
pub struct AVec<T> {
// Pointer to the start of the byte buffer; dangling (never dereferenced) while `cap` is 0.
buf: Unique<u8>,
// Number of element slots the current allocation has room for.
cap: usize,
// Number of slots that currently hold an initialized element.
len: usize,
// Alignment, in bytes, used for the allocation and for the stride between slots.
align: usize,
// Ties the vector to its element type `T` even though the buffer is stored as bytes.
_marker: PhantomData<T>,
}
impl<T> AVec<T> {
    // Tiny Vecs are dumb. Skip to:
    // - 8 if the element size is 1, because any heap allocators are likely
    //   to round up a request of less than 8 bytes to at least 8 bytes.
    // - 4 if elements are moderate-sized (<= 1 KiB).
    // - 1 otherwise, to avoid wasting too much space for very short Vecs.
    //
    // Taken from Rust's standard library RawVec
    pub(crate) const MIN_NON_ZERO_CAP: usize = if mem::size_of::<T>() == 1 {
        8
    } else if mem::size_of::<T>() <= 1024 {
        4
    } else {
        1
    };

    /// Constructs a new, empty `AVec` whose elements are aligned to `alignment` bytes.
    ///
    /// The vector does not allocate until elements are added or capacity is reserved.
    ///
    /// # Panics
    ///
    /// Panics if `alignment` is not a power of two, or if it is smaller than the natural
    /// alignment of `T` (elements would otherwise be written to misaligned addresses).
    #[inline]
    pub fn new(alignment: usize) -> Self {
        debug_assert!(mem::size_of::<T>() > 0, "ZSTs not yet supported");
        // The alignment is later handed to `Layout`, which requires a power of two.
        assert!(
            alignment.is_power_of_two(),
            "alignment must be a non-zero power of two"
        );
        assert!(
            alignment >= mem::align_of::<T>(),
            "alignment must be at least the alignment of the element type"
        );

        Self {
            buf: Unique::dangling(),
            cap: 0,
            len: 0,
            align: alignment,
            _marker: PhantomData,
        }
    }

    /// Constructs a new, empty `AVec` with at least the specified capacity.
    ///
    /// The aligned vector will be able to hold at least `capacity` elements without reallocating.
    /// This method may allocate for more elements than `capacity`. If `capacity` is zero,
    /// the vector will not allocate.
    ///
    /// # Panics
    ///
    /// Panics if the capacity exceeds `usize::MAX` bytes, or if `alignment` is invalid
    /// (see [`AVec::new`]).
    #[inline]
    pub fn with_capacity(alignment: usize, capacity: usize) -> Self {
        let mut s = Self::new(alignment);

        if capacity > 0 {
            // SAFETY: the vector is empty (`cap == 0`), so `0 + capacity > cap`.
            unsafe {
                s.grow_amortized(0, capacity);
            }
        }

        s
    }

    /// Calculates the size of the 'slot' for a single **aligned** item.
    ///
    /// This is the size of `T` rounded up to the next multiple of the alignment, which is
    /// also the distance in bytes between two consecutive elements.
    #[inline(always)]
    fn slot_size(&self) -> usize {
        let mask = self.align - 1;
        // Never produce a zero-sized slot: a zero-sized allocation request is not allowed.
        let size = cmp::max(mem::size_of::<T>(), 1);
        (size + mask) & !mask
    }

    /// Reallocates the buffer so that it has room for exactly `new_cap` slots, keeping
    /// the bytes of all existing elements.
    ///
    /// # Safety
    ///
    /// `new_cap` must be greater than the current capacity.
    ///
    /// # Panics
    ///
    /// Panics if the new capacity in bytes overflows.
    unsafe fn grow_to(&mut self, new_cap: usize) {
        debug_assert!(new_cap > self.cap);

        let slot = self.slot_size();
        let new_size = new_cap.checked_mul(slot).expect("Capacity overflow");
        let new_layout =
            Layout::from_size_align(new_size, self.align).expect("Capacity overflow");

        let new_ptr = if self.cap == 0 {
            // SAFETY: `new_size` is non-zero since `new_cap > 0` and `slot > 0`.
            unsafe { alloc::alloc(new_layout) }
        } else {
            // SAFETY: the buffer was allocated by this type with exactly this layout, which
            // was validated when it was created. `new_size` is non-zero and was validated
            // against the same alignment above. `realloc` preserves the existing bytes.
            unsafe {
                let old_layout =
                    Layout::from_size_align_unchecked(self.cap * slot, self.align);
                alloc::realloc(self.buf.as_ptr(), old_layout, new_size)
            }
        };

        if new_ptr.is_null() {
            alloc::handle_alloc_error(new_layout);
        }

        // SAFETY: `new_ptr` was checked to be non-null.
        self.buf = unsafe { Unique::new_unchecked(new_ptr) };
        self.cap = new_cap;
    }

    /// Grows the buffer to hold at least `len + additional` elements, at least doubling
    /// the capacity so that repeated pushes are amortized.
    ///
    /// # Safety
    ///
    /// `len + additional` must be greater than the current capacity.
    ///
    /// # Panics
    ///
    /// Panics if the new capacity exceeds `usize::MAX` bytes.
    #[inline]
    unsafe fn grow_amortized(&mut self, len: usize, additional: usize) {
        debug_assert!(additional > 0);

        let required_cap = len.checked_add(additional).expect("Capacity overflow");

        let cap = cmp::max(self.cap.saturating_mul(2), required_cap);
        let cap = cmp::max(Self::MIN_NON_ZERO_CAP, cap);

        // SAFETY: `cap >= required_cap`, which the caller guarantees exceeds `self.cap`.
        unsafe { self.grow_to(cap) };
    }

    /// Grows the buffer to hold exactly `len + additional` elements.
    ///
    /// # Safety
    ///
    /// `len + additional` must be greater than the current capacity.
    ///
    /// # Panics
    ///
    /// Panics if the new capacity exceeds `usize::MAX` bytes.
    #[inline]
    unsafe fn grow_exact(&mut self, len: usize, additional: usize) {
        debug_assert!(additional > 0);

        let cap = len.checked_add(additional).expect("Capacity overflow");

        // SAFETY: forwarded from the caller's guarantee.
        unsafe { self.grow_to(cap) };
    }

    /// Reserves capacity for at least `additional` more elements.
    ///
    /// The collection may reserve more space to speculatively avoid frequent reallocations.
    /// After calling `reserve`, capacity will be greater than or equal to
    /// `self.len() + additional`. Does nothing if capacity is already sufficient.
    ///
    /// # Panics
    ///
    /// Panics if the new capacity exceeds `usize::MAX` bytes.
    #[inline]
    pub fn reserve(&mut self, additional: usize) {
        let remaining = self.cap - self.len;

        if additional > remaining {
            // SAFETY: `additional > cap - len` implies `len + additional > cap`.
            unsafe { self.grow_amortized(self.len, additional) };
        }
    }

    /// Reserves capacity for `additional` more elements.
    ///
    /// Unlike [`AVec::reserve`], this will not over-allocate to speculatively avoid frequent
    /// reallocations. After calling `reserve_exact`, capacity will be equal to
    /// `self.len() + additional`. Does nothing if the capacity is already sufficient.
    ///
    /// Prefer [`AVec::reserve`] if future insertions are expected.
    ///
    /// # Panics
    ///
    /// Panics if the new capacity exceeds `usize::MAX` bytes.
    #[inline]
    pub fn reserve_exact(&mut self, additional: usize) {
        let remaining = self.cap - self.len;

        if additional > remaining {
            // SAFETY: `additional > cap - len` implies `len + additional > cap`.
            unsafe { self.grow_exact(self.len, additional) };
        }
    }

    /// Appends an element to the back of the collection.
    ///
    /// # Panics
    ///
    /// Panics if the new capacity exceeds `usize::MAX` bytes.
    #[inline]
    pub fn push(&mut self, val: T) {
        if self.len == self.cap {
            // Room for one more element; `reserve` grows the buffer geometrically.
            self.reserve(1);
        }

        unsafe {
            // SAFETY: the length is ensured to be less than the capacity, and the slot at
            // `len` holds no initialized value that would need dropping.
            self.set_at_unchecked(self.len, val);
        }

        self.len += 1;
    }

    /// Writes `val` into the slot at position `idx` without reading or dropping whatever
    /// the slot held before.
    ///
    /// # Safety
    ///
    /// `idx` must be less than the capacity of the vector, otherwise bytes past the end of
    /// the allocation are written to. If the slot already holds an initialized value, that
    /// value is overwritten without being dropped.
    #[inline(always)]
    unsafe fn set_at_unchecked(&mut self, idx: usize, val: T) {
        // SAFETY: the caller guarantees `idx` addresses a slot inside the allocation.
        unsafe {
            let slot = self.buf.as_ptr().add(idx * self.slot_size());
            ptr::write(slot.cast::<T>(), val);
        }
    }

    /// Sets an element at position `idx` within the vector to `val`.
    ///
    /// The value previously stored at `idx` is dropped.
    ///
    /// # Panics
    ///
    /// Panics if `idx >= self.len`.
    #[inline(always)]
    pub fn set_at(&mut self, idx: usize, val: T) {
        assert!(self.len > idx);

        // SAFETY: `idx < len <= cap`, so the slot is inside the allocation and holds an
        // initialized, properly aligned `T`. Plain assignment drops the old value.
        unsafe {
            let slot = self.buf.as_ptr().add(idx * self.slot_size()).cast::<T>();
            *slot = val;
        }
    }

    /// Shortens the vector, keeping the first `len` elements and dropping the rest.
    ///
    /// If `len` is greater or equal to the vectors current length, this has no effect.
    #[inline]
    pub fn truncate(&mut self, len: usize) {
        if len >= self.len {
            return;
        }

        let old_len = self.len;
        let slot = self.slot_size();
        // Shrink first so that a panicking destructor cannot lead to a double drop later.
        self.len = len;

        // drop each element past the new length
        for i in len..old_len {
            // SAFETY: `i < old_len <= cap`, so the slot is in bounds and initialized.
            unsafe {
                ptr::drop_in_place(self.buf.as_ptr().add(i * slot).cast::<T>());
            }
        }
    }

    /// Returns a raw pointer to the start of the underlying byte buffer.
    ///
    /// The pointer is dangling if the vector has not allocated.
    #[inline(always)]
    pub fn as_ptr(&self) -> *const u8 {
        self.buf.as_ptr()
    }

    /// Returns a raw mutable pointer to the start of the underlying byte buffer.
    ///
    /// The pointer is dangling if the vector has not allocated.
    #[inline(always)]
    pub fn as_mut_ptr(&self) -> *mut u8 {
        self.buf.as_ptr()
    }

    /// Returns the alignment of the elements in the vector.
    #[inline(always)]
    pub fn align(&self) -> usize {
        self.align
    }

    /// Returns the length of the vector.
    #[inline(always)]
    pub fn len(&self) -> usize {
        self.len
    }

    /// Returns `true` if the vector contains no elements.
    #[inline(always)]
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// Returns the capacity of the vector.
    ///
    /// The capacity is the amount of elements that the vector can store without reallocating.
    #[inline(always)]
    pub fn capacity(&self) -> usize {
        self.cap
    }
}

impl<T> Drop for AVec<T> {
    /// Drops every element and releases the backing allocation, if there is one.
    fn drop(&mut self) {
        self.truncate(0);

        if self.cap != 0 {
            // SAFETY: a non-zero capacity means the buffer was allocated by this type with
            // exactly this size and alignment.
            unsafe {
                let layout =
                    Layout::from_size_align_unchecked(self.cap * self.slot_size(), self.align);
                alloc::dealloc(self.buf.as_ptr(), layout);
            }
        }
    }
}
impl<T: Clone> AVec<T> {
/// Resized the `AVec` in-place so that `len` is equal to `new_len`.
///
/// If `new_len` is greater than `len`, the `AVec` is extended by the difference, and
/// each additional slot is filled with `value`. If `new_len` is less than `len`,
/// the `AVec` will be truncated by to be `new_len`
///
/// This method requires `T` to implement [`Clone`] in order to clone the passed value.
///
/// # Panics
///
/// Panics if the new capacity exceeds `usize::MAX` bytes.
#[inline]
pub fn resize(&mut self, new_len: usize, value: T) {
if new_len > self.len {
self.reserve(new_len - self.len);
unsafe {
let mut ptr = self.buf
.as_ptr().add(self.len * self.slot_size());
// write all elements besides the last one
for _ in 1..new_len {
std::ptr::write(ptr.cast::<T>(), value.clone());
ptr = ptr.add(self.slot_size());
self.len += 1;
}
if new_len > 0 {
// the last element can be written without cloning
std::ptr::write(ptr.cast::<T>(), value.clone());
self.len += 1;
}
self.len = new_len;
}
} else {
self.truncate(new_len);
}
}
}

View File

@ -13,3 +13,4 @@ pub mod window;
pub mod transform_buffer_storage; pub mod transform_buffer_storage;
pub mod light; pub mod light;
pub mod light_cull_compute; pub mod light_cull_compute;
pub mod avec;

View File

@ -169,7 +169,7 @@ impl BasicRenderer {
format: surface_format, format: surface_format,
width: size.width, width: size.width,
height: size.height, height: size.height,
present_mode, present_mode: wgpu::PresentMode::Immediate,
alpha_mode: surface_caps.alpha_modes[0], alpha_mode: surface_caps.alpha_modes[0],
view_formats: vec![], view_formats: vec![],
}; };
@ -448,7 +448,8 @@ impl Renderer for BasicRenderer {
alive_entities.insert(entity); alive_entities.insert(entity);
if let Some((mesh_han, mesh_epoch)) = mesh_pair { if let Some((mesh_han, mesh_epoch)) = mesh_pair {
let interop_pos = self.interpolate_transforms(now_inst, last_epoch, entity, &transform, transform_epoch); // TODO: speed up interpolating transforms
let interop_pos = *transform; //self.interpolate_transforms(now_inst, last_epoch, entity, &transform, transform_epoch);
if let Some(mesh) = mesh_han.data_ref() { if let Some(mesh) = mesh_han.data_ref() {
// if process mesh did not just create a new mesh, and the epoch // if process mesh did not just create a new mesh, and the epoch
@ -464,8 +465,8 @@ impl Renderer for BasicRenderer {
} }
let group = TransformGroup::EntityRes(entity, mesh_han.uuid()); let group = TransformGroup::EntityRes(entity, mesh_han.uuid());
let transform_id = self.transform_buffers.update_or_push(&self.queue, &self.render_limits, let transform_id = self.transform_buffers.update_or_push(&self.device, &self.queue, &self.render_limits,
group, || ( interop_pos.calculate_mat4(), glam::Mat3::from_quat(interop_pos.rotation) )); group, interop_pos.calculate_mat4(), glam::Mat3::from_quat(interop_pos.rotation));
let material = mesh.material.as_ref().unwrap() let material = mesh.material.as_ref().unwrap()
.data_ref().unwrap(); .data_ref().unwrap();
@ -482,7 +483,8 @@ impl Renderer for BasicRenderer {
lyra_scene::system_update_world_transforms(scene.world(), view).unwrap(); lyra_scene::system_update_world_transforms(scene.world(), view).unwrap();
} }
let interpo_pos = self.interpolate_transforms(now_inst, last_epoch, entity, &transform, transform_epoch); // TODO: speed up interpolating transforms
let interpo_pos = *transform; //self.interpolate_transforms(now_inst, last_epoch, entity, &transform, transform_epoch);
for (mesh_han, pos) in scene.world().view_iter::<(&MeshHandle, &WorldTransform)>() { for (mesh_han, pos) in scene.world().view_iter::<(&MeshHandle, &WorldTransform)>() {
if let Some(mesh) = mesh_han.data_ref() { if let Some(mesh) = mesh_han.data_ref() {
@ -502,8 +504,8 @@ impl Renderer for BasicRenderer {
let scene_mesh_group = TransformGroup::Res(scene_han.uuid(), mesh_han.uuid()); let scene_mesh_group = TransformGroup::Res(scene_han.uuid(), mesh_han.uuid());
let group = TransformGroup::OwnedGroup(entity, scene_mesh_group.into()); let group = TransformGroup::OwnedGroup(entity, scene_mesh_group.into());
let transform_id = self.transform_buffers.update_or_push(&self.queue, &self.render_limits, let transform_id = self.transform_buffers.update_or_push(&self.device, &self.queue, &self.render_limits,
group, || ( mesh_interpo.calculate_mat4(), glam::Mat3::from_quat(mesh_interpo.rotation) )); group, mesh_interpo.calculate_mat4(), glam::Mat3::from_quat(mesh_interpo.rotation) );
let material = mesh.material.as_ref().unwrap() let material = mesh.material.as_ref().unwrap()
.data_ref().unwrap(); .data_ref().unwrap();
@ -517,7 +519,7 @@ impl Renderer for BasicRenderer {
} }
// collect dead entities // collect dead entities
self.transform_buffers.tick(); self.transform_buffers.send_to_gpu(&self.queue);
// when buffer storage length does not match the amount of iterated entities, // when buffer storage length does not match the amount of iterated entities,
// remove all dead entities, and their buffers, if they weren't iterated over // remove all dead entities, and their buffers, if they weren't iterated over
@ -611,7 +613,7 @@ impl Renderer for BasicRenderer {
// Get the bindgroup for job's transform and bind to it using an offset. // Get the bindgroup for job's transform and bind to it using an offset.
let bindgroup = self.transform_buffers.bind_group(job.transform_id); let bindgroup = self.transform_buffers.bind_group(job.transform_id);
let offset = self.transform_buffers.buffer_offset(job.transform_id); let offset = self.transform_buffers.buffer_offset(job.transform_id);
render_pass.set_bind_group(1, bindgroup, &[ offset, offset, ]); render_pass.set_bind_group(1, bindgroup, &[ offset, ]);
render_pass.set_bind_group(2, &self.camera_buffer.bindgroup(), &[]); render_pass.set_bind_group(2, &self.camera_buffer.bindgroup(), &[]);
render_pass.set_bind_group(3, &self.light_buffers.bind_group_pair.bindgroup, &[]); render_pass.set_bind_group(3, &self.light_buffers.bind_group_pair.bindgroup, &[]);

View File

@ -21,6 +21,11 @@ struct VertexOutput {
@location(2) world_normal: vec3<f32>, @location(2) world_normal: vec3<f32>,
} }
// Per-object transform data, bound as the uniform at group 1, binding 0.
struct TransformData {
// Model matrix; the vertex shader multiplies vertex positions by it.
transform: mat4x4<f32>,
// Normal matrix. The vertex shader only uses the xyz parts of its first three columns
// (a mat3x3); it is declared as a mat4x4 because of the wgpu issue referenced there.
normal_matrix: mat4x4<f32>,
}
struct CameraUniform { struct CameraUniform {
view: mat4x4<f32>, view: mat4x4<f32>,
inverse_projection: mat4x4<f32>, inverse_projection: mat4x4<f32>,
@ -51,9 +56,7 @@ struct Lights {
}; };
@group(1) @binding(0) @group(1) @binding(0)
var<uniform> u_model_transform: mat4x4<f32>; var<uniform> u_model_transform_data: TransformData;
@group(1) @binding(1)
var<uniform> u_model_normal_matrix: mat4x4<f32>;
@group(2) @binding(0) @group(2) @binding(0)
var<uniform> u_camera: CameraUniform; var<uniform> u_camera: CameraUniform;
@ -68,13 +71,14 @@ fn vs_main(
var out: VertexOutput; var out: VertexOutput;
out.tex_coords = model.tex_coords; out.tex_coords = model.tex_coords;
out.clip_position = u_camera.view_projection * u_model_transform * vec4<f32>(model.position, 1.0); out.clip_position = u_camera.view_projection * u_model_transform_data.transform * vec4<f32>(model.position, 1.0);
// the normal mat is actually only a mat3x3, but there's a bug in wgpu: https://github.com/gfx-rs/wgpu-rs/issues/36 // the normal mat is actually only a mat3x3, but there's a bug in wgpu: https://github.com/gfx-rs/wgpu-rs/issues/36
let normal_mat = mat3x3(u_model_normal_matrix[0].xyz, u_model_normal_matrix[1].xyz, u_model_normal_matrix[2].xyz); let normal_mat4 = u_model_transform_data.normal_matrix;
let normal_mat = mat3x3(normal_mat4[0].xyz, normal_mat4[1].xyz, normal_mat4[2].xyz);
out.world_normal = normalize(normal_mat * model.normal, ); out.world_normal = normalize(normal_mat * model.normal, );
var world_position: vec4<f32> = u_model_transform * vec4<f32>(model.position, 1.0); var world_position: vec4<f32> = u_model_transform_data.transform * vec4<f32>(model.position, 1.0);
out.world_position = world_position.xyz; out.world_position = world_position.xyz;
return out; return out;

View File

@ -7,6 +7,8 @@ use wgpu::Limits;
use std::mem; use std::mem;
use crate::render::avec::AVec;
/// A group id created from a [`TransformGroup`]. /// A group id created from a [`TransformGroup`].
/// ///
/// This is mainly created so that [`TransformGroup::OwnedGroup`] can use another group inside of it. /// This is mainly created so that [`TransformGroup::OwnedGroup`] can use another group inside of it.
@ -67,8 +69,10 @@ pub struct TransformIndex {
struct BufferEntry { struct BufferEntry {
pub len: usize, pub len: usize,
pub bindgroup: wgpu::BindGroup, pub bindgroup: wgpu::BindGroup,
pub transform_buffer: wgpu::Buffer, pub buffer: wgpu::Buffer,
pub normal_buffer: wgpu::Buffer, transforms: AVec<TransformNormalMatPair>,
//pub normal_buffer: wgpu::Buffer,
} }
/// A HashMap that caches values for reuse. /// A HashMap that caches values for reuse.
@ -159,10 +163,12 @@ impl<K: Hash + Eq + PartialEq + Clone, V: Clone, S: BuildHasher> CachedValMap<K,
/// update, and retrieve the transforms. /// update, and retrieve the transforms.
pub struct TransformBuffers { pub struct TransformBuffers {
pub bindgroup_layout: wgpu::BindGroupLayout, pub bindgroup_layout: wgpu::BindGroupLayout,
groups: CachedValMap<TransformGroupId, TransformIndex>, //groups: CachedValMap<TransformGroupId, TransformIndex>,
//groups: SlotMap<TransformGroupId, TransformIndex>,
entries: Vec<BufferEntry>, entries: Vec<BufferEntry>,
limits: wgpu::Limits, limits: wgpu::Limits,
max_transform_count: usize, max_transform_count: usize,
next_index: usize,
} }
impl TransformBuffers { impl TransformBuffers {
@ -181,26 +187,16 @@ impl TransformBuffers {
}, },
count: None, count: None,
}, },
wgpu::BindGroupLayoutEntry {
binding: 1,
visibility: wgpu::ShaderStages::VERTEX,
ty: wgpu::BindingType::Buffer {
ty: wgpu::BufferBindingType::Uniform,
has_dynamic_offset: true,
min_binding_size: None,
},
count: None,
}
], ],
label: Some("transform_bind_group_layout"), label: Some("transform_bind_group_layout"),
}); });
let mut s = Self { let mut s = Self {
bindgroup_layout, bindgroup_layout,
groups: Default::default(),
entries: Default::default(), entries: Default::default(),
max_transform_count: (limits.max_uniform_buffer_binding_size / 2) as usize / (mem::size_of::<glam::Mat4>()), max_transform_count: (limits.max_uniform_buffer_binding_size) as usize / (limits.min_uniform_buffer_offset_alignment as usize), //(mem::size_of::<glam::Mat4>()),
limits, limits,
next_index: 0,
}; };
// create the first uniform buffer // create the first uniform buffer
@ -209,73 +205,59 @@ impl TransformBuffers {
s s
} }
/// Update an existing transform in the buffers. /// Write the transform buffers to the gpu.
/// ///
/// # Panics /// This uses [`wgpu::Queue::write_buffer`], so the write is not immediately submitted,
/// Panics if the `entity_group` is not already inside of the buffers. /// and instead enqueued internally to happen at the start of the next submit() call.
#[instrument(skip(self, queue, limits, entity_group, transform, normal_matrix))] pub fn send_to_gpu(&mut self, queue: &wgpu::Queue) {
pub fn update_transform(&mut self, queue: &wgpu::Queue, limits: &Limits, entity_group: TransformGroup, transform: glam::Mat4, normal_matrix: glam::Mat3) -> TransformIndex { self.next_index = 0;
let index = *self.groups.get(entity_group.into())
.expect("Use 'push_transform' for new entities");
let entry = self.entries.get_mut(index.entry_index).unwrap();
let normal_matrix = glam::Mat4::from_mat3(normal_matrix); for entry in &mut self.entries {
entry.len = 0;
// write the transform and normal to the end of the transform let p = entry.transforms.as_ptr();
let offset = Self::get_buffer_offset(limits, index) as _; let bytes = unsafe { std::slice::from_raw_parts(p as *const u8, entry.transforms.len() * entry.transforms.align()) };
queue.write_buffer(&entry.transform_buffer, offset, bytemuck::bytes_of(&transform));
queue.write_buffer(&entry.normal_buffer, offset, bytemuck::bytes_of(&normal_matrix));
index queue.write_buffer(&entry.buffer, 0, bytes);
} }
/// Push a new transform into the buffers.
#[instrument(skip(self, queue, limits, entity_group, transform, normal_matrix))]
pub fn push_transform(&mut self, queue: &wgpu::Queue, limits: &Limits, entity_group: TransformGroup, transform: glam::Mat4, normal_matrix: glam::Mat3) -> TransformIndex {
self.groups.insert(entity_group.into(), || {
// this closure is only called when there are no values that can be reused,
// so we get a brand new index at the end of the last entry in the chain.
let last = self.entries.last_mut().unwrap();
// ensure the gpu buffer is not overflown
debug_assert!(last.len < self.max_transform_count,
"Transform buffer is filled and 'next_indices' was not incremented! \
Was a new buffer created?");
let tidx = last.len;
last.len += 1;
TransformIndex {
entry_index: self.entries.len() - 1,
transform_index: tidx
}
});
self.update_transform(queue, limits, entity_group, transform, normal_matrix)
}
/// Collect the dead transforms and prepare self to check next time.
pub fn tick(&mut self) {
self.groups.update();
}
/// Returns a boolean indicating if the buffer contains this group.
pub fn contains(&self, group: TransformGroup) -> bool {
self.groups.contains(group.into())
} }
/// Update an existing transform group or if its not existing yet, pushes it to the buffer. /// Update an existing transform group or if its not existing yet, pushes it to the buffer.
/// ///
/// Returns: the index that the transform is at in the buffers. /// Returns: the index that the transform is at in the buffers.
#[instrument(skip(self, queue, limits, group, transform_fn))] #[instrument(skip(self, device, queue, limits, group, transform, normal_matrix))]
pub fn update_or_push<F>(&mut self, queue: &wgpu::Queue, limits: &Limits, group: TransformGroup, transform_fn: F) -> TransformIndex #[inline(always)]
where F: Fn() -> (glam::Mat4, glam::Mat3) pub fn update_or_push(&mut self, device: &wgpu::Device, queue: &wgpu::Queue, limits: &Limits, group: TransformGroup, transform: glam::Mat4, normal_matrix: glam::Mat3) -> TransformIndex
{ {
let (transform, normal_matrix) = transform_fn(); // maybe will be used at some point again
if self.contains(group) { let _ = (queue, limits, group);
self.update_transform(queue, limits, group, transform, normal_matrix)
} else { let normal_matrix = glam::Mat4::from_mat3(normal_matrix);
self.push_transform(queue, limits, group, transform, normal_matrix)
let index = self.next_index;
self.next_index += 1;
// the index of the entry to put the transform into
let entry_index = index / self.max_transform_count;
// the index of the transform in the buffer
let transform_index = index % self.max_transform_count;
if entry_index >= self.entries.len() {
self.expand_buffers(device);
}
let entry = self.entries.get_mut(entry_index).unwrap();
// write the transform and normal to the end of the transform
entry.transforms.set_at(transform_index, TransformNormalMatPair {
transform,
normal_mat: normal_matrix,
});
entry.len += 1;
TransformIndex {
entry_index: 0,
transform_index: index,
} }
} }
@ -297,21 +279,9 @@ impl TransformBuffers {
} }
); );
let normal_mat_buffer = device.create_buffer( let tran_stride = mem::size_of::<TransformNormalMatPair>();
&wgpu::BufferDescriptor {
label: Some(&format!("B_NormalMatrix_{}", self.entries.len())),
usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
size: max_buffer_sizes,
mapped_at_creation: false,
}
);
let tran_stride = mem::size_of::<glam::Mat4>(); let bindgroup = device.create_bind_group(&wgpu::BindGroupDescriptor {
// although a normal matrix only needs to be a mat3, there's a weird issue with
// misalignment from wgpu or spirv-cross: https://github.com/gfx-rs/wgpu-rs/issues/36
let norm_stride = mem::size_of::<glam::Mat4>();
let transform_bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
layout: &self.bindgroup_layout, layout: &self.bindgroup_layout,
entries: &[ entries: &[
wgpu::BindGroupEntry { wgpu::BindGroupEntry {
@ -324,42 +294,34 @@ impl TransformBuffers {
} }
) )
}, },
wgpu::BindGroupEntry {
binding: 1,
resource: wgpu::BindingResource::Buffer(
wgpu::BufferBinding {
buffer: &normal_mat_buffer,
offset: 0,
size: Some(NonZeroU64::new(norm_stride as u64).unwrap())
}
)
}
], ],
label: Some("BG_Transforms"), label: Some("BG_Transforms"),
}); });
let mut transforms = AVec::new(limits.min_uniform_buffer_offset_alignment as _);
transforms.resize(self.max_transform_count, TransformNormalMatPair {
transform: glam::Mat4::IDENTITY,
normal_mat: glam::Mat4::IDENTITY,
});
let entry = BufferEntry { let entry = BufferEntry {
bindgroup: transform_bind_group, bindgroup,
transform_buffer, buffer: transform_buffer,
normal_buffer: normal_mat_buffer,
len: 0, len: 0,
transforms,
}; };
self.entries.push(entry); self.entries.push(entry);
} }
/// Returns the bind group for the transform index. /// Returns the bind group for the transform index.
#[inline(always)]
pub fn bind_group(&self, transform_id: TransformIndex) -> &wgpu::BindGroup { pub fn bind_group(&self, transform_id: TransformIndex) -> &wgpu::BindGroup {
let entry = self.entries.get(transform_id.entry_index).unwrap(); let entry_index = transform_id.transform_index / self.max_transform_count;
let entry = self.entries.get(entry_index).unwrap();
&entry.bindgroup &entry.bindgroup
} }
/// Get the buffer offset for a transform using wgpu limits.
///
/// If its possible to borrow immutably, use [`TransformBuffers::buffer_offset`].
fn get_buffer_offset(limits: &wgpu::Limits, transform_index: TransformIndex) -> u32 {
transform_index.transform_index as u32 * limits.min_uniform_buffer_offset_alignment as u32
}
/// Returns the offset of the transform inside the bind group buffer. /// Returns the offset of the transform inside the bind group buffer.
/// ///
/// ```nobuild /// ```nobuild
@ -367,15 +329,21 @@ impl TransformBuffers {
/// let offset = transform_buffers.buffer_offset(job.transform_id); /// let offset = transform_buffers.buffer_offset(job.transform_id);
/// render_pass.set_bind_group(1, bindgroup, &[ offset, offset, ]); /// render_pass.set_bind_group(1, bindgroup, &[ offset, offset, ]);
/// ``` /// ```
#[inline(always)]
pub fn buffer_offset(&self, transform_index: TransformIndex) -> u32 { pub fn buffer_offset(&self, transform_index: TransformIndex) -> u32 {
Self::get_buffer_offset(&self.limits, transform_index) //Self::get_buffer_offset(&self.limits, transform_index)
let transform_index = transform_index.transform_index % self.max_transform_count;
let t = transform_index as u32 * self.limits.min_uniform_buffer_offset_alignment as u32;
//debug!("offset: {t}");
t
} }
/// Returns a boolean indicating if the buffers need to be expanded /// Returns a boolean indicating if the buffers need to be expanded
pub fn needs_expand(&self) -> bool { pub fn needs_expand(&self) -> bool {
self.entries.last() false
/* self.entries.last()
.map(|entry| entry.len >= self.max_transform_count) .map(|entry| entry.len >= self.max_transform_count)
.unwrap_or(false) .unwrap_or(false) */
} }
} }