From 8eac563229d9b43373df17e36686286eb1c2ee9b Mon Sep 17 00:00:00 2001 From: SeanOMik Date: Sun, 21 Apr 2024 00:54:45 -0400 Subject: [PATCH] render: significantly improve performance of TransformBuffers Before the changes, a release build of 'many-lights' was running at about 130fps, now it's 430fps --- Cargo.lock | 7 + examples/many-lights/Cargo.toml | 3 + lyra-ecs/src/system/graph.rs | 9 +- lyra-game/Cargo.toml | 5 +- lyra-game/src/render/avec.rs | 292 ++++++++++++++++++ lyra-game/src/render/mod.rs | 3 +- lyra-game/src/render/renderer.rs | 20 +- lyra-game/src/render/shaders/base.wgsl | 16 +- .../src/render/transform_buffer_storage.rs | 188 +++++------ 9 files changed, 410 insertions(+), 133 deletions(-) create mode 100644 lyra-game/src/render/avec.rs diff --git a/Cargo.lock b/Cargo.lock index e5d9d61..6df55fd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1849,6 +1849,7 @@ dependencies = [ "tracing-log 0.1.4", "tracing-subscriber", "tracing-tracy", + "unique", "uuid", "wgpu", "winit", @@ -3565,6 +3566,12 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" +[[package]] +name = "unique" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d360722e1f3884f5b14d332185f02ff111f771f0c76a313268fe6af1409aba96" + [[package]] name = "url" version = "2.5.0" diff --git a/examples/many-lights/Cargo.toml b/examples/many-lights/Cargo.toml index e8e5536..f552f46 100644 --- a/examples/many-lights/Cargo.toml +++ b/examples/many-lights/Cargo.toml @@ -15,5 +15,8 @@ fps_counter = "3.0.0" linker = "/usr/bin/clang" rustflags = ["-Clink-arg=-fuse-ld=lld", "-Clink-arg=-Wl,--no-rosegment"] +[profile.dev] +opt-level = 1 + [profile.release] debug = true \ No newline at end of file diff --git a/lyra-ecs/src/system/graph.rs b/lyra-ecs/src/system/graph.rs index 5282532..b497cad 100644 --- a/lyra-ecs/src/system/graph.rs +++ 
b/lyra-ecs/src/system/graph.rs @@ -1,6 +1,6 @@ use std::{collections::{HashMap, VecDeque, HashSet}, ptr::NonNull}; -use tracing::{debug_span, info_span}; +use tracing::{debug_span, info_span, instrument}; use super::System; @@ -60,6 +60,7 @@ impl GraphExecutor { } /// Executes the systems in the graph + #[instrument(skip(self, world_ptr, stop_on_error))] pub fn execute(&mut self, mut world_ptr: NonNull, stop_on_error: bool) -> Result, GraphExecutorError> { let mut stack = VecDeque::new(); @@ -71,13 +72,11 @@ impl GraphExecutor { let mut possible_errors = Vec::new(); - let sys_span = info_span!("graph_exec", system=tracing::field::Empty); - while let Some(node) = stack.pop_front() { let system = self.systems.get_mut(node.as_str()).unwrap(); - sys_span.record("system", system.name.clone()); - let _e = sys_span.enter(); + let span = info_span!("graph_exec", system=system.name.clone()); + let _e = span.enter(); if let Err(e) = system.system.execute(world_ptr) .map_err(|e| GraphExecutorError::SystemError(node, e)) { diff --git a/lyra-game/Cargo.toml b/lyra-game/Cargo.toml index ca45d9b..d4e6009 100644 --- a/lyra-game/Cargo.toml +++ b/lyra-game/Cargo.toml @@ -21,7 +21,7 @@ tracing-tracy = { version = "0.11.0", optional = true } async-std = { version = "1.12.0", features = [ "unstable", "attributes" ] } cfg-if = "1" -bytemuck = { version = "1.12", features = [ "derive" ] } +bytemuck = { version = "1.12", features = [ "derive", "min_const_generics" ] } image = { version = "0.24", default-features = false, features = ["png", "jpeg"] } anyhow = "1.0" instant = "0.1" @@ -33,6 +33,7 @@ quote = "1.0.29" uuid = { version = "1.5.0", features = ["v4", "fast-rng"] } itertools = "0.11.0" thiserror = "1.0.56" +unique = "0.9.1" [features] -tracy = ["dep:tracing-tracy"] \ No newline at end of file +tracy = ["dep:tracing-tracy"] diff --git a/lyra-game/src/render/avec.rs b/lyra-game/src/render/avec.rs new file mode 100644 index 0000000..7bf1cff --- /dev/null +++ 
b/lyra-game/src/render/avec.rs @@ -0,0 +1,292 @@ +use std::{alloc::Layout, cmp, marker::PhantomData, mem}; + +use std::{alloc, ptr}; +use unique::Unique; + +/// A [`Vec`] with its elements aligned to a runtime alignment value. +pub struct AVec { + buf: Unique, + cap: usize, + len: usize, + align: usize, + _marker: PhantomData, +} + +impl AVec { + // Tiny Vecs are dumb. Skip to: + // - 8 if the element size is 1, because any heap allocators are likely + // to round up a request of less than 8 bytes to at least 8 bytes. + // - 4 if elements are moderate-sized (<= 1 KiB). + // - 1 otherwise, to avoid wasting too much space for very short Vecs. + // + // Taken from Rust's standard library RawVec + pub(crate) const MIN_NON_ZERO_CAP: usize = if mem::size_of::() == 1 { + 8 + } else if mem::size_of::() <= 1024 { + 4 + } else { + 1 + }; + + #[inline] + pub fn new(alignment: usize) -> Self { + debug_assert!(mem::size_of::() > 0, "ZSTs not yet supported"); + + Self { + buf: Unique::dangling(), + cap: 0, + len: 0, + align: alignment, + _marker: PhantomData + } + } + + /// Constructs a new, empty `AVec` with at least the specified capacity. + /// + /// The aligned vector will be able to hold at least `capacity` elements without reallocating. + /// This method may allocate for more elements than `capacity`. If `capacity` is zero, + /// the vector will not allocate. + /// + /// # Panics + /// + /// Panics if the capacity exceeds `usize::MAX` bytes. + #[inline] + pub fn with_capacity(alignment: usize, capacity: usize) -> Self { + let mut s = Self::new(alignment); + + if capacity > 0 { + unsafe { + s.grow_amortized(0, capacity); + } + } + + s + } + + /// Calculates the size of the 'slot' for a single **aligned** item. + #[inline(always)] + fn slot_size(&self) -> usize { + let a = self.align - 1; + mem::align_of::() + (a) & !a + } + + /// # Panics + /// + /// Panics if the new capacity exceeds `usize::MAX` bytes. 
+ #[inline] + unsafe fn grow_amortized(&mut self, len: usize, additional: usize) { + debug_assert!(additional > 0); + + let required_cap = len.checked_add(additional) + .expect("Capacity overflow"); + + let cap = cmp::max(self.cap * 2, required_cap); + let cap = cmp::max(Self::MIN_NON_ZERO_CAP, cap); + + let new_layout = Layout::from_size_align_unchecked(cap * self.slot_size(), self.align); + + let ptr = alloc::alloc(new_layout); + self.buf = Unique::new_unchecked(ptr); + self.cap = cap; + } + + /// # Panics + /// + /// Panics if the new capacity exceeds `usize::MAX` bytes. + #[inline] + unsafe fn grow_exact(&mut self, len: usize, additional: usize) { + debug_assert!(additional > 0); + + let cap = len.checked_add(additional) + .expect("Capacity overflow"); + + let new_layout = Layout::from_size_align_unchecked(cap * self.slot_size(), self.align); + + let ptr = alloc::alloc(new_layout); + self.buf = Unique::new_unchecked(ptr); + self.cap = cap; + } + + /// Reserves capacity for at least `additional` more elements. + /// + /// The collection may reserve more space to speculatively avoid frequent reallocations. + /// After calling `reserve`, capacity will be greater than or equal to + /// `self.len() + additional`. Does nothing if capacity is already sufficient. + /// + /// # Panics + /// + /// Panics if the new capacity exceeds `usize::MAX` bytes. + #[inline] + pub fn reserve(&mut self, additional: usize) { + debug_assert!(additional > 0); + + let remaining = self.capacity().wrapping_sub(self.len); + + if additional > remaining { + unsafe { self.grow_amortized(self.len, additional) }; + } + } + + /// Reserves capacity for `additional` more elements. + /// + /// Unlike [`reserve`], this will not over-allocate to speculatively avoid frequent + /// reallocations. After calling `reserve_exact`, capacity will be equal to + /// `self.len() + additional`. Does nothing if the capacity is already sufficient. + /// + /// Prefer [`reserve`] if future insertions are expected. 
+ /// + /// # Panics + /// + /// Panics if the new capacity exceeds `usize::MAX` bytes. + #[inline] + pub fn reserve_exact(&mut self, additional: usize) { + let remaining = self.capacity().wrapping_sub(self.len); + + if additional > remaining { + unsafe { self.grow_exact(self.len, additional) }; + } + } + + /// Appends an element to the back of the collection. + /// + /// # Panics + /// + /// Panics if the new capacity exceeds `usize::MAX` bytes. + #[inline] + pub fn push(&mut self, val: T) { + if self.len == self.cap { + self.reserve(self.slot_size()); + } + + unsafe { + // SAFETY: the length is ensured to be less than the capacity. + self.set_at_unchecked(self.len, val); + } + + self.len += 1; + } + + /// Sets an element at position `idx` within the vector to `val`. + /// + /// # Safety + /// + /// If `idx >= self.len`, bytes past the length of the vector will be written to, potentially + /// also writing past the capacity of the vector. + #[inline(always)] + unsafe fn set_at_unchecked(&mut self, idx: usize, val: T) { + let ptr = self.buf + .as_ptr() + .add(idx * self.slot_size()); + + std::ptr::write(ptr.cast::(), val); + } + + /// Sets an element at position `idx` within the vector to `val`. + /// + /// # Panics + /// + /// Panics if `idx >= self.len`. + #[inline(always)] + pub fn set_at(&mut self, idx: usize, val: T) { + assert!(self.len > idx); + + unsafe { + self.set_at_unchecked(idx, val); + } + } + + /// Shortens the vector, keeping the first `len` elements and dropping the rest. + /// + /// If `len` is greater than or equal to the vector’s current length, this has no effect. 
+ #[inline] + pub fn truncate(&mut self, len: usize) { + if len > self.len { + return; + } + + unsafe { + // drop each element past the new length + for i in len..self.len { + let ptr = self.buf.as_ptr() + .add(i * self.slot_size()) + .cast::(); + + ptr::drop_in_place(ptr); + } + } + + self.len = len; + } + + #[inline(always)] + pub fn as_ptr(&self) -> *const u8 { + self.buf.as_ptr() + } + + #[inline(always)] + pub fn as_mut_ptr(&self) -> *mut u8 { + self.buf.as_ptr() + } + + /// Returns the alignment of the elements in the vector. + #[inline(always)] + pub fn align(&self) -> usize { + self.align + } + + /// Returns the length of the vector. + #[inline(always)] + pub fn len(&self) -> usize { + self.len + } + + /// Returns the capacity of the vector. + /// + /// The capacity is the number of elements that the vector can store without reallocating. + #[inline(always)] + pub fn capacity(&self) -> usize { + self.cap + } +} + +impl AVec { + /// Resizes the `AVec` in-place so that `len` is equal to `new_len`. + /// + /// If `new_len` is greater than `len`, the `AVec` is extended by the difference, and + /// each additional slot is filled with `value`. If `new_len` is less than `len`, + /// the `AVec` will be truncated to `new_len`. + /// + /// This method requires `T` to implement [`Clone`] in order to clone the passed value. + /// + /// # Panics + /// + /// Panics if the new capacity exceeds `usize::MAX` bytes. 
+ #[inline] + pub fn resize(&mut self, new_len: usize, value: T) { + if new_len > self.len { + self.reserve(new_len - self.len); + + unsafe { + let mut ptr = self.buf + .as_ptr().add(self.len * self.slot_size()); + + // write all elements besides the last one + for _ in 1..new_len { + std::ptr::write(ptr.cast::(), value.clone()); + ptr = ptr.add(self.slot_size()); + self.len += 1; + } + + if new_len > 0 { + // the last element can be written without cloning + std::ptr::write(ptr.cast::(), value.clone()); + self.len += 1; + } + + self.len = new_len; + } + } else { + self.truncate(new_len); + } + } +} \ No newline at end of file diff --git a/lyra-game/src/render/mod.rs b/lyra-game/src/render/mod.rs index 5475230..d7985d5 100755 --- a/lyra-game/src/render/mod.rs +++ b/lyra-game/src/render/mod.rs @@ -12,4 +12,5 @@ pub mod camera; pub mod window; pub mod transform_buffer_storage; pub mod light; -pub mod light_cull_compute; \ No newline at end of file +pub mod light_cull_compute; +pub mod avec; \ No newline at end of file diff --git a/lyra-game/src/render/renderer.rs b/lyra-game/src/render/renderer.rs index cca268b..d4f95d8 100755 --- a/lyra-game/src/render/renderer.rs +++ b/lyra-game/src/render/renderer.rs @@ -169,7 +169,7 @@ impl BasicRenderer { format: surface_format, width: size.width, height: size.height, - present_mode, + present_mode: wgpu::PresentMode::Immediate, alpha_mode: surface_caps.alpha_modes[0], view_formats: vec![], }; @@ -448,7 +448,8 @@ impl Renderer for BasicRenderer { alive_entities.insert(entity); if let Some((mesh_han, mesh_epoch)) = mesh_pair { - let interop_pos = self.interpolate_transforms(now_inst, last_epoch, entity, &transform, transform_epoch); + // TODO: speed up interpolating transforms + let interop_pos = *transform; //self.interpolate_transforms(now_inst, last_epoch, entity, &transform, transform_epoch); if let Some(mesh) = mesh_han.data_ref() { // if process mesh did not just create a new mesh, and the epoch @@ -464,8 +465,8 @@ impl 
Renderer for BasicRenderer { } let group = TransformGroup::EntityRes(entity, mesh_han.uuid()); - let transform_id = self.transform_buffers.update_or_push(&self.queue, &self.render_limits, - group, || ( interop_pos.calculate_mat4(), glam::Mat3::from_quat(interop_pos.rotation) )); + let transform_id = self.transform_buffers.update_or_push(&self.device, &self.queue, &self.render_limits, + group, interop_pos.calculate_mat4(), glam::Mat3::from_quat(interop_pos.rotation)); let material = mesh.material.as_ref().unwrap() .data_ref().unwrap(); @@ -482,7 +483,8 @@ impl Renderer for BasicRenderer { lyra_scene::system_update_world_transforms(scene.world(), view).unwrap(); } - let interpo_pos = self.interpolate_transforms(now_inst, last_epoch, entity, &transform, transform_epoch); + // TODO: speed up interpolating transforms + let interpo_pos = *transform; //self.interpolate_transforms(now_inst, last_epoch, entity, &transform, transform_epoch); for (mesh_han, pos) in scene.world().view_iter::<(&MeshHandle, &WorldTransform)>() { if let Some(mesh) = mesh_han.data_ref() { @@ -502,8 +504,8 @@ impl Renderer for BasicRenderer { let scene_mesh_group = TransformGroup::Res(scene_han.uuid(), mesh_han.uuid()); let group = TransformGroup::OwnedGroup(entity, scene_mesh_group.into()); - let transform_id = self.transform_buffers.update_or_push(&self.queue, &self.render_limits, - group, || ( mesh_interpo.calculate_mat4(), glam::Mat3::from_quat(mesh_interpo.rotation) )); + let transform_id = self.transform_buffers.update_or_push(&self.device, &self.queue, &self.render_limits, + group, mesh_interpo.calculate_mat4(), glam::Mat3::from_quat(mesh_interpo.rotation) ); let material = mesh.material.as_ref().unwrap() .data_ref().unwrap(); @@ -517,7 +519,7 @@ impl Renderer for BasicRenderer { } // collect dead entities - self.transform_buffers.tick(); + self.transform_buffers.send_to_gpu(&self.queue); // when buffer storage length does not match the amount of iterated entities, // remove all dead 
entities, and their buffers, if they weren't iterated over @@ -611,7 +613,7 @@ impl Renderer for BasicRenderer { // Get the bindgroup for job's transform and bind to it using an offset. let bindgroup = self.transform_buffers.bind_group(job.transform_id); let offset = self.transform_buffers.buffer_offset(job.transform_id); - render_pass.set_bind_group(1, bindgroup, &[ offset, offset, ]); + render_pass.set_bind_group(1, bindgroup, &[ offset, ]); render_pass.set_bind_group(2, &self.camera_buffer.bindgroup(), &[]); render_pass.set_bind_group(3, &self.light_buffers.bind_group_pair.bindgroup, &[]); diff --git a/lyra-game/src/render/shaders/base.wgsl b/lyra-game/src/render/shaders/base.wgsl index d059aa0..9e86e04 100755 --- a/lyra-game/src/render/shaders/base.wgsl +++ b/lyra-game/src/render/shaders/base.wgsl @@ -21,6 +21,11 @@ struct VertexOutput { @location(2) world_normal: vec3, } +struct TransformData { + transform: mat4x4, + normal_matrix: mat4x4, +} + struct CameraUniform { view: mat4x4, inverse_projection: mat4x4, @@ -51,9 +56,7 @@ struct Lights { }; @group(1) @binding(0) -var u_model_transform: mat4x4; -@group(1) @binding(1) -var u_model_normal_matrix: mat4x4; +var u_model_transform_data: TransformData; @group(2) @binding(0) var u_camera: CameraUniform; @@ -68,13 +71,14 @@ fn vs_main( var out: VertexOutput; out.tex_coords = model.tex_coords; - out.clip_position = u_camera.view_projection * u_model_transform * vec4(model.position, 1.0); + out.clip_position = u_camera.view_projection * u_model_transform_data.transform * vec4(model.position, 1.0); // the normal mat is actually only a mat3x3, but there's a bug in wgpu: https://github.com/gfx-rs/wgpu-rs/issues/36 - let normal_mat = mat3x3(u_model_normal_matrix[0].xyz, u_model_normal_matrix[1].xyz, u_model_normal_matrix[2].xyz); + let normal_mat4 = u_model_transform_data.normal_matrix; + let normal_mat = mat3x3(normal_mat4[0].xyz, normal_mat4[1].xyz, normal_mat4[2].xyz); out.world_normal = normalize(normal_mat * 
model.normal, ); - var world_position: vec4 = u_model_transform * vec4(model.position, 1.0); + var world_position: vec4 = u_model_transform_data.transform * vec4(model.position, 1.0); out.world_position = world_position.xyz; return out; diff --git a/lyra-game/src/render/transform_buffer_storage.rs b/lyra-game/src/render/transform_buffer_storage.rs index b62d3fd..9fae477 100644 --- a/lyra-game/src/render/transform_buffer_storage.rs +++ b/lyra-game/src/render/transform_buffer_storage.rs @@ -7,6 +7,8 @@ use wgpu::Limits; use std::mem; +use crate::render::avec::AVec; + /// A group id created from a [`TransformGroup`]. /// /// This is mainly created so that [`TransformGroup::OwnedGroup`] can use another group inside of it. @@ -67,8 +69,10 @@ pub struct TransformIndex { struct BufferEntry { pub len: usize, pub bindgroup: wgpu::BindGroup, - pub transform_buffer: wgpu::Buffer, - pub normal_buffer: wgpu::Buffer, + pub buffer: wgpu::Buffer, + transforms: AVec, + //pub normal_buffer: wgpu::Buffer, + } /// A HashMap that caches values for reuse. 
@@ -159,10 +163,12 @@ impl CachedValMap, + //groups: CachedValMap, + //groups: SlotMap, entries: Vec, limits: wgpu::Limits, max_transform_count: usize, + next_index: usize, } impl TransformBuffers { @@ -181,26 +187,16 @@ impl TransformBuffers { }, count: None, }, - wgpu::BindGroupLayoutEntry { - binding: 1, - visibility: wgpu::ShaderStages::VERTEX, - ty: wgpu::BindingType::Buffer { - ty: wgpu::BufferBindingType::Uniform, - has_dynamic_offset: true, - min_binding_size: None, - }, - count: None, - } ], label: Some("transform_bind_group_layout"), }); let mut s = Self { bindgroup_layout, - groups: Default::default(), entries: Default::default(), - max_transform_count: (limits.max_uniform_buffer_binding_size / 2) as usize / (mem::size_of::()), + max_transform_count: (limits.max_uniform_buffer_binding_size) as usize / (limits.min_uniform_buffer_offset_alignment as usize), //(mem::size_of::()), limits, + next_index: 0, }; // create the first uniform buffer @@ -209,73 +205,59 @@ impl TransformBuffers { s } - /// Update an existing transform in the buffers. + /// Write the transform buffers to the gpu. /// - /// # Panics - /// Panics if the `entity_group` is not already inside of the buffers. - #[instrument(skip(self, queue, limits, entity_group, transform, normal_matrix))] - pub fn update_transform(&mut self, queue: &wgpu::Queue, limits: &Limits, entity_group: TransformGroup, transform: glam::Mat4, normal_matrix: glam::Mat3) -> TransformIndex { - let index = *self.groups.get(entity_group.into()) - .expect("Use 'push_transform' for new entities"); - let entry = self.entries.get_mut(index.entry_index).unwrap(); + /// This uses [`wgpu::Queue::write_buffer`], so the write is not immediately submitted, + /// and instead enqueued internally to happen at the start of the next submit() call. 
+ pub fn send_to_gpu(&mut self, queue: &wgpu::Queue) { + self.next_index = 0; - let normal_matrix = glam::Mat4::from_mat3(normal_matrix); + for entry in &mut self.entries { + entry.len = 0; - // write the transform and normal to the end of the transform - let offset = Self::get_buffer_offset(limits, index) as _; - queue.write_buffer(&entry.transform_buffer, offset, bytemuck::bytes_of(&transform)); - queue.write_buffer(&entry.normal_buffer, offset, bytemuck::bytes_of(&normal_matrix)); + let p = entry.transforms.as_ptr(); + let bytes = unsafe { std::slice::from_raw_parts(p as *const u8, entry.transforms.len() * entry.transforms.align()) }; - index - } - - /// Push a new transform into the buffers. - #[instrument(skip(self, queue, limits, entity_group, transform, normal_matrix))] - pub fn push_transform(&mut self, queue: &wgpu::Queue, limits: &Limits, entity_group: TransformGroup, transform: glam::Mat4, normal_matrix: glam::Mat3) -> TransformIndex { - self.groups.insert(entity_group.into(), || { - // this closure is only called when there are no values that can be reused, - // so we get a brand new index at the end of the last entry in the chain. - let last = self.entries.last_mut().unwrap(); - - // ensure the gpu buffer is not overflown - debug_assert!(last.len < self.max_transform_count, - "Transform buffer is filled and 'next_indices' was not incremented! \ - Was a new buffer created?"); - - let tidx = last.len; - last.len += 1; - - TransformIndex { - entry_index: self.entries.len() - 1, - transform_index: tidx - } - }); - - self.update_transform(queue, limits, entity_group, transform, normal_matrix) - } - - /// Collect the dead transforms and prepare self to check next time. - pub fn tick(&mut self) { - self.groups.update(); - } - - /// Returns a boolean indicating if the buffer contains this group. 
- pub fn contains(&self, group: TransformGroup) -> bool { - self.groups.contains(group.into()) + queue.write_buffer(&entry.buffer, 0, bytes); + } } /// Update an existing transform group or if its not existing yet, pushes it to the buffer. /// /// Returns: the index that the transform is at in the buffers. - #[instrument(skip(self, queue, limits, group, transform_fn))] - pub fn update_or_push(&mut self, queue: &wgpu::Queue, limits: &Limits, group: TransformGroup, transform_fn: F) -> TransformIndex - where F: Fn() -> (glam::Mat4, glam::Mat3) + #[instrument(skip(self, device, queue, limits, group, transform, normal_matrix))] + #[inline(always)] + pub fn update_or_push(&mut self, device: &wgpu::Device, queue: &wgpu::Queue, limits: &Limits, group: TransformGroup, transform: glam::Mat4, normal_matrix: glam::Mat3) -> TransformIndex { - let (transform, normal_matrix) = transform_fn(); - if self.contains(group) { - self.update_transform(queue, limits, group, transform, normal_matrix) - } else { - self.push_transform(queue, limits, group, transform, normal_matrix) + // maybe will be used at some point again + let _ = (queue, limits, group); + + let normal_matrix = glam::Mat4::from_mat3(normal_matrix); + + let index = self.next_index; + self.next_index += 1; + + // the index of the entry to put the transform into + let entry_index = index / self.max_transform_count; + // the index of the transform in the buffer + let transform_index = index % self.max_transform_count; + + if entry_index >= self.entries.len() { + self.expand_buffers(device); + } + + let entry = self.entries.get_mut(entry_index).unwrap(); + + // write the transform and normal to the end of the transform + entry.transforms.set_at(transform_index, TransformNormalMatPair { + transform, + normal_mat: normal_matrix, + }); + entry.len += 1; + + TransformIndex { + entry_index: 0, + transform_index: index, } } @@ -297,21 +279,9 @@ impl TransformBuffers { } ); - let normal_mat_buffer = device.create_buffer( - 
&wgpu::BufferDescriptor { - label: Some(&format!("B_NormalMatrix_{}", self.entries.len())), - usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST, - size: max_buffer_sizes, - mapped_at_creation: false, - } - ); + let tran_stride = mem::size_of::(); - let tran_stride = mem::size_of::(); - // although a normal matrix only needs to be a mat3, there's a weird issue with - // misalignment from wgpu or spirv-cross: https://github.com/gfx-rs/wgpu-rs/issues/36 - let norm_stride = mem::size_of::(); - - let transform_bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { + let bindgroup = device.create_bind_group(&wgpu::BindGroupDescriptor { layout: &self.bindgroup_layout, entries: &[ wgpu::BindGroupEntry { @@ -324,42 +294,34 @@ impl TransformBuffers { } ) }, - wgpu::BindGroupEntry { - binding: 1, - resource: wgpu::BindingResource::Buffer( - wgpu::BufferBinding { - buffer: &normal_mat_buffer, - offset: 0, - size: Some(NonZeroU64::new(norm_stride as u64).unwrap()) - } - ) - } ], label: Some("BG_Transforms"), }); + let mut transforms = AVec::new(limits.min_uniform_buffer_offset_alignment as _); + transforms.resize(self.max_transform_count, TransformNormalMatPair { + transform: glam::Mat4::IDENTITY, + normal_mat: glam::Mat4::IDENTITY, + }); + let entry = BufferEntry { - bindgroup: transform_bind_group, - transform_buffer, - normal_buffer: normal_mat_buffer, + bindgroup, + buffer: transform_buffer, len: 0, + + transforms, }; self.entries.push(entry); } /// Returns the bind group for the transform index. + #[inline(always)] pub fn bind_group(&self, transform_id: TransformIndex) -> &wgpu::BindGroup { - let entry = self.entries.get(transform_id.entry_index).unwrap(); + let entry_index = transform_id.transform_index / self.max_transform_count; + let entry = self.entries.get(entry_index).unwrap(); &entry.bindgroup } - /// Get the buffer offset for a transform using wgpu limits. 
- /// - /// If its possible to borrow immutably, use [`TransformBuffers::buffer_offset`]. - fn get_buffer_offset(limits: &wgpu::Limits, transform_index: TransformIndex) -> u32 { - transform_index.transform_index as u32 * limits.min_uniform_buffer_offset_alignment as u32 - } - /// Returns the offset of the transform inside the bind group buffer. /// /// ```nobuild @@ -367,15 +329,21 @@ impl TransformBuffers { /// let offset = transform_buffers.buffer_offset(job.transform_id); /// render_pass.set_bind_group(1, bindgroup, &[ offset, offset, ]); /// ``` + #[inline(always)] pub fn buffer_offset(&self, transform_index: TransformIndex) -> u32 { - Self::get_buffer_offset(&self.limits, transform_index) + //Self::get_buffer_offset(&self.limits, transform_index) + let transform_index = transform_index.transform_index % self.max_transform_count; + let t = transform_index as u32 * self.limits.min_uniform_buffer_offset_alignment as u32; + //debug!("offset: {t}"); + t } /// Returns a boolean indicating if the buffers need to be expanded pub fn needs_expand(&self) -> bool { - self.entries.last() + false + /* self.entries.last() .map(|entry| entry.len >= self.max_transform_count) - .unwrap_or(false) + .unwrap_or(false) */ } }