render: significantly improve performance of TransformBuffers

Before these changes, a release build of 'many-lights' ran at about 130 fps; now it runs at about 430 fps.
SeanOMik 2024-04-21 00:54:45 -04:00
parent 24e1c0281e
commit 8eac563229
Signed by: SeanOMik
GPG Key ID: FEC9E2FC15235964
9 changed files with 410 additions and 133 deletions

Cargo.lock (generated)

@@ -1849,6 +1849,7 @@ dependencies = [
"tracing-log 0.1.4",
"tracing-subscriber",
"tracing-tracy",
"unique",
"uuid",
"wgpu",
"winit",
@@ -3565,6 +3566,12 @@ version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c"
[[package]]
name = "unique"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d360722e1f3884f5b14d332185f02ff111f771f0c76a313268fe6af1409aba96"
[[package]]
name = "url"
version = "2.5.0"


@@ -15,5 +15,8 @@ fps_counter = "3.0.0"
linker = "/usr/bin/clang"
rustflags = ["-Clink-arg=-fuse-ld=lld", "-Clink-arg=-Wl,--no-rosegment"]
[profile.dev]
opt-level = 1
[profile.release]
debug = true


@@ -1,6 +1,6 @@
use std::{collections::{HashMap, VecDeque, HashSet}, ptr::NonNull};
use tracing::{debug_span, info_span};
use tracing::{debug_span, info_span, instrument};
use super::System;
@@ -60,6 +60,7 @@ impl GraphExecutor {
}
/// Executes the systems in the graph
#[instrument(skip(self, world_ptr, stop_on_error))]
pub fn execute(&mut self, mut world_ptr: NonNull<World>, stop_on_error: bool)
-> Result<Vec<GraphExecutorError>, GraphExecutorError> {
let mut stack = VecDeque::new();
@@ -71,13 +72,11 @@ impl GraphExecutor {
let mut possible_errors = Vec::new();
let sys_span = info_span!("graph_exec", system=tracing::field::Empty);
while let Some(node) = stack.pop_front() {
let system = self.systems.get_mut(node.as_str()).unwrap();
sys_span.record("system", system.name.clone());
let _e = sys_span.enter();
let span = info_span!("graph_exec", system=system.name.clone());
let _e = span.enter();
if let Err(e) = system.system.execute(world_ptr)
.map_err(|e| GraphExecutorError::SystemError(node, e)) {


@@ -21,7 +21,7 @@ tracing-tracy = { version = "0.11.0", optional = true }
async-std = { version = "1.12.0", features = [ "unstable", "attributes" ] }
cfg-if = "1"
bytemuck = { version = "1.12", features = [ "derive" ] }
bytemuck = { version = "1.12", features = [ "derive", "min_const_generics" ] }
image = { version = "0.24", default-features = false, features = ["png", "jpeg"] }
anyhow = "1.0"
instant = "0.1"
@@ -33,6 +33,7 @@ quote = "1.0.29"
uuid = { version = "1.5.0", features = ["v4", "fast-rng"] }
itertools = "0.11.0"
thiserror = "1.0.56"
unique = "0.9.1"
[features]
tracy = ["dep:tracing-tracy"]
tracy = ["dep:tracing-tracy"]


@@ -0,0 +1,292 @@
use std::{alloc::Layout, cmp, marker::PhantomData, mem};
use std::{alloc, ptr};
use unique::Unique;
/// A [`Vec`] with its elements aligned to a runtime alignment value.
pub struct AVec<T> {
buf: Unique<u8>,
cap: usize,
len: usize,
align: usize,
_marker: PhantomData<T>,
}
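// An illustrative sketch of the intended use (assuming a 256-byte alignment,
// like wgpu's typical `min_uniform_buffer_offset_alignment`):
//
//     let mut v: AVec<glam::Mat4> = AVec::new(256);
//     v.push(glam::Mat4::IDENTITY);
//     assert_eq!(v.len(), 1); // one element, occupying a full 256-byte slot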
impl<T> AVec<T> {
// Tiny Vecs are dumb. Skip to:
// - 8 if the element size is 1, because any heap allocators are likely
// to round up a request of less than 8 bytes to at least 8 bytes.
// - 4 if elements are moderate-sized (<= 1 KiB).
// - 1 otherwise, to avoid wasting too much space for very short Vecs.
//
// Taken from Rust's standard library RawVec
pub(crate) const MIN_NON_ZERO_CAP: usize = if mem::size_of::<T>() == 1 {
8
} else if mem::size_of::<T>() <= 1024 {
4
} else {
1
};
#[inline]
pub fn new(alignment: usize) -> Self {
debug_assert!(mem::size_of::<T>() > 0, "ZSTs not yet supported");
Self {
buf: Unique::dangling(),
cap: 0,
len: 0,
align: alignment,
_marker: PhantomData
}
}
/// Constructs a new, empty `AVec` with at least the specified capacity.
///
/// The aligned vector will be able to hold at least `capacity` elements without reallocating.
/// This method may allocate for more elements than `capacity`. If `capacity` is zero,
/// the vector will not allocate.
///
/// # Panics
///
/// Panics if the capacity exceeds `usize::MAX` bytes.
#[inline]
pub fn with_capacity(alignment: usize, capacity: usize) -> Self {
let mut s = Self::new(alignment);
if capacity > 0 {
unsafe {
s.grow_amortized(0, capacity);
}
}
s
}
/// Calculates the size of the 'slot' for a single **aligned** item.
#[inline(always)]
fn slot_size(&self) -> usize {
// round the element size up to the nearest multiple of the alignment
let a = self.align - 1;
(mem::size_of::<T>() + a) & !a
}
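// e.g. with `align` = 256, a 128-byte `TransformNormalMatPair` rounds up to a
// full 256-byte slot: (128 + 255) & !255 == 256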
/// # Panics
///
/// Panics if the new capacity exceeds `usize::MAX` bytes.
#[inline]
unsafe fn grow_amortized(&mut self, len: usize, additional: usize) {
debug_assert!(additional > 0);
let required_cap = len.checked_add(additional)
.expect("Capacity overflow");
let cap = cmp::max(self.cap * 2, required_cap);
let cap = cmp::max(Self::MIN_NON_ZERO_CAP, cap);
let new_layout = Layout::from_size_align_unchecked(cap * self.slot_size(), self.align);
let ptr = alloc::alloc(new_layout);
// preserve the existing elements and free the old allocation, otherwise
// growing would lose the vector's contents and leak the old buffer
if self.cap > 0 {
ptr::copy_nonoverlapping(self.buf.as_ptr(), ptr, len * self.slot_size());
let old_layout = Layout::from_size_align_unchecked(self.cap * self.slot_size(), self.align);
alloc::dealloc(self.buf.as_ptr(), old_layout);
}
self.buf = Unique::new_unchecked(ptr);
self.cap = cap;
}
/// # Panics
///
/// Panics if the new capacity exceeds `usize::MAX` bytes.
#[inline]
unsafe fn grow_exact(&mut self, len: usize, additional: usize) {
debug_assert!(additional > 0);
let cap = len.checked_add(additional)
.expect("Capacity overflow");
let new_layout = Layout::from_size_align_unchecked(cap * self.slot_size(), self.align);
let ptr = alloc::alloc(new_layout);
// preserve the existing elements and free the old allocation, otherwise
// growing would lose the vector's contents and leak the old buffer
if self.cap > 0 {
ptr::copy_nonoverlapping(self.buf.as_ptr(), ptr, len * self.slot_size());
let old_layout = Layout::from_size_align_unchecked(self.cap * self.slot_size(), self.align);
alloc::dealloc(self.buf.as_ptr(), old_layout);
}
self.buf = Unique::new_unchecked(ptr);
self.cap = cap;
}
/// Reserves capacity for at least `additional` more elements.
///
/// The collection may reserve more space to speculatively avoid frequent reallocations.
/// After calling `reserve`, capacity will be greater than or equal to
/// `self.len() + additional`. Does nothing if capacity is already sufficient.
///
/// # Panics
///
/// Panics if the new capacity exceeds `usize::MAX` bytes.
#[inline]
pub fn reserve(&mut self, additional: usize) {
debug_assert!(additional > 0);
let remaining = self.capacity().wrapping_sub(self.len);
if additional > remaining {
unsafe { self.grow_amortized(self.len, additional) };
}
}
/// Reserves capacity for `additional` more elements.
///
/// Unlike [`reserve`], this will not over-allocate to speculatively avoid frequent
/// reallocations. After calling `reserve_exact`, capacity will be equal to
/// `self.len() + additional`. Does nothing if the capacity is already sufficient.
///
/// Prefer [`reserve`] if future insertions are expected.
///
/// # Panics
///
/// Panics if the new capacity exceeds `usize::MAX` bytes.
#[inline]
pub fn reserve_exact(&mut self, additional: usize) {
let remaining = self.capacity().wrapping_sub(self.len);
if additional > remaining {
unsafe { self.grow_exact(self.len, additional) };
}
}
/// Appends an element to the back of the collection.
///
/// # Panics
///
/// Panics if the new capacity exceeds `usize::MAX` bytes.
#[inline]
pub fn push(&mut self, val: T) {
if self.len == self.cap {
// reserve room for one more element (`reserve` grows amortized)
self.reserve(1);
}
unsafe {
// SAFETY: the length is ensured to be less than the capacity.
self.set_at_unchecked(self.len, val);
}
self.len += 1;
}
/// Sets an element at position `idx` within the vector to `val`.
///
/// # Safety
///
/// The caller must ensure that `idx` is within the vector's capacity: if
/// `idx >= self.len`, bytes past the length of the vector will be written to, and if
/// `idx >= self.cap` the write goes past the vector's allocation.
#[inline(always)]
unsafe fn set_at_unchecked(&mut self, idx: usize, val: T) {
let ptr = self.buf
.as_ptr()
.add(idx * self.slot_size());
std::ptr::write(ptr.cast::<T>(), val);
}
/// Sets an element at position `idx` within the vector to `val`.
///
/// # Panics
///
/// Panics if `idx >= self.len`.
#[inline(always)]
pub fn set_at(&mut self, idx: usize, val: T) {
assert!(self.len > idx);
unsafe {
self.set_at_unchecked(idx, val);
}
}
/// Shortens the vector, keeping the first `len` elements and dropping the rest.
///
/// If `len` is greater than or equal to the vector's current length, this has no effect.
#[inline]
pub fn truncate(&mut self, len: usize) {
if len > self.len {
return;
}
unsafe {
// drop each element past the new length
for i in len..self.len {
let ptr = self.buf.as_ptr()
.add(i * self.slot_size())
.cast::<T>();
ptr::drop_in_place(ptr);
}
}
self.len = len;
}
#[inline(always)]
pub fn as_ptr(&self) -> *const u8 {
self.buf.as_ptr()
}
#[inline(always)]
pub fn as_mut_ptr(&mut self) -> *mut u8 {
self.buf.as_ptr()
}
/// Returns the alignment of the elements in the vector.
#[inline(always)]
pub fn align(&self) -> usize {
self.align
}
/// Returns the length of the vector.
#[inline(always)]
pub fn len(&self) -> usize {
self.len
}
/// Returns the capacity of the vector.
///
/// The capacity is the number of elements that the vector can store without reallocating.
#[inline(always)]
pub fn capacity(&self) -> usize {
self.cap
}
}
impl<T: Clone> AVec<T> {
/// Resizes the `AVec` in-place so that `len` is equal to `new_len`.
///
/// If `new_len` is greater than `len`, the `AVec` is extended by the difference, and
/// each additional slot is filled with `value`. If `new_len` is less than `len`,
/// the `AVec` is truncated to `new_len`.
///
/// This method requires `T` to implement [`Clone`] in order to clone the passed value.
///
/// # Panics
///
/// Panics if the new capacity exceeds `usize::MAX` bytes.
#[inline]
pub fn resize(&mut self, new_len: usize, value: T) {
if new_len > self.len {
let additional = new_len - self.len;
self.reserve(additional);
unsafe {
let mut ptr = self.buf
.as_ptr().add(self.len * self.slot_size());
// clone `value` into every new slot besides the last one
for _ in 1..additional {
std::ptr::write(ptr.cast::<T>(), value.clone());
ptr = ptr.add(self.slot_size());
}
// the last element takes ownership of `value`, avoiding one clone
std::ptr::write(ptr.cast::<T>(), value);
self.len = new_len;
}
} else {
self.truncate(new_len);
}
}
}


@@ -12,4 +12,5 @@ pub mod camera;
pub mod window;
pub mod transform_buffer_storage;
pub mod light;
pub mod light_cull_compute;
pub mod light_cull_compute;
pub mod avec;


@@ -169,7 +169,7 @@ impl BasicRenderer {
format: surface_format,
width: size.width,
height: size.height,
present_mode,
present_mode: wgpu::PresentMode::Immediate,
alpha_mode: surface_caps.alpha_modes[0],
view_formats: vec![],
};
@@ -448,7 +448,8 @@ impl Renderer for BasicRenderer {
alive_entities.insert(entity);
if let Some((mesh_han, mesh_epoch)) = mesh_pair {
let interop_pos = self.interpolate_transforms(now_inst, last_epoch, entity, &transform, transform_epoch);
// TODO: speed up interpolating transforms
let interop_pos = *transform; //self.interpolate_transforms(now_inst, last_epoch, entity, &transform, transform_epoch);
if let Some(mesh) = mesh_han.data_ref() {
// if process mesh did not just create a new mesh, and the epoch
@@ -464,8 +465,8 @@ impl Renderer for BasicRenderer {
}
let group = TransformGroup::EntityRes(entity, mesh_han.uuid());
let transform_id = self.transform_buffers.update_or_push(&self.queue, &self.render_limits,
group, || ( interop_pos.calculate_mat4(), glam::Mat3::from_quat(interop_pos.rotation) ));
let transform_id = self.transform_buffers.update_or_push(&self.device, &self.queue, &self.render_limits,
group, interop_pos.calculate_mat4(), glam::Mat3::from_quat(interop_pos.rotation));
let material = mesh.material.as_ref().unwrap()
.data_ref().unwrap();
@@ -482,7 +483,8 @@ impl Renderer for BasicRenderer {
lyra_scene::system_update_world_transforms(scene.world(), view).unwrap();
}
let interpo_pos = self.interpolate_transforms(now_inst, last_epoch, entity, &transform, transform_epoch);
// TODO: speed up interpolating transforms
let interpo_pos = *transform; //self.interpolate_transforms(now_inst, last_epoch, entity, &transform, transform_epoch);
for (mesh_han, pos) in scene.world().view_iter::<(&MeshHandle, &WorldTransform)>() {
if let Some(mesh) = mesh_han.data_ref() {
@@ -502,8 +504,8 @@ impl Renderer for BasicRenderer {
let scene_mesh_group = TransformGroup::Res(scene_han.uuid(), mesh_han.uuid());
let group = TransformGroup::OwnedGroup(entity, scene_mesh_group.into());
let transform_id = self.transform_buffers.update_or_push(&self.queue, &self.render_limits,
group, || ( mesh_interpo.calculate_mat4(), glam::Mat3::from_quat(mesh_interpo.rotation) ));
let transform_id = self.transform_buffers.update_or_push(&self.device, &self.queue, &self.render_limits,
group, mesh_interpo.calculate_mat4(), glam::Mat3::from_quat(mesh_interpo.rotation) );
let material = mesh.material.as_ref().unwrap()
.data_ref().unwrap();
@@ -517,7 +519,7 @@ impl Renderer for BasicRenderer {
}
// collect dead entities
self.transform_buffers.tick();
self.transform_buffers.send_to_gpu(&self.queue);
// when buffer storage length does not match the amount of iterated entities,
// remove all dead entities, and their buffers, if they weren't iterated over
@@ -611,7 +613,7 @@ impl Renderer for BasicRenderer {
// Get the bindgroup for job's transform and bind to it using an offset.
let bindgroup = self.transform_buffers.bind_group(job.transform_id);
let offset = self.transform_buffers.buffer_offset(job.transform_id);
render_pass.set_bind_group(1, bindgroup, &[ offset, offset, ]);
render_pass.set_bind_group(1, bindgroup, &[ offset, ]);
render_pass.set_bind_group(2, &self.camera_buffer.bindgroup(), &[]);
render_pass.set_bind_group(3, &self.light_buffers.bind_group_pair.bindgroup, &[]);


@@ -21,6 +21,11 @@ struct VertexOutput {
@location(2) world_normal: vec3<f32>,
}
struct TransformData {
transform: mat4x4<f32>,
normal_matrix: mat4x4<f32>,
}
struct CameraUniform {
view: mat4x4<f32>,
inverse_projection: mat4x4<f32>,
@@ -51,9 +56,7 @@ struct Lights {
};
@group(1) @binding(0)
var<uniform> u_model_transform: mat4x4<f32>;
@group(1) @binding(1)
var<uniform> u_model_normal_matrix: mat4x4<f32>;
var<uniform> u_model_transform_data: TransformData;
@group(2) @binding(0)
var<uniform> u_camera: CameraUniform;
@@ -68,13 +71,14 @@ fn vs_main(
var out: VertexOutput;
out.tex_coords = model.tex_coords;
out.clip_position = u_camera.view_projection * u_model_transform * vec4<f32>(model.position, 1.0);
out.clip_position = u_camera.view_projection * u_model_transform_data.transform * vec4<f32>(model.position, 1.0);
// the normal mat is actually only a mat3x3, but there's a bug in wgpu: https://github.com/gfx-rs/wgpu-rs/issues/36
let normal_mat = mat3x3(u_model_normal_matrix[0].xyz, u_model_normal_matrix[1].xyz, u_model_normal_matrix[2].xyz);
let normal_mat4 = u_model_transform_data.normal_matrix;
let normal_mat = mat3x3(normal_mat4[0].xyz, normal_mat4[1].xyz, normal_mat4[2].xyz);
out.world_normal = normalize(normal_mat * model.normal);
var world_position: vec4<f32> = u_model_transform * vec4<f32>(model.position, 1.0);
var world_position: vec4<f32> = u_model_transform_data.transform * vec4<f32>(model.position, 1.0);
out.world_position = world_position.xyz;
return out;


@@ -7,6 +7,8 @@ use wgpu::Limits;
use std::mem;
use crate::render::avec::AVec;
/// A group id created from a [`TransformGroup`].
///
/// This is mainly created so that [`TransformGroup::OwnedGroup`] can use another group inside of it.
@@ -67,8 +69,10 @@ pub struct TransformIndex {
struct BufferEntry {
pub len: usize,
pub bindgroup: wgpu::BindGroup,
pub transform_buffer: wgpu::Buffer,
pub normal_buffer: wgpu::Buffer,
pub buffer: wgpu::Buffer,
transforms: AVec<TransformNormalMatPair>,
//pub normal_buffer: wgpu::Buffer,
}
/// A HashMap that caches values for reuse.
@@ -159,10 +163,12 @@ impl<K: Hash + Eq + PartialEq + Clone, V: Clone, S: BuildHasher> CachedValMap<K,
/// update, and retrieve the transforms.
pub struct TransformBuffers {
pub bindgroup_layout: wgpu::BindGroupLayout,
groups: CachedValMap<TransformGroupId, TransformIndex>,
//groups: CachedValMap<TransformGroupId, TransformIndex>,
//groups: SlotMap<TransformGroupId, TransformIndex>,
entries: Vec<BufferEntry>,
limits: wgpu::Limits,
max_transform_count: usize,
next_index: usize,
}
impl TransformBuffers {
@@ -181,26 +187,16 @@ impl TransformBuffers {
},
count: None,
},
wgpu::BindGroupLayoutEntry {
binding: 1,
visibility: wgpu::ShaderStages::VERTEX,
ty: wgpu::BindingType::Buffer {
ty: wgpu::BufferBindingType::Uniform,
has_dynamic_offset: true,
min_binding_size: None,
},
count: None,
}
],
label: Some("transform_bind_group_layout"),
});
let mut s = Self {
bindgroup_layout,
groups: Default::default(),
entries: Default::default(),
max_transform_count: (limits.max_uniform_buffer_binding_size / 2) as usize / (mem::size_of::<glam::Mat4>()),
max_transform_count: (limits.max_uniform_buffer_binding_size) as usize / (limits.min_uniform_buffer_offset_alignment as usize), //(mem::size_of::<glam::Mat4>()),
limits,
next_index: 0,
};
// create the first uniform buffer
@@ -209,73 +205,59 @@ impl TransformBuffers {
s
}
/// Update an existing transform in the buffers.
/// Write the transform buffers to the gpu.
///
/// # Panics
/// Panics if the `entity_group` is not already inside of the buffers.
#[instrument(skip(self, queue, limits, entity_group, transform, normal_matrix))]
pub fn update_transform(&mut self, queue: &wgpu::Queue, limits: &Limits, entity_group: TransformGroup, transform: glam::Mat4, normal_matrix: glam::Mat3) -> TransformIndex {
let index = *self.groups.get(entity_group.into())
.expect("Use 'push_transform' for new entities");
let entry = self.entries.get_mut(index.entry_index).unwrap();
/// This uses [`wgpu::Queue::write_buffer`], so the write is not immediately submitted,
/// and instead enqueued internally to happen at the start of the next submit() call.
pub fn send_to_gpu(&mut self, queue: &wgpu::Queue) {
self.next_index = 0;
let normal_matrix = glam::Mat4::from_mat3(normal_matrix);
for entry in &mut self.entries {
entry.len = 0;
// write the transform and normal to the end of the transform
let offset = Self::get_buffer_offset(limits, index) as _;
queue.write_buffer(&entry.transform_buffer, offset, bytemuck::bytes_of(&transform));
queue.write_buffer(&entry.normal_buffer, offset, bytemuck::bytes_of(&normal_matrix));
let p = entry.transforms.as_ptr();
let bytes = unsafe { std::slice::from_raw_parts(p as *const u8, entry.transforms.len() * entry.transforms.align()) };
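// each element occupies one `align`-sized slot (the slot size equals the
// alignment whenever the element is no larger than it), so `len * align`
// covers exactly the initialized bytes of the aligned vector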
index
}
/// Push a new transform into the buffers.
#[instrument(skip(self, queue, limits, entity_group, transform, normal_matrix))]
pub fn push_transform(&mut self, queue: &wgpu::Queue, limits: &Limits, entity_group: TransformGroup, transform: glam::Mat4, normal_matrix: glam::Mat3) -> TransformIndex {
self.groups.insert(entity_group.into(), || {
// this closure is only called when there are no values that can be reused,
// so we get a brand new index at the end of the last entry in the chain.
let last = self.entries.last_mut().unwrap();
// ensure the gpu buffer is not overflown
debug_assert!(last.len < self.max_transform_count,
"Transform buffer is filled and 'next_indices' was not incremented! \
Was a new buffer created?");
let tidx = last.len;
last.len += 1;
TransformIndex {
entry_index: self.entries.len() - 1,
transform_index: tidx
}
});
self.update_transform(queue, limits, entity_group, transform, normal_matrix)
}
/// Collect the dead transforms and prepare self to check next time.
pub fn tick(&mut self) {
self.groups.update();
}
/// Returns a boolean indicating if the buffer contains this group.
pub fn contains(&self, group: TransformGroup) -> bool {
self.groups.contains(group.into())
queue.write_buffer(&entry.buffer, 0, bytes);
}
}
/// Updates an existing transform group, or pushes it into the buffer if it does not yet exist.
///
/// Returns: the index that the transform is at in the buffers.
#[instrument(skip(self, queue, limits, group, transform_fn))]
pub fn update_or_push<F>(&mut self, queue: &wgpu::Queue, limits: &Limits, group: TransformGroup, transform_fn: F) -> TransformIndex
where F: Fn() -> (glam::Mat4, glam::Mat3)
#[instrument(skip(self, device, queue, limits, group, transform, normal_matrix))]
#[inline(always)]
pub fn update_or_push(&mut self, device: &wgpu::Device, queue: &wgpu::Queue, limits: &Limits, group: TransformGroup, transform: glam::Mat4, normal_matrix: glam::Mat3) -> TransformIndex
{
let (transform, normal_matrix) = transform_fn();
if self.contains(group) {
self.update_transform(queue, limits, group, transform, normal_matrix)
} else {
self.push_transform(queue, limits, group, transform, normal_matrix)
// maybe will be used at some point again
let _ = (queue, limits, group);
let normal_matrix = glam::Mat4::from_mat3(normal_matrix);
let index = self.next_index;
self.next_index += 1;
// the index of the entry to put the transform into
let entry_index = index / self.max_transform_count;
// the index of the transform in the buffer
let transform_index = index % self.max_transform_count;
if entry_index >= self.entries.len() {
self.expand_buffers(device);
}
let entry = self.entries.get_mut(entry_index).unwrap();
// write the transform and normal matrix into the entry's slot at `transform_index`
entry.transforms.set_at(transform_index, TransformNormalMatPair {
transform,
normal_mat: normal_matrix,
});
entry.len += 1;
TransformIndex {
entry_index: 0,
transform_index: index,
}
}
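// e.g. with `max_transform_count` = 1024, a global index of 1500 maps to
// entry_index 1 (1500 / 1024) and transform_index 476 (1500 % 1024)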
@@ -297,21 +279,9 @@ impl TransformBuffers {
}
);
let normal_mat_buffer = device.create_buffer(
&wgpu::BufferDescriptor {
label: Some(&format!("B_NormalMatrix_{}", self.entries.len())),
usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
size: max_buffer_sizes,
mapped_at_creation: false,
}
);
let tran_stride = mem::size_of::<TransformNormalMatPair>();
let tran_stride = mem::size_of::<glam::Mat4>();
// although a normal matrix only needs to be a mat3, there's a weird issue with
// misalignment from wgpu or spirv-cross: https://github.com/gfx-rs/wgpu-rs/issues/36
let norm_stride = mem::size_of::<glam::Mat4>();
let transform_bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
let bindgroup = device.create_bind_group(&wgpu::BindGroupDescriptor {
layout: &self.bindgroup_layout,
entries: &[
wgpu::BindGroupEntry {
@@ -324,42 +294,34 @@ impl TransformBuffers {
}
)
},
wgpu::BindGroupEntry {
binding: 1,
resource: wgpu::BindingResource::Buffer(
wgpu::BufferBinding {
buffer: &normal_mat_buffer,
offset: 0,
size: Some(NonZeroU64::new(norm_stride as u64).unwrap())
}
)
}
],
label: Some("BG_Transforms"),
});
let mut transforms = AVec::new(limits.min_uniform_buffer_offset_alignment as _);
transforms.resize(self.max_transform_count, TransformNormalMatPair {
transform: glam::Mat4::IDENTITY,
normal_mat: glam::Mat4::IDENTITY,
});
let entry = BufferEntry {
bindgroup: transform_bind_group,
transform_buffer,
normal_buffer: normal_mat_buffer,
bindgroup,
buffer: transform_buffer,
len: 0,
transforms,
};
self.entries.push(entry);
}
/// Returns the bind group for the transform index.
#[inline(always)]
pub fn bind_group(&self, transform_id: TransformIndex) -> &wgpu::BindGroup {
let entry = self.entries.get(transform_id.entry_index).unwrap();
let entry_index = transform_id.transform_index / self.max_transform_count;
let entry = self.entries.get(entry_index).unwrap();
&entry.bindgroup
}
/// Get the buffer offset for a transform using wgpu limits.
///
/// If its possible to borrow immutably, use [`TransformBuffers::buffer_offset`].
fn get_buffer_offset(limits: &wgpu::Limits, transform_index: TransformIndex) -> u32 {
transform_index.transform_index as u32 * limits.min_uniform_buffer_offset_alignment as u32
}
/// Returns the offset of the transform inside the bind group buffer.
///
/// ```nobuild
@@ -367,15 +329,21 @@ impl TransformBuffers {
/// let offset = transform_buffers.buffer_offset(job.transform_id);
/// render_pass.set_bind_group(1, bindgroup, &[ offset, offset, ]);
/// ```
#[inline(always)]
pub fn buffer_offset(&self, transform_index: TransformIndex) -> u32 {
Self::get_buffer_offset(&self.limits, transform_index)
//Self::get_buffer_offset(&self.limits, transform_index)
let transform_index = transform_index.transform_index % self.max_transform_count;
let t = transform_index as u32 * self.limits.min_uniform_buffer_offset_alignment as u32;
//debug!("offset: {t}");
t
}
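// e.g. transform slot 3 with a 256-byte `min_uniform_buffer_offset_alignment`
// yields a dynamic offset of 3 * 256 = 768 bytes into the entry's buffer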
/// Returns a boolean indicating if the buffers need to be expanded
pub fn needs_expand(&self) -> bool {
self.entries.last()
false
/* self.entries.last()
.map(|entry| entry.len >= self.max_transform_count)
.unwrap_or(false)
.unwrap_or(false) */
}
}