render: significantly improve performance of TransformBuffers
Before the changes, a release build of 'many-lights' was running at about 130fps; now it runs at about 430fps.
This commit is contained in:
parent
24e1c0281e
commit
8eac563229
|
@ -1849,6 +1849,7 @@ dependencies = [
|
|||
"tracing-log 0.1.4",
|
||||
"tracing-subscriber",
|
||||
"tracing-tracy",
|
||||
"unique",
|
||||
"uuid",
|
||||
"wgpu",
|
||||
"winit",
|
||||
|
@ -3565,6 +3566,12 @@ version = "0.2.4"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c"
|
||||
|
||||
[[package]]
|
||||
name = "unique"
|
||||
version = "0.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d360722e1f3884f5b14d332185f02ff111f771f0c76a313268fe6af1409aba96"
|
||||
|
||||
[[package]]
|
||||
name = "url"
|
||||
version = "2.5.0"
|
||||
|
|
|
@ -15,5 +15,8 @@ fps_counter = "3.0.0"
|
|||
linker = "/usr/bin/clang"
|
||||
rustflags = ["-Clink-arg=-fuse-ld=lld", "-Clink-arg=-Wl,--no-rosegment"]
|
||||
|
||||
[profile.dev]
|
||||
opt-level = 1
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
|
@ -1,6 +1,6 @@
|
|||
use std::{collections::{HashMap, VecDeque, HashSet}, ptr::NonNull};
|
||||
|
||||
use tracing::{debug_span, info_span};
|
||||
use tracing::{debug_span, info_span, instrument};
|
||||
|
||||
use super::System;
|
||||
|
||||
|
@ -60,6 +60,7 @@ impl GraphExecutor {
|
|||
}
|
||||
|
||||
/// Executes the systems in the graph
|
||||
#[instrument(skip(self, world_ptr, stop_on_error))]
|
||||
pub fn execute(&mut self, mut world_ptr: NonNull<World>, stop_on_error: bool)
|
||||
-> Result<Vec<GraphExecutorError>, GraphExecutorError> {
|
||||
let mut stack = VecDeque::new();
|
||||
|
@ -71,13 +72,11 @@ impl GraphExecutor {
|
|||
|
||||
let mut possible_errors = Vec::new();
|
||||
|
||||
let sys_span = info_span!("graph_exec", system=tracing::field::Empty);
|
||||
|
||||
while let Some(node) = stack.pop_front() {
|
||||
let system = self.systems.get_mut(node.as_str()).unwrap();
|
||||
|
||||
sys_span.record("system", system.name.clone());
|
||||
let _e = sys_span.enter();
|
||||
let span = info_span!("graph_exec", system=system.name.clone());
|
||||
let _e = span.enter();
|
||||
|
||||
if let Err(e) = system.system.execute(world_ptr)
|
||||
.map_err(|e| GraphExecutorError::SystemError(node, e)) {
|
||||
|
|
|
@ -21,7 +21,7 @@ tracing-tracy = { version = "0.11.0", optional = true }
|
|||
|
||||
async-std = { version = "1.12.0", features = [ "unstable", "attributes" ] }
|
||||
cfg-if = "1"
|
||||
bytemuck = { version = "1.12", features = [ "derive" ] }
|
||||
bytemuck = { version = "1.12", features = [ "derive", "min_const_generics" ] }
|
||||
image = { version = "0.24", default-features = false, features = ["png", "jpeg"] }
|
||||
anyhow = "1.0"
|
||||
instant = "0.1"
|
||||
|
@ -33,6 +33,7 @@ quote = "1.0.29"
|
|||
uuid = { version = "1.5.0", features = ["v4", "fast-rng"] }
|
||||
itertools = "0.11.0"
|
||||
thiserror = "1.0.56"
|
||||
unique = "0.9.1"
|
||||
|
||||
[features]
|
||||
tracy = ["dep:tracing-tracy"]
|
||||
tracy = ["dep:tracing-tracy"]
|
||||
|
|
|
@ -0,0 +1,292 @@
|
|||
use std::{alloc::Layout, cmp, marker::PhantomData, mem};
|
||||
|
||||
use std::{alloc, ptr};
|
||||
use unique::Unique;
|
||||
|
||||
/// A [`Vec`] with its elements aligned to a runtime alignment value.
|
||||
pub struct AVec<T> {
|
||||
buf: Unique<u8>,
|
||||
cap: usize,
|
||||
len: usize,
|
||||
align: usize,
|
||||
_marker: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl<T> AVec<T> {
|
||||
// Tiny Vecs are dumb. Skip to:
|
||||
// - 8 if the element size is 1, because any heap allocators are likely
|
||||
// to round up a request of less than 8 bytes to at least 8 bytes.
|
||||
// - 4 if elements are moderate-sized (<= 1 KiB).
|
||||
// - 1 otherwise, to avoid wasting too much space for very short Vecs.
|
||||
//
|
||||
// Taken from Rust's standard library RawVec
|
||||
pub(crate) const MIN_NON_ZERO_CAP: usize = if mem::size_of::<T>() == 1 {
|
||||
8
|
||||
} else if mem::size_of::<T>() <= 1024 {
|
||||
4
|
||||
} else {
|
||||
1
|
||||
};
|
||||
|
||||
#[inline]
|
||||
pub fn new(alignment: usize) -> Self {
|
||||
debug_assert!(mem::size_of::<T>() > 0, "ZSTs not yet supported");
|
||||
|
||||
Self {
|
||||
buf: Unique::dangling(),
|
||||
cap: 0,
|
||||
len: 0,
|
||||
align: alignment,
|
||||
_marker: PhantomData
|
||||
}
|
||||
}
|
||||
|
||||
/// Constructs a new, empty `AVec` with at least the specified capacity.
|
||||
///
|
||||
/// The aligned vector will be able to hold at least `capacity` elements without reallocating.
|
||||
/// This method may allocate for more elements than `capacity`. If `capacity` is zero,
|
||||
/// the vector will not allocate.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if the capacity exceeds `usize::MAX` bytes.
|
||||
#[inline]
|
||||
pub fn with_capacity(alignment: usize, capacity: usize) -> Self {
|
||||
let mut s = Self::new(alignment);
|
||||
|
||||
if capacity > 0 {
|
||||
unsafe {
|
||||
s.grow_amortized(0, capacity);
|
||||
}
|
||||
}
|
||||
|
||||
s
|
||||
}
|
||||
|
||||
/// Calculates the size of the 'slot' for a single **aligned** item.
|
||||
#[inline(always)]
|
||||
fn slot_size(&self) -> usize {
|
||||
let a = self.align - 1;
|
||||
mem::align_of::<T>() + (a) & !a
|
||||
}
|
||||
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if the new capacity exceeds `usize::MAX` bytes.
|
||||
#[inline]
|
||||
unsafe fn grow_amortized(&mut self, len: usize, additional: usize) {
|
||||
debug_assert!(additional > 0);
|
||||
|
||||
let required_cap = len.checked_add(additional)
|
||||
.expect("Capacity overflow");
|
||||
|
||||
let cap = cmp::max(self.cap * 2, required_cap);
|
||||
let cap = cmp::max(Self::MIN_NON_ZERO_CAP, cap);
|
||||
|
||||
let new_layout = Layout::from_size_align_unchecked(cap * self.slot_size(), self.align);
|
||||
|
||||
let ptr = alloc::alloc(new_layout);
|
||||
self.buf = Unique::new_unchecked(ptr);
|
||||
self.cap = cap;
|
||||
}
|
||||
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if the new capacity exceeds `usize::MAX` bytes.
|
||||
#[inline]
|
||||
unsafe fn grow_exact(&mut self, len: usize, additional: usize) {
|
||||
debug_assert!(additional > 0);
|
||||
|
||||
let cap = len.checked_add(additional)
|
||||
.expect("Capacity overflow");
|
||||
|
||||
let new_layout = Layout::from_size_align_unchecked(cap * self.slot_size(), self.align);
|
||||
|
||||
let ptr = alloc::alloc(new_layout);
|
||||
self.buf = Unique::new_unchecked(ptr);
|
||||
self.cap = cap;
|
||||
}
|
||||
|
||||
/// Reserves capacity for at least `additional` more elements.
|
||||
///
|
||||
/// The collection may reserve more space to speculatively avoid frequent reallocations.
|
||||
/// After calling `reserve`, capacity will be greater than or equal to
|
||||
/// `self.len() + additional`. Does nothing if capacity is already sufficient.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if the new capacity exceeds `usize::MAX` bytes.
|
||||
#[inline]
|
||||
pub fn reserve(&mut self, additional: usize) {
|
||||
debug_assert!(additional > 0);
|
||||
|
||||
let remaining = self.capacity().wrapping_sub(self.len);
|
||||
|
||||
if additional > remaining {
|
||||
unsafe { self.grow_amortized(self.len, additional) };
|
||||
}
|
||||
}
|
||||
|
||||
/// Reserves capacity for `additional` more elements.
|
||||
///
|
||||
/// Unlike [`reserve`], this will not over-allocate to speculatively avoid frequent
|
||||
/// reallocations. After calling `reserve_exact`, capacity will be equal to
|
||||
/// `self.len() + additional`. Does nothing if the capacity is already sufficient.
|
||||
///
|
||||
/// Prefer [`reserve`] if future insertions are expected.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if the new capacity exceeds `usize::MAX` bytes.
|
||||
#[inline]
|
||||
pub fn reserve_exact(&mut self, additional: usize) {
|
||||
let remaining = self.capacity().wrapping_sub(self.len);
|
||||
|
||||
if additional > remaining {
|
||||
unsafe { self.grow_exact(self.len, additional) };
|
||||
}
|
||||
}
|
||||
|
||||
/// Appends an element to the back of the collection.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if the new capacity exceeds `usize::MAX` bytes.
|
||||
#[inline]
|
||||
pub fn push(&mut self, val: T) {
|
||||
if self.len == self.cap {
|
||||
self.reserve(self.slot_size());
|
||||
}
|
||||
|
||||
unsafe {
|
||||
// SAFETY: the length is ensured to be less than the capacity.
|
||||
self.set_at_unchecked(self.len, val);
|
||||
}
|
||||
|
||||
self.len += 1;
|
||||
}
|
||||
|
||||
/// Sets an element at position `idx` within the vector to `val`.
|
||||
///
|
||||
/// # Unsafe
|
||||
///
|
||||
/// If `self.len > idx`, bytes past the length of the vector will be written to, potentially
|
||||
/// also writing past the capacity of the vector.
|
||||
#[inline(always)]
|
||||
unsafe fn set_at_unchecked(&mut self, idx: usize, val: T) {
|
||||
let ptr = self.buf
|
||||
.as_ptr()
|
||||
.add(idx * self.slot_size());
|
||||
|
||||
std::ptr::write(ptr.cast::<T>(), val);
|
||||
}
|
||||
|
||||
/// Sets an element at position `idx` within the vector to `val`.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if `idx >= self.len`.
|
||||
#[inline(always)]
|
||||
pub fn set_at(&mut self, idx: usize, val: T) {
|
||||
assert!(self.len > idx);
|
||||
|
||||
unsafe {
|
||||
self.set_at_unchecked(idx, val);
|
||||
}
|
||||
}
|
||||
|
||||
/// Shortens the vector, keeping the first `len` elements and dropping the rest.
|
||||
///
|
||||
/// If `len` is greater or equal to the vector’s current length, this has no effect.
|
||||
#[inline]
|
||||
pub fn truncate(&mut self, len: usize) {
|
||||
if len > self.len {
|
||||
return;
|
||||
}
|
||||
|
||||
unsafe {
|
||||
// drop each element past the new length
|
||||
for i in len..self.len {
|
||||
let ptr = self.buf.as_ptr()
|
||||
.add(i * self.slot_size())
|
||||
.cast::<T>();
|
||||
|
||||
ptr::drop_in_place(ptr);
|
||||
}
|
||||
}
|
||||
|
||||
self.len = len;
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn as_ptr(&self) -> *const u8 {
|
||||
self.buf.as_ptr()
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn as_mut_ptr(&self) -> *mut u8 {
|
||||
self.buf.as_ptr()
|
||||
}
|
||||
|
||||
/// Returns the alignment of the elements in the vector.
|
||||
#[inline(always)]
|
||||
pub fn align(&self) -> usize {
|
||||
self.align
|
||||
}
|
||||
|
||||
/// Returns the length of the vector.
|
||||
#[inline(always)]
|
||||
pub fn len(&self) -> usize {
|
||||
self.len
|
||||
}
|
||||
|
||||
/// Returns the capacity of the vector.
|
||||
///
|
||||
/// The capacity is the amount of elements that the vector can store without reallocating.
|
||||
#[inline(always)]
|
||||
pub fn capacity(&self) -> usize {
|
||||
self.cap
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Clone> AVec<T> {
|
||||
/// Resized the `AVec` in-place so that `len` is equal to `new_len`.
|
||||
///
|
||||
/// If `new_len` is greater than `len`, the `AVec` is extended by the difference, and
|
||||
/// each additional slot is filled with `value`. If `new_len` is less than `len`,
|
||||
/// the `AVec` will be truncated by to be `new_len`
|
||||
///
|
||||
/// This method requires `T` to implement [`Clone`] in order to clone the passed value.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if the new capacity exceeds `usize::MAX` bytes.
|
||||
#[inline]
|
||||
pub fn resize(&mut self, new_len: usize, value: T) {
|
||||
if new_len > self.len {
|
||||
self.reserve(new_len - self.len);
|
||||
|
||||
unsafe {
|
||||
let mut ptr = self.buf
|
||||
.as_ptr().add(self.len * self.slot_size());
|
||||
|
||||
// write all elements besides the last one
|
||||
for _ in 1..new_len {
|
||||
std::ptr::write(ptr.cast::<T>(), value.clone());
|
||||
ptr = ptr.add(self.slot_size());
|
||||
self.len += 1;
|
||||
}
|
||||
|
||||
if new_len > 0 {
|
||||
// the last element can be written without cloning
|
||||
std::ptr::write(ptr.cast::<T>(), value.clone());
|
||||
self.len += 1;
|
||||
}
|
||||
|
||||
self.len = new_len;
|
||||
}
|
||||
} else {
|
||||
self.truncate(new_len);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -12,4 +12,5 @@ pub mod camera;
|
|||
pub mod window;
|
||||
pub mod transform_buffer_storage;
|
||||
pub mod light;
|
||||
pub mod light_cull_compute;
|
||||
pub mod light_cull_compute;
|
||||
pub mod avec;
|
|
@ -169,7 +169,7 @@ impl BasicRenderer {
|
|||
format: surface_format,
|
||||
width: size.width,
|
||||
height: size.height,
|
||||
present_mode,
|
||||
present_mode: wgpu::PresentMode::Immediate,
|
||||
alpha_mode: surface_caps.alpha_modes[0],
|
||||
view_formats: vec![],
|
||||
};
|
||||
|
@ -448,7 +448,8 @@ impl Renderer for BasicRenderer {
|
|||
alive_entities.insert(entity);
|
||||
|
||||
if let Some((mesh_han, mesh_epoch)) = mesh_pair {
|
||||
let interop_pos = self.interpolate_transforms(now_inst, last_epoch, entity, &transform, transform_epoch);
|
||||
// TODO: speed up interpolating transforms
|
||||
let interop_pos = *transform; //self.interpolate_transforms(now_inst, last_epoch, entity, &transform, transform_epoch);
|
||||
|
||||
if let Some(mesh) = mesh_han.data_ref() {
|
||||
// if process mesh did not just create a new mesh, and the epoch
|
||||
|
@ -464,8 +465,8 @@ impl Renderer for BasicRenderer {
|
|||
}
|
||||
|
||||
let group = TransformGroup::EntityRes(entity, mesh_han.uuid());
|
||||
let transform_id = self.transform_buffers.update_or_push(&self.queue, &self.render_limits,
|
||||
group, || ( interop_pos.calculate_mat4(), glam::Mat3::from_quat(interop_pos.rotation) ));
|
||||
let transform_id = self.transform_buffers.update_or_push(&self.device, &self.queue, &self.render_limits,
|
||||
group, interop_pos.calculate_mat4(), glam::Mat3::from_quat(interop_pos.rotation));
|
||||
|
||||
let material = mesh.material.as_ref().unwrap()
|
||||
.data_ref().unwrap();
|
||||
|
@ -482,7 +483,8 @@ impl Renderer for BasicRenderer {
|
|||
lyra_scene::system_update_world_transforms(scene.world(), view).unwrap();
|
||||
}
|
||||
|
||||
let interpo_pos = self.interpolate_transforms(now_inst, last_epoch, entity, &transform, transform_epoch);
|
||||
// TODO: speed up interpolating transforms
|
||||
let interpo_pos = *transform; //self.interpolate_transforms(now_inst, last_epoch, entity, &transform, transform_epoch);
|
||||
|
||||
for (mesh_han, pos) in scene.world().view_iter::<(&MeshHandle, &WorldTransform)>() {
|
||||
if let Some(mesh) = mesh_han.data_ref() {
|
||||
|
@ -502,8 +504,8 @@ impl Renderer for BasicRenderer {
|
|||
|
||||
let scene_mesh_group = TransformGroup::Res(scene_han.uuid(), mesh_han.uuid());
|
||||
let group = TransformGroup::OwnedGroup(entity, scene_mesh_group.into());
|
||||
let transform_id = self.transform_buffers.update_or_push(&self.queue, &self.render_limits,
|
||||
group, || ( mesh_interpo.calculate_mat4(), glam::Mat3::from_quat(mesh_interpo.rotation) ));
|
||||
let transform_id = self.transform_buffers.update_or_push(&self.device, &self.queue, &self.render_limits,
|
||||
group, mesh_interpo.calculate_mat4(), glam::Mat3::from_quat(mesh_interpo.rotation) );
|
||||
|
||||
let material = mesh.material.as_ref().unwrap()
|
||||
.data_ref().unwrap();
|
||||
|
@ -517,7 +519,7 @@ impl Renderer for BasicRenderer {
|
|||
}
|
||||
|
||||
// collect dead entities
|
||||
self.transform_buffers.tick();
|
||||
self.transform_buffers.send_to_gpu(&self.queue);
|
||||
|
||||
// when buffer storage length does not match the amount of iterated entities,
|
||||
// remove all dead entities, and their buffers, if they weren't iterated over
|
||||
|
@ -611,7 +613,7 @@ impl Renderer for BasicRenderer {
|
|||
// Get the bindgroup for job's transform and bind to it using an offset.
|
||||
let bindgroup = self.transform_buffers.bind_group(job.transform_id);
|
||||
let offset = self.transform_buffers.buffer_offset(job.transform_id);
|
||||
render_pass.set_bind_group(1, bindgroup, &[ offset, offset, ]);
|
||||
render_pass.set_bind_group(1, bindgroup, &[ offset, ]);
|
||||
|
||||
render_pass.set_bind_group(2, &self.camera_buffer.bindgroup(), &[]);
|
||||
render_pass.set_bind_group(3, &self.light_buffers.bind_group_pair.bindgroup, &[]);
|
||||
|
|
|
@ -21,6 +21,11 @@ struct VertexOutput {
|
|||
@location(2) world_normal: vec3<f32>,
|
||||
}
|
||||
|
||||
// Per-object transform data, bound as a single uniform at group 1, binding 0.
struct TransformData {
|
||||
// Model matrix; vs_main multiplies it with the vertex position.
transform: mat4x4<f32>,
|
||||
// Normal matrix stored as a mat4x4; vs_main only reads the xyz of its first three columns.
normal_matrix: mat4x4<f32>,
|
||||
}
|
||||
|
||||
struct CameraUniform {
|
||||
view: mat4x4<f32>,
|
||||
inverse_projection: mat4x4<f32>,
|
||||
|
@ -51,9 +56,7 @@ struct Lights {
|
|||
};
|
||||
|
||||
@group(1) @binding(0)
|
||||
var<uniform> u_model_transform: mat4x4<f32>;
|
||||
@group(1) @binding(1)
|
||||
var<uniform> u_model_normal_matrix: mat4x4<f32>;
|
||||
var<uniform> u_model_transform_data: TransformData;
|
||||
|
||||
@group(2) @binding(0)
|
||||
var<uniform> u_camera: CameraUniform;
|
||||
|
@ -68,13 +71,14 @@ fn vs_main(
|
|||
var out: VertexOutput;
|
||||
|
||||
out.tex_coords = model.tex_coords;
|
||||
out.clip_position = u_camera.view_projection * u_model_transform * vec4<f32>(model.position, 1.0);
|
||||
out.clip_position = u_camera.view_projection * u_model_transform_data.transform * vec4<f32>(model.position, 1.0);
|
||||
|
||||
// the normal mat is actually only a mat3x3, but there's a bug in wgpu: https://github.com/gfx-rs/wgpu-rs/issues/36
|
||||
let normal_mat = mat3x3(u_model_normal_matrix[0].xyz, u_model_normal_matrix[1].xyz, u_model_normal_matrix[2].xyz);
|
||||
let normal_mat4 = u_model_transform_data.normal_matrix;
|
||||
let normal_mat = mat3x3(normal_mat4[0].xyz, normal_mat4[1].xyz, normal_mat4[2].xyz);
|
||||
out.world_normal = normalize(normal_mat * model.normal, );
|
||||
|
||||
var world_position: vec4<f32> = u_model_transform * vec4<f32>(model.position, 1.0);
|
||||
var world_position: vec4<f32> = u_model_transform_data.transform * vec4<f32>(model.position, 1.0);
|
||||
out.world_position = world_position.xyz;
|
||||
|
||||
return out;
|
||||
|
|
|
@ -7,6 +7,8 @@ use wgpu::Limits;
|
|||
|
||||
use std::mem;
|
||||
|
||||
use crate::render::avec::AVec;
|
||||
|
||||
/// A group id created from a [`TransformGroup`].
|
||||
///
|
||||
/// This is mainly created so that [`TransformGroup::OwnedGroup`] can use another group inside of it.
|
||||
|
@ -67,8 +69,10 @@ pub struct TransformIndex {
|
|||
struct BufferEntry {
|
||||
pub len: usize,
|
||||
pub bindgroup: wgpu::BindGroup,
|
||||
pub transform_buffer: wgpu::Buffer,
|
||||
pub normal_buffer: wgpu::Buffer,
|
||||
pub buffer: wgpu::Buffer,
|
||||
transforms: AVec<TransformNormalMatPair>,
|
||||
//pub normal_buffer: wgpu::Buffer,
|
||||
|
||||
}
|
||||
|
||||
/// A HashMap that caches values for reuse.
|
||||
|
@ -159,10 +163,12 @@ impl<K: Hash + Eq + PartialEq + Clone, V: Clone, S: BuildHasher> CachedValMap<K,
|
|||
/// update, and retrieve the transforms.
|
||||
pub struct TransformBuffers {
|
||||
pub bindgroup_layout: wgpu::BindGroupLayout,
|
||||
groups: CachedValMap<TransformGroupId, TransformIndex>,
|
||||
//groups: CachedValMap<TransformGroupId, TransformIndex>,
|
||||
//groups: SlotMap<TransformGroupId, TransformIndex>,
|
||||
entries: Vec<BufferEntry>,
|
||||
limits: wgpu::Limits,
|
||||
max_transform_count: usize,
|
||||
next_index: usize,
|
||||
}
|
||||
|
||||
impl TransformBuffers {
|
||||
|
@ -181,26 +187,16 @@ impl TransformBuffers {
|
|||
},
|
||||
count: None,
|
||||
},
|
||||
wgpu::BindGroupLayoutEntry {
|
||||
binding: 1,
|
||||
visibility: wgpu::ShaderStages::VERTEX,
|
||||
ty: wgpu::BindingType::Buffer {
|
||||
ty: wgpu::BufferBindingType::Uniform,
|
||||
has_dynamic_offset: true,
|
||||
min_binding_size: None,
|
||||
},
|
||||
count: None,
|
||||
}
|
||||
],
|
||||
label: Some("transform_bind_group_layout"),
|
||||
});
|
||||
|
||||
let mut s = Self {
|
||||
bindgroup_layout,
|
||||
groups: Default::default(),
|
||||
entries: Default::default(),
|
||||
max_transform_count: (limits.max_uniform_buffer_binding_size / 2) as usize / (mem::size_of::<glam::Mat4>()),
|
||||
max_transform_count: (limits.max_uniform_buffer_binding_size) as usize / (limits.min_uniform_buffer_offset_alignment as usize), //(mem::size_of::<glam::Mat4>()),
|
||||
limits,
|
||||
next_index: 0,
|
||||
};
|
||||
|
||||
// create the first uniform buffer
|
||||
|
@ -209,73 +205,59 @@ impl TransformBuffers {
|
|||
s
|
||||
}
|
||||
|
||||
/// Update an existing transform in the buffers.
|
||||
/// Write the transform buffers to the gpu.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if the `entity_group` is not already inside of the buffers.
|
||||
#[instrument(skip(self, queue, limits, entity_group, transform, normal_matrix))]
|
||||
pub fn update_transform(&mut self, queue: &wgpu::Queue, limits: &Limits, entity_group: TransformGroup, transform: glam::Mat4, normal_matrix: glam::Mat3) -> TransformIndex {
|
||||
let index = *self.groups.get(entity_group.into())
|
||||
.expect("Use 'push_transform' for new entities");
|
||||
let entry = self.entries.get_mut(index.entry_index).unwrap();
|
||||
/// This uses [`wgpu::Queue::write_buffer`], so the write is not immediately submitted,
|
||||
/// and instead enqueued internally to happen at the start of the next submit() call.
|
||||
pub fn send_to_gpu(&mut self, queue: &wgpu::Queue) {
|
||||
self.next_index = 0;
|
||||
|
||||
let normal_matrix = glam::Mat4::from_mat3(normal_matrix);
|
||||
for entry in &mut self.entries {
|
||||
entry.len = 0;
|
||||
|
||||
// write the transform and normal to the end of the transform
|
||||
let offset = Self::get_buffer_offset(limits, index) as _;
|
||||
queue.write_buffer(&entry.transform_buffer, offset, bytemuck::bytes_of(&transform));
|
||||
queue.write_buffer(&entry.normal_buffer, offset, bytemuck::bytes_of(&normal_matrix));
|
||||
let p = entry.transforms.as_ptr();
|
||||
let bytes = unsafe { std::slice::from_raw_parts(p as *const u8, entry.transforms.len() * entry.transforms.align()) };
|
||||
|
||||
index
|
||||
}
|
||||
|
||||
/// Push a new transform into the buffers.
|
||||
#[instrument(skip(self, queue, limits, entity_group, transform, normal_matrix))]
|
||||
pub fn push_transform(&mut self, queue: &wgpu::Queue, limits: &Limits, entity_group: TransformGroup, transform: glam::Mat4, normal_matrix: glam::Mat3) -> TransformIndex {
|
||||
self.groups.insert(entity_group.into(), || {
|
||||
// this closure is only called when there are no values that can be reused,
|
||||
// so we get a brand new index at the end of the last entry in the chain.
|
||||
let last = self.entries.last_mut().unwrap();
|
||||
|
||||
// ensure the gpu buffer is not overflown
|
||||
debug_assert!(last.len < self.max_transform_count,
|
||||
"Transform buffer is filled and 'next_indices' was not incremented! \
|
||||
Was a new buffer created?");
|
||||
|
||||
let tidx = last.len;
|
||||
last.len += 1;
|
||||
|
||||
TransformIndex {
|
||||
entry_index: self.entries.len() - 1,
|
||||
transform_index: tidx
|
||||
}
|
||||
});
|
||||
|
||||
self.update_transform(queue, limits, entity_group, transform, normal_matrix)
|
||||
}
|
||||
|
||||
/// Collect the dead transforms and prepare self to check next time.
|
||||
pub fn tick(&mut self) {
|
||||
self.groups.update();
|
||||
}
|
||||
|
||||
/// Returns a boolean indicating if the buffer contains this group.
|
||||
pub fn contains(&self, group: TransformGroup) -> bool {
|
||||
self.groups.contains(group.into())
|
||||
queue.write_buffer(&entry.buffer, 0, bytes);
|
||||
}
|
||||
}
|
||||
|
||||
/// Update an existing transform group or if its not existing yet, pushes it to the buffer.
|
||||
///
|
||||
/// Returns: the index that the transform is at in the buffers.
|
||||
#[instrument(skip(self, queue, limits, group, transform_fn))]
|
||||
pub fn update_or_push<F>(&mut self, queue: &wgpu::Queue, limits: &Limits, group: TransformGroup, transform_fn: F) -> TransformIndex
|
||||
where F: Fn() -> (glam::Mat4, glam::Mat3)
|
||||
#[instrument(skip(self, device, queue, limits, group, transform, normal_matrix))]
|
||||
#[inline(always)]
|
||||
pub fn update_or_push(&mut self, device: &wgpu::Device, queue: &wgpu::Queue, limits: &Limits, group: TransformGroup, transform: glam::Mat4, normal_matrix: glam::Mat3) -> TransformIndex
|
||||
{
|
||||
let (transform, normal_matrix) = transform_fn();
|
||||
if self.contains(group) {
|
||||
self.update_transform(queue, limits, group, transform, normal_matrix)
|
||||
} else {
|
||||
self.push_transform(queue, limits, group, transform, normal_matrix)
|
||||
// maybe will be used at some point again
|
||||
let _ = (queue, limits, group);
|
||||
|
||||
let normal_matrix = glam::Mat4::from_mat3(normal_matrix);
|
||||
|
||||
let index = self.next_index;
|
||||
self.next_index += 1;
|
||||
|
||||
// the index of the entry to put the transform into
|
||||
let entry_index = index / self.max_transform_count;
|
||||
// the index of the transform in the buffer
|
||||
let transform_index = index % self.max_transform_count;
|
||||
|
||||
if entry_index >= self.entries.len() {
|
||||
self.expand_buffers(device);
|
||||
}
|
||||
|
||||
let entry = self.entries.get_mut(entry_index).unwrap();
|
||||
|
||||
// write the transform and normal to the end of the transform
|
||||
entry.transforms.set_at(transform_index, TransformNormalMatPair {
|
||||
transform,
|
||||
normal_mat: normal_matrix,
|
||||
});
|
||||
entry.len += 1;
|
||||
|
||||
TransformIndex {
|
||||
entry_index: 0,
|
||||
transform_index: index,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -297,21 +279,9 @@ impl TransformBuffers {
|
|||
}
|
||||
);
|
||||
|
||||
let normal_mat_buffer = device.create_buffer(
|
||||
&wgpu::BufferDescriptor {
|
||||
label: Some(&format!("B_NormalMatrix_{}", self.entries.len())),
|
||||
usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
|
||||
size: max_buffer_sizes,
|
||||
mapped_at_creation: false,
|
||||
}
|
||||
);
|
||||
let tran_stride = mem::size_of::<TransformNormalMatPair>();
|
||||
|
||||
let tran_stride = mem::size_of::<glam::Mat4>();
|
||||
// although a normal matrix only needs to be a mat3, there's a weird issue with
|
||||
// misalignment from wgpu or spirv-cross: https://github.com/gfx-rs/wgpu-rs/issues/36
|
||||
let norm_stride = mem::size_of::<glam::Mat4>();
|
||||
|
||||
let transform_bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
|
||||
let bindgroup = device.create_bind_group(&wgpu::BindGroupDescriptor {
|
||||
layout: &self.bindgroup_layout,
|
||||
entries: &[
|
||||
wgpu::BindGroupEntry {
|
||||
|
@ -324,42 +294,34 @@ impl TransformBuffers {
|
|||
}
|
||||
)
|
||||
},
|
||||
wgpu::BindGroupEntry {
|
||||
binding: 1,
|
||||
resource: wgpu::BindingResource::Buffer(
|
||||
wgpu::BufferBinding {
|
||||
buffer: &normal_mat_buffer,
|
||||
offset: 0,
|
||||
size: Some(NonZeroU64::new(norm_stride as u64).unwrap())
|
||||
}
|
||||
)
|
||||
}
|
||||
],
|
||||
label: Some("BG_Transforms"),
|
||||
});
|
||||
|
||||
let mut transforms = AVec::new(limits.min_uniform_buffer_offset_alignment as _);
|
||||
transforms.resize(self.max_transform_count, TransformNormalMatPair {
|
||||
transform: glam::Mat4::IDENTITY,
|
||||
normal_mat: glam::Mat4::IDENTITY,
|
||||
});
|
||||
|
||||
let entry = BufferEntry {
|
||||
bindgroup: transform_bind_group,
|
||||
transform_buffer,
|
||||
normal_buffer: normal_mat_buffer,
|
||||
bindgroup,
|
||||
buffer: transform_buffer,
|
||||
len: 0,
|
||||
|
||||
transforms,
|
||||
};
|
||||
self.entries.push(entry);
|
||||
}
|
||||
|
||||
/// Returns the bind group for the transform index.
|
||||
#[inline(always)]
|
||||
pub fn bind_group(&self, transform_id: TransformIndex) -> &wgpu::BindGroup {
|
||||
let entry = self.entries.get(transform_id.entry_index).unwrap();
|
||||
let entry_index = transform_id.transform_index / self.max_transform_count;
|
||||
let entry = self.entries.get(entry_index).unwrap();
|
||||
&entry.bindgroup
|
||||
}
|
||||
|
||||
/// Get the buffer offset for a transform using wgpu limits.
|
||||
///
|
||||
/// If its possible to borrow immutably, use [`TransformBuffers::buffer_offset`].
|
||||
fn get_buffer_offset(limits: &wgpu::Limits, transform_index: TransformIndex) -> u32 {
|
||||
transform_index.transform_index as u32 * limits.min_uniform_buffer_offset_alignment as u32
|
||||
}
|
||||
|
||||
/// Returns the offset of the transform inside the bind group buffer.
|
||||
///
|
||||
/// ```nobuild
|
||||
|
@ -367,15 +329,21 @@ impl TransformBuffers {
|
|||
/// let offset = transform_buffers.buffer_offset(job.transform_id);
|
||||
/// render_pass.set_bind_group(1, bindgroup, &[ offset, offset, ]);
|
||||
/// ```
|
||||
#[inline(always)]
|
||||
pub fn buffer_offset(&self, transform_index: TransformIndex) -> u32 {
|
||||
Self::get_buffer_offset(&self.limits, transform_index)
|
||||
//Self::get_buffer_offset(&self.limits, transform_index)
|
||||
let transform_index = transform_index.transform_index % self.max_transform_count;
|
||||
let t = transform_index as u32 * self.limits.min_uniform_buffer_offset_alignment as u32;
|
||||
//debug!("offset: {t}");
|
||||
t
|
||||
}
|
||||
|
||||
/// Returns a boolean indicating if the buffers need to be expanded
|
||||
pub fn needs_expand(&self) -> bool {
|
||||
self.entries.last()
|
||||
false
|
||||
/* self.entries.last()
|
||||
.map(|entry| entry.len >= self.max_transform_count)
|
||||
.unwrap_or(false)
|
||||
.unwrap_or(false) */
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue