Tiled Forward Rendering #5

Merged
SeanOMik merged 15 commits from feature/tiled-forward-rendering into main 2024-03-23 14:38:43 +00:00
6 changed files with 143 additions and 85 deletions
Showing only changes of commit 5c1ce809ff - Show all commits

View File

@ -25,8 +25,8 @@ impl Default for FreeFlyCamera {
Self {
speed: 4.0,
slow_speed_factor: 0.25,
look_speed: 0.3,
mouse_sensitivity: 1.0,
look_speed: 0.5,
mouse_sensitivity: 0.9,
look_with_keys: false,
}
}

View File

@ -1,6 +1,6 @@
use std::{cell::Ref, ptr::NonNull};
use lyra_engine::{assets::gltf::Gltf, ecs::{query::{QueryBorrow, ViewState}, system::{BatchedSystem, Criteria, CriteriaSchedule, IntoSystem}, Component, World}, game::Game, input::{Action, ActionHandler, ActionKind, ActionMapping, ActionMappingId, ActionSource, InputActionPlugin, KeyCode, LayoutId, MouseAxis, MouseInput}, math::{self, Quat, Transform, Vec3}, render::light::{directional::DirectionalLight, SpotLight}, scene::CameraComponent, DeltaTime};
use lyra_engine::{assets::gltf::Gltf, change_tracker::Ct, ecs::{query::{QueryBorrow, ViewState}, system::{BatchedSystem, Criteria, CriteriaSchedule, IntoSystem}, Component, World}, game::Game, input::{Action, ActionHandler, ActionKind, ActionMapping, ActionMappingId, ActionSource, InputActionPlugin, KeyCode, LayoutId, MouseAxis, MouseInput}, math::{self, Quat, Transform, Vec3}, render::{light::{directional::DirectionalLight, PointLight, SpotLight}, window::{CursorGrabMode, WindowOptions}}, scene::CameraComponent, DeltaTime};
use lyra_engine::assets::ResourceManager;
mod free_fly_camera;
@ -93,15 +93,16 @@ async fn main() {
//let diffuse_texture = resman.request::<Texture>("assets/happy-tree.png").unwrap();
//let antique_camera_model = resman.request::<Model>("assets/AntiqueCamera.glb").unwrap();
//let cube_model = resman.request::<Model>("assets/cube-texture-bin.glb").unwrap();
/* let cube_gltf = resman.request::<Gltf>("assets/texture-sep/texture-sep.gltf").unwrap();
let crate_gltf = resman.request::<Gltf>("assets/crate/crate.gltf").unwrap();
let cube_gltf = resman.request::<Gltf>("assets/texture-sep/texture-sep.gltf").unwrap();
/*let crate_gltf = resman.request::<Gltf>("assets/crate/crate.gltf").unwrap();
let separate_gltf = resman.request::<Gltf>("assets/pos-testing/child-node-cubes.glb").unwrap(); */
//drop(resman);
/* let cube_mesh = &cube_gltf.data_ref()
cube_gltf.wait_recurse_dependencies_load();
let cube_mesh = &cube_gltf.data_ref()
.unwrap().meshes[0];
let crate_mesh = &crate_gltf.data_ref()
/* let crate_mesh = &crate_gltf.data_ref()
.unwrap().meshes[0];
let separate_scene = &separate_gltf.data_ref()
@ -151,29 +152,33 @@ async fn main() {
));
}
/* {
let mut light_tran = Transform::from_xyz(-3.5, 0.2, -4.5);
light_tran.scale = Vec3::new(0.5, 0.5, 0.5);
{
//let mut light_tran = Transform::from_xyz(-3.5, 0.2, -4.5);
//light_tran.scale = Vec3::new(0.5, 0.5, 0.5);
world.spawn((
SpotLight {
color: Vec3::new(1.0, 0.2, 0.2),
cutoff: math::Angle::Degrees(12.5),
outer_cutoff: math::Angle::Degrees(17.5),
PointLight {
color: Vec3::new(0.0, 0.0, 1.0),
intensity: 3.3,
constant: 1.0,
linear: 0.007,
quadratic: 0.0002,
ambient: 0.0,
diffuse: 7.0,
specular: 1.0,
linear: 0.09,
quadratic: 0.032,
ambient: 0.2,
diffuse: 1.0,
specular: 1.3,
},
Transform::from(light_tran),
Transform::new(
Vec3::new(-5.0, 1.0, -1.28),
Quat::IDENTITY,
Vec3::new(0.25, 0.25, 0.25),
),
cube_mesh.clone(),
));
}
{
/* {
let mut light_tran = Transform::from_xyz(2.0, 2.5, -9.5);
light_tran.scale = Vec3::new(0.5, 0.5, 0.5);
world.spawn((

View File

@ -6,26 +6,31 @@ use winit::dpi::PhysicalSize;
use super::{light::LightUniformBuffers, render_buffer::{BindGroupPair, BufferWrapper}, texture::RenderTexture};
struct LightIndicesGridBuffer {
pub(crate) struct LightIndicesGridBuffer {
indices_buffer: wgpu::Buffer,
grid_texture: wgpu::Texture,
grid_texture_view: wgpu::TextureView,
bg_pair: BindGroupPair,
pub bg_pair: BindGroupPair,
}
pub(crate) struct LightCullCompute {
device: Rc<wgpu::Device>,
queue: Rc<wgpu::Queue>,
pipeline: ComputePipeline,
light_indices_grid: LightIndicesGridBuffer,
pub light_indices_grid: LightIndicesGridBuffer,
screen_size_buffer: BufferWrapper,
workgroup_size: glam::UVec2,
}
impl LightCullCompute {
fn create_grid(device: &wgpu::Device, screen_size: PhysicalSize<u32>) -> LightIndicesGridBuffer {
fn create_grid(device: &wgpu::Device, screen_size: PhysicalSize<u32>, workgroup_size: glam::UVec2) -> LightIndicesGridBuffer {
let mut contents = Vec::<u8>::new();
let contents_len = workgroup_size.x * workgroup_size.y * mem::size_of::<u8>() as u32;
contents.resize(contents_len as _, 0);
let light_indices_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("B_LightIndices"),
contents: &[0; mem::size_of::<u32>() * 16 * 16],
contents: &contents,
usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
});
@ -131,8 +136,8 @@ impl LightCullCompute {
source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(shader_src)),
});
let light_grid = Self::create_grid(&device, screen_size);
let workgroup_size = glam::UVec2::new((screen_size.width as f32 / 16.0).ceil() as u32, (screen_size.height as f32 / 16.0).ceil() as u32);
let light_grid = Self::create_grid(&device, screen_size, workgroup_size);
let depth_tex_pair = depth_texture.create_bind_group(&device);
@ -161,12 +166,14 @@ impl LightCullCompute {
pipeline,
light_indices_grid: light_grid,
screen_size_buffer,
workgroup_size,
}
}
pub fn update_screen_size(&self, size: PhysicalSize<u32>) {
pub fn update_screen_size(&mut self, size: PhysicalSize<u32>) {
self.screen_size_buffer.write_buffer(&self.queue, 0,
&[UVec2::new(size.width, size.height)]);
self.workgroup_size = glam::UVec2::new((size.width as f32 / 16.0).ceil() as u32, (size.height as f32 / 16.0).ceil() as u32);
}
pub fn compute(&mut self, camera_buffers: &BufferWrapper, lights_buffers: &LightUniformBuffers, depth_texture: &RenderTexture) {
@ -187,7 +194,7 @@ impl LightCullCompute {
pass.set_bind_group(3, &self.light_indices_grid.bg_pair.bindgroup, &[]);
pass.set_bind_group(4, self.screen_size_buffer.bindgroup(), &[]);
pass.dispatch_workgroups(16, 16, 1);
pass.dispatch_workgroups(self.workgroup_size.x, self.workgroup_size.y, 1);
}
self.queue.submit(std::iter::once(encoder.finish()));
self.device.poll(wgpu::Maintain::Wait);

View File

@ -250,7 +250,9 @@ impl BasicRenderer {
vec![&s.bgl_texture, &s.transform_buffers.bindgroup_layout,
s.camera_buffer.bindgroup_layout().unwrap(),
&s.light_buffers.bind_group_pair.layout, &s.material_buffer.bindgroup_pair.as_ref().unwrap().layout,
&s.bgl_texture])));
&s.bgl_texture,
&s.light_cull_compute.light_indices_grid.bg_pair.layout,
])));
s.render_pipelines = pipelines;
s
@ -582,6 +584,8 @@ impl Renderer for BasicRenderer {
render_pass.set_bind_group(3, &self.light_buffers.bind_group_pair.bindgroup, &[]);
render_pass.set_bind_group(4, &self.material_buffer.bindgroup_pair.as_ref().unwrap().bindgroup, &[]);
render_pass.set_bind_group(6, &self.light_cull_compute.light_indices_grid.bg_pair.bindgroup, &[]);
// if this mesh uses indices, use them to draw the mesh
if let Some((idx_type, indices)) = buffers.buffer_indices.as_ref() {
let indices_len = indices.count() as u32;

View File

@ -2,9 +2,9 @@
const max_light_count: u32 = 16u;
const light_ty_directional = 0u;
const light_ty_point = 1u;
const light_ty_spot = 2u;
const LIGHT_TY_DIRECTIONAL = 0u;
const LIGHT_TY_POINT = 1u;
const LIGHT_TY_SPOT = 2u;
struct VertexInput {
@location(0) position: vec3<f32>,
@ -97,41 +97,79 @@ var t_specular: texture_2d<f32>;
@group(5) @binding(1)
var s_specular: sampler;
@group(6) @binding(0)
var<storage, read_write> u_light_indices: array<u32>;
@group(6) @binding(1)
var t_light_grid: texture_storage_2d<rg32uint, read_write>; // vec2<u32>
@fragment
fn fs_main(in: VertexOutput) -> @location(0) vec4<f32> {
let object_color: vec4<f32> = textureSample(t_diffuse, s_diffuse, in.tex_coords);
/*let object_color: vec4<f32> = textureSample(t_diffuse, s_diffuse, in.tex_coords);
let specular_color: vec3<f32> = textureSample(t_specular, s_specular, in.tex_coords).xyz;
// this needs to be 0.0 for the math
//u_lights.directional_light.direction.w = 0.0;
var light_res = vec3<f32>(0.0);
for (var i = 0u; i < u_lights.light_count; i++) {
var light = u_lights.data[i];
if (light.light_ty == light_ty_directional) {
if (light.light_ty == LIGHT_TY_DIRECTIONAL) {
light_res += blinn_phong_dir_light(in.world_position, in.world_normal, light, u_material, specular_color);
} else if (light.light_ty == light_ty_point) {
} else if (light.light_ty == LIGHT_TY_POINT) {
light_res += blinn_phong_point_light(in.world_position, in.world_normal, light, u_material, specular_color);
} else if (light.light_ty == light_ty_spot) {
} else if (light.light_ty == LIGHT_TY_SPOT) {
light_res += blinn_phong_spot_light(in.world_position, in.world_normal, light, u_material, specular_color);
}
}
/*var light_res = blinn_phong_dir_light(in.world_position, in.world_normal, u_lights.directional_light, u_material, specular_color);
for (var i = 0u; i < u_lights.point_light_count; i++) {
light_res += blinn_phong_point_light(in.world_position, in.world_normal, u_lights.point_lights[i], u_material, specular_color);
}
for (var i = 0u; i < u_lights.spot_light_count; i++) {
light_res += blinn_phong_spot_light(in.world_position, in.world_normal, u_lights.spot_lights[i], u_material, specular_color);
}*/
let light_object_res = light_res * (object_color.xyz/* * u_material.diffuse.xyz*/);
return vec4<f32>(light_object_res, object_color.a);*/
let tile_index = vec2<u32>(floor(in.clip_position.xy / 16.0));
let tile: vec2<u32> = textureLoad(t_light_grid, tile_index).xy;
let object_color: vec4<f32> = textureSample(t_diffuse, s_diffuse, in.tex_coords);
let specular_color: vec3<f32> = textureSample(t_specular, s_specular, in.tex_coords).xyz;
var light_res = vec3<f32>(0.0);
let light_offset = tile.x;
let light_count = tile.y;
for (var i = 0u; i < light_count; i++) {
let light_index = u_light_indices[light_offset + i];
let light: Light = u_lights.data[light_index];
if (light.light_ty == LIGHT_TY_DIRECTIONAL) {
light_res += blinn_phong_dir_light(in.world_position, in.world_normal, light, u_material, specular_color);
} else if (light.light_ty == LIGHT_TY_POINT) {
light_res += blinn_phong_point_light(in.world_position, in.world_normal, light, u_material, specular_color);
} else if (light.light_ty == LIGHT_TY_SPOT) {
light_res += blinn_phong_spot_light(in.world_position, in.world_normal, light, u_material, specular_color);
}
}
let light_object_res = light_res * (object_color.xyz);
return vec4<f32>(light_object_res, object_color.a);
//return debug_grid(in);
}
/// Debug visualisation of the 16x16 pixel light culling tiles.
///
/// Pixels that lie within 5% of a tile edge are drawn black, so the tile borders show up
/// as a grid. Every other pixel is tinted by the coordinate of the tile it belongs to
/// (tile x in the red channel, tile y in the green channel).
fn debug_grid(in: VertexOutput) -> vec4<f32> {
    // position of this fragment measured in tiles; the integer part is the tile index
    let tile_index_float: vec2<f32> = in.clip_position.xy / 16.0;
    let tile_index = vec2<u32>(floor(tile_index_float));

    // fractional position inside the tile, used to detect where the grid lines would be
    let x = tile_index_float.x - trunc(tile_index_float.x);
    let y = tile_index_float.y - trunc(tile_index_float.y);

    let near_low_edge: bool = x < 0.05 || y < 0.05;
    let near_high_edge: bool = x > 0.95 || y > 0.95;

    if (near_low_edge || near_high_edge) {
        return vec4<f32>(0.0, 0.0, 0.0, 1.0);
    }

    // NOTE(review): 50 and 38 look like the tile counts of an 800x600 target
    // (800 / 16 = 50, 600 / 16 = 37.5) -- confirm, other resolutions will not
    // map onto the full 0..1 colour range.
    return vec4<f32>(f32(tile_index.x) / 50.0, f32(tile_index.y) / 38.0, 0.0, 1.0);
}
fn blinn_phong_dir_light(world_pos: vec3<f32>, world_norm: vec3<f32>, dir_light: Light, material: Material, specular_factor: vec3<f32>) -> vec3<f32> {

View File

@ -1,9 +1,9 @@
const block_size: i32 = 16;
const max_tile_visible_lights: u32 = 1024u;
const BLOCK_SIZE: i32 = 16;
const MAX_TILE_VISIBLE_LIGHTS: u32 = 1024u;
const light_ty_directional = 0u;
const light_ty_point = 1u;
const light_ty_spot = 2u;
const LIGHT_TY_DIRECTIONAL = 0u;
const LIGHT_TY_POINT = 1u;
const LIGHT_TY_SPOT = 2u;
// Possible compute shader inputs:
//
@ -44,13 +44,13 @@ var<workgroup> wg_max_depth: atomic<u32>;
var<workgroup> wg_frustum_planes: array<vec4<f32>, 6>;
// index list of visible light sources for this tile
var<workgroup> wg_visible_light_indices: array<u32, max_tile_visible_lights>;
var<workgroup> wg_visible_light_indices: array<u32, MAX_TILE_VISIBLE_LIGHTS>;
var<workgroup> wg_visible_light_count: atomic<u32>;
//var<workgroup> view_projection: mat4x4;
@group(0) @binding(0)
var t_depthmap: texture_2d<f32>;
var t_depthmap: texture_depth_2d;
@group(0) @binding(1)
var s_depthmap: sampler;
@ -93,9 +93,8 @@ fn cs_main(
workgroupBarrier();
// step 1: calculate the minimum and maximum depth values for this tile (using the depth map)
var tex_coord = vec2<f32>(global_invocation_id.xy);
//var depth_float: f32 = textureSample(t_depthmap, s_depthmap, tex_coord).r;
var depth_float = 0.0;
var tex_coord = vec2<u32>(global_invocation_id.xy);
var depth_float: f32 = textureLoad(t_depthmap, tex_coord, 0);
// bitcast the floating depth to u32 for atomic comparisons between threads
var depth_uint: u32 = bitcast<u32>(depth_float);
@ -143,7 +142,7 @@ fn cs_main(
// Process the lights detecting which ones to cull for this tile.
// Processes 256 lights simultaneously, each on a thread in the workgroup. Requires multiple
// iterations for more lights.
var thread_count = u32(block_size * block_size);
var thread_count = u32(BLOCK_SIZE * BLOCK_SIZE);
var pass_count = (u_lights.light_count + thread_count - 1u) / thread_count;
for (var i = 0u; i < pass_count; i++) {
// find the light index to check on this thread, make sure we're not trying to test
@ -157,18 +156,21 @@ fn cs_main(
var position = light.position;
var radius = light.range;
if (light.light_ty != light_ty_directional
if (light.light_ty == LIGHT_TY_DIRECTIONAL) {
add_light(light_index);
} else if (light.light_ty == LIGHT_TY_POINT
&& sphere_inside_frustrum(wg_frustum_planes, position, radius)) {
// TODO: add the light to the transparent geometry list
// TODO: spotlights
if (!sphere_inside_plane(position, radius, wg_frustum_planes[4])) {
var offset: u32 = wg_visible_light_count;
add_light(light_index);
/*var offset: u32 = wg_visible_light_count;
if (offset < max_tile_visible_lights) {
if (offset < MAX_TILE_VISIBLE_LIGHTS) {
atomicAdd(&wg_visible_light_count, 1u);
wg_visible_light_indices[offset] = light_index;
}
}*/
}
}
}
@ -179,28 +181,14 @@ fn cs_main(
// first update the light grid on the first thread
if (local_invocation_index == 0u) {
var offset = u32(index) * max_tile_visible_lights; // index in the global light list
//t_light_grid[workgroup_id.x][workgroup_id.y] = vec2<f32>(offset, wg_visible_light_count);
var offset = u32(index) * MAX_TILE_VISIBLE_LIGHTS; // index in the global light list
textureStore(t_light_grid, workgroup_id.xy, vec4<u32>(offset, wg_visible_light_count, 0u, 1u));
// TODO: update transparent light grid
/*var offset = index * max_tile_visible_lights; // position in the global light buffer
// update the light
for (var i = 0u; i < wg_visible_light_count; i++) {
//u_visible_light_indices[offset + i] = wg_visible_light_indices[i];
}
if (wg_visible_light_count != 1024) {
// Mark the end of the visible lights for this tile
u_visible_light_indices[offset + wg_visible_light_count] = -1;
}*/
}
workgroupBarrier();
// now update the light index list on all threads.
var indices_offset = u32(index) * max_tile_visible_lights;
var indices_offset = u32(index) * MAX_TILE_VISIBLE_LIGHTS;
//var pass_count = (wg_visible_light_count + thread_count - 1) / thread_count;
for (var i = 0u; i < pass_count; i++) {
// find the light index to check on this thread, make sure we're not trying to test
@ -221,6 +209,20 @@ fn cs_main(
}
}
/// Add a light to the visible light indices list.
///
/// The slot is reserved with a single `atomicAdd`: the value it returns is the count
/// from before the add, so every caller is handed a different slot even when several
/// invocations add a light at the same time. If the list is already full the
/// reservation is rolled back, so once all callers have finished the count does not
/// exceed `MAX_TILE_VISIBLE_LIGHTS`.
///
/// Returns a boolean indicating if the light was added.
fn add_light(light_index: u32) -> bool {
    let offset: u32 = atomicAdd(&wg_visible_light_count, 1u);
    if (offset < MAX_TILE_VISIBLE_LIGHTS) {
        wg_visible_light_indices[offset] = light_index;
        return true;
    }

    // no room left in the list: undo the reservation made above
    atomicSub(&wg_visible_light_count, 1u);
    return false;
}
fn sphere_inside_frustrum(frustum: array<vec4<f32>, 6>, sphere_origin: vec3<f32>, radius: f32) -> bool {
// to be able to index this array with a non-const value,
// it must be defined as a var
@ -241,5 +243,7 @@ fn sphere_inside_frustrum(frustum: array<vec4<f32>, 6>, sphere_origin: vec3<f32>
/// Source: Real-time collision detection, Christer Ericson (2005)
/// (https://www.3dgep.com/forward-plus/#light-culling-compute-shader)
fn sphere_inside_plane(sphere_origin: vec3<f32>, radius: f32, plane: vec4<f32>) -> bool {
return dot(plane.xyz, sphere_origin) - plane.w < -radius;
//return dot(plane.xyz, sphere_origin) - plane.w < -radius;
return dot(vec4<f32>(sphere_origin, 0.0), plane) + radius > 0.0;
}