Tiled Forward Rendering #5

Merged
SeanOMik merged 15 commits from feature/tiled-forward-rendering into main 2024-03-23 14:38:43 +00:00
6 changed files with 131 additions and 88 deletions
Showing only changes of commit 014abcf7e6 - Show all commits

View File

@ -170,7 +170,10 @@ async fn main() {
specular: 1.3, specular: 1.3,
}, },
Transform::new( Transform::new(
Vec3::new(-5.0, 1.0, -1.28), //Vec3::new(-5.0, 1.0, -1.28),
Vec3::new(-5.0, 1.0, -0.28),
//Vec3::new(-10.0, 0.94, -0.28),
Quat::IDENTITY, Quat::IDENTITY,
Vec3::new(0.25, 0.25, 0.25), Vec3::new(0.25, 0.25, 0.25),
), ),

View File

@ -247,7 +247,7 @@ impl LightUniform {
_padding3: 0, _padding3: 0,
color: light.color, color: light.color,
range: 2.0, range: 1.5,
intensity: 1.0, intensity: 1.0,
spot_cutoff: 0.0, spot_cutoff: 0.0,

View File

@ -7,6 +7,7 @@ use winit::dpi::PhysicalSize;
use super::{light::LightUniformBuffers, render_buffer::{BindGroupPair, BufferWrapper}, texture::RenderTexture}; use super::{light::LightUniformBuffers, render_buffer::{BindGroupPair, BufferWrapper}, texture::RenderTexture};
pub(crate) struct LightIndicesGridBuffer { pub(crate) struct LightIndicesGridBuffer {
index_counter_buffer: wgpu::Buffer,
indices_buffer: wgpu::Buffer, indices_buffer: wgpu::Buffer,
grid_texture: wgpu::Texture, grid_texture: wgpu::Texture,
grid_texture_view: wgpu::TextureView, grid_texture_view: wgpu::TextureView,
@ -34,6 +35,12 @@ impl LightCullCompute {
usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST, usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
}); });
let light_index_counter_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("B_LightIndexCounter"),
contents: &bytemuck::cast_slice(&[0]),
usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
});
let light_indices_bg_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { let light_indices_bg_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
entries: &[ entries: &[
wgpu::BindGroupLayoutEntry { wgpu::BindGroupLayoutEntry {
@ -57,15 +64,27 @@ impl LightCullCompute {
view_dimension: wgpu::TextureViewDimension::D2 view_dimension: wgpu::TextureViewDimension::D2
}, },
count: None, count: None,
},
wgpu::BindGroupLayoutEntry {
binding: 2,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Buffer {
ty: wgpu::BufferBindingType::Storage {
read_only: false
},
has_dynamic_offset: false,
min_binding_size: None,
},
count: None,
} }
], ],
label: Some("BGL_LightIndicesGrid"), label: Some("BGL_LightIndicesGrid"),
}); });
// TODO: shrink the texture to match the amount of grid cells that the shader actually uses // TODO: resize texture when screen is resized
let size = wgpu::Extent3d { let size = wgpu::Extent3d {
width: screen_size.width, width: workgroup_size.x,
height: screen_size.height, height: workgroup_size.y,
depth_or_array_layers: 1, depth_or_array_layers: 1,
}; };
let grid_texture = device.create_texture( let grid_texture = device.create_texture(
@ -108,12 +127,23 @@ impl LightCullCompute {
wgpu::BindGroupEntry { wgpu::BindGroupEntry {
binding: 1, binding: 1,
resource: wgpu::BindingResource::TextureView(&grid_texture_view) resource: wgpu::BindingResource::TextureView(&grid_texture_view)
} },
wgpu::BindGroupEntry {
binding: 2,
resource: wgpu::BindingResource::Buffer(
wgpu::BufferBinding {
buffer: &light_index_counter_buffer,
offset: 0,
size: None, // the entire light buffer is needed
}
)
},
], ],
label: Some("BG_LightIndicesGrid"), label: Some("BG_LightIndicesGrid"),
}); });
LightIndicesGridBuffer { LightIndicesGridBuffer {
index_counter_buffer: light_index_counter_buffer,
indices_buffer: light_indices_buffer, indices_buffer: light_indices_buffer,
grid_texture, grid_texture,
grid_texture_view, grid_texture_view,

View File

@ -21,7 +21,7 @@ struct VertexOutput {
struct CameraUniform { struct CameraUniform {
view: mat4x4<f32>, view: mat4x4<f32>,
projection: mat4x4<f32>, inverse_projection: mat4x4<f32>,
view_projection: mat4x4<f32>, view_projection: mat4x4<f32>,
position: vec3<f32>, position: vec3<f32>,
}; };
@ -126,6 +126,8 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4<f32> {
return vec4<f32>(light_object_res, object_color.a);*/ return vec4<f32>(light_object_res, object_color.a);*/
let tile_index = vec2<u32>(floor(in.clip_position.xy / 16.0)); let tile_index = vec2<u32>(floor(in.clip_position.xy / 16.0));
let tile: vec2<u32> = textureLoad(t_light_grid, tile_index).xy; let tile: vec2<u32> = textureLoad(t_light_grid, tile_index).xy;

View File

@ -1,21 +1,14 @@
const BLOCK_SIZE: i32 = 16; const BLOCK_SIZE: u32 = 16u;
const MAX_TILE_VISIBLE_LIGHTS: u32 = 1024u; const MAX_TILE_VISIBLE_LIGHTS: u32 = 1024u;
const LIGHT_TY_DIRECTIONAL = 0u; const LIGHT_TY_DIRECTIONAL = 0u;
const LIGHT_TY_POINT = 1u; const LIGHT_TY_POINT = 1u;
const LIGHT_TY_SPOT = 2u; const LIGHT_TY_SPOT = 2u;
// Possible computer shader inputs:
//
// local_invocation_id
// workgroup_id
// global_invocation_id
// num_workgroups
// local_invocation_index
struct CameraUniform { struct CameraUniform {
view: mat4x4<f32>, view: mat4x4<f32>,
projection: mat4x4<f32>, inverse_projection: mat4x4<f32>,
//projection: mat4x4<f32>,
view_projection: mat4x4<f32>, view_projection: mat4x4<f32>,
position: vec3<f32>, position: vec3<f32>,
}; };
@ -42,14 +35,13 @@ struct Lights {
var<workgroup> wg_min_depth: atomic<u32>; var<workgroup> wg_min_depth: atomic<u32>;
var<workgroup> wg_max_depth: atomic<u32>; var<workgroup> wg_max_depth: atomic<u32>;
var<workgroup> wg_light_index_start: atomic<u32>;
var<workgroup> wg_frustum_planes: array<vec4<f32>, 6>; var<workgroup> wg_frustum_planes: array<vec4<f32>, 6>;
// index list of visible light sources for this tile // index list of visible light sources for this tile
var<workgroup> wg_visible_light_indices: array<u32, MAX_TILE_VISIBLE_LIGHTS>; var<workgroup> wg_visible_light_indices: array<u32, MAX_TILE_VISIBLE_LIGHTS>;
var<workgroup> wg_visible_light_count: atomic<u32>; var<workgroup> wg_visible_light_count: atomic<u32>;
//var<workgroup> view_projection: mat4x4;
@group(0) @binding(0) @group(0) @binding(0)
var t_depthmap: texture_depth_2d; var t_depthmap: texture_depth_2d;
@group(0) @binding(1) @group(0) @binding(1)
@ -65,6 +57,8 @@ var<storage, read> u_lights: Lights;
var<storage, read_write> u_light_indices: array<u32>; var<storage, read_write> u_light_indices: array<u32>;
@group(3) @binding(1) @group(3) @binding(1)
var t_light_grid: texture_storage_2d<rg32uint, read_write>; var t_light_grid: texture_storage_2d<rg32uint, read_write>;
@group(3) @binding(2)
var<storage, read_write> u_light_index_counter: atomic<u32>;
@group(4) @binding(0) @group(4) @binding(0)
var<uniform> u_screen_size: vec2<u32>; var<uniform> u_screen_size: vec2<u32>;
@ -78,12 +72,6 @@ fn cs_main(
@builtin(num_workgroups) num_workgroups: vec3<u32>, @builtin(num_workgroups) num_workgroups: vec3<u32>,
@builtin(local_invocation_index) local_invocation_index: u32, @builtin(local_invocation_index) local_invocation_index: u32,
) { ) {
//var location = vec2<i32>(global_invocation_id.xy);
var item_id = vec2<i32>(local_invocation_id.xy);
var tile_id = vec2<i32>(workgroup_id.xy);
var tile_number = vec2<i32>(num_workgroups.xy);
var index = tile_id.y * tile_number.x + tile_id.x;
// Initialize some shared global values for depth and light count // Initialize some shared global values for depth and light count
if (local_invocation_index == 0u) { if (local_invocation_index == 0u) {
wg_min_depth = 0xFFFFFFFu; wg_min_depth = 0xFFFFFFFu;
@ -112,28 +100,41 @@ fn cs_main(
// Create the frustum planes that will be used for this time // Create the frustum planes that will be used for this time
if (local_invocation_index == 0u) { if (local_invocation_index == 0u) {
var negative_step = (2.0 * vec2<f32>(tile_id)) / vec2<f32>(tile_number); // Compute the 4 corner points on the far clipping plane to use as the frustum vertices.
var positive_step = (2.0 * vec2<f32>(tile_id) + vec2<f32>(1.0, 1.0)) / vec2<f32>(tile_number); var screen_space: array<vec4<f32>, 4>;
// z in the vec4 is the distance from the center of the tile // top left point
wg_frustum_planes[0] = vec4<f32>(1.0, 0.0, 0.0, 1.0 - negative_step.x); // left var temp: vec2<u32> = workgroup_id.xy * BLOCK_SIZE;
wg_frustum_planes[1] = vec4<f32>(-1.0, 0.0, 0.0, -1.0 + positive_step.x); // right screen_space[0] = vec4<f32>(f32(temp.x), f32(temp.y), -1.0, 1.0);
wg_frustum_planes[2] = vec4<f32>(0.0, -1.0, 0.0, 1.0 - negative_step.y); // bottom
wg_frustum_planes[3] = vec4<f32>(0.0, -1.0, 0.0, -1.0 + positive_step.y); // top
wg_frustum_planes[4] = vec4<f32>(0.0, 0.0, -1.0, -min_depth); // near plane
wg_frustum_planes[5] = vec4<f32>(0.0, 0.0, 1.0, max_depth); // far plane
// convert the side and top planes from clip to view space // top right point
var temp2 = vec2<f32>(f32(workgroup_id.x) + 1.0, f32(workgroup_id.y)) * f32(BLOCK_SIZE);
screen_space[1] = vec4<f32>(temp2.x, temp2.y, -1.0, 1.0);
// bottom left point
temp2 = vec2<f32>(f32(workgroup_id.x), f32(workgroup_id.y) + 1.0) * f32(BLOCK_SIZE);
screen_space[2] = vec4<f32>(temp2.x, temp2.y, -1.0, 1.0);
// bottom right point
temp2 = vec2<f32>(f32(workgroup_id.x) + 1.0, f32(workgroup_id.y) + 1.0) * f32(BLOCK_SIZE);
screen_space[3] = vec4<f32>(temp2.x, temp2.y, -1.0, 1.0);
// convert screenspace to view space
var view_space: array<vec3<f32>, 4>;
for (var i = 0u; i < 4u; i++) { for (var i = 0u; i < 4u; i++) {
wg_frustum_planes[i] *= u_camera.view_projection; view_space[i] = screen_to_view(screen_space[i]).xyz;
wg_frustum_planes[i] /= length(wg_frustum_planes[i].xyz);
} }
// convert near and far planes from clip to view space // View space eye is always at the origin
wg_frustum_planes[4] *= u_camera.view; let eye_pos = vec3<f32>(0.0, 0.0, 0.0);
wg_frustum_planes[4] /= length(wg_frustum_planes[4].xyz);
wg_frustum_planes[5] *= u_camera.view; wg_frustum_planes[0] = compute_plane(eye_pos, view_space[2], view_space[0]); // left plane
wg_frustum_planes[5] /= length(wg_frustum_planes[5].xyz); wg_frustum_planes[1] = compute_plane(eye_pos, view_space[1], view_space[3]); // right plane
wg_frustum_planes[2] = compute_plane(eye_pos, view_space[0], view_space[1]); // top plane
wg_frustum_planes[3] = compute_plane(eye_pos, view_space[3], view_space[2]); // bottom plane
wg_frustum_planes[4] = vec4<f32>(0.0, 0.0, -1.0, -min_depth);
wg_frustum_planes[5] = vec4<f32>(0.0, 0.0, 1.0, -max_depth);
} }
workgroupBarrier(); workgroupBarrier();
@ -143,38 +144,27 @@ fn cs_main(
// Process the lights detecting which ones to cull for this tile. // Process the lights detecting which ones to cull for this tile.
// Processes 256 lights simultaniously, each on a thread in the workgroup. Requires multiple // Processes 256 lights simultaniously, each on a thread in the workgroup. Requires multiple
// iterations for more lights. // iterations for more lights.
var thread_count = u32(BLOCK_SIZE * BLOCK_SIZE); for (var i = local_invocation_index; i < u_lights.light_count; i += BLOCK_SIZE * BLOCK_SIZE) {
var pass_count = (u_lights.light_count + thread_count - 1u) / thread_count; let light_index = i;
for (var i = 0u; i < pass_count; i++) {
// find the light index to check on this thread, make sure we're not trying to test
// for more lights than we have.
var light_index = i * thread_count + local_invocation_index;
if (light_index >= u_lights.light_count) {
break;
}
var light = u_lights.data[light_index]; let light = u_lights.data[light_index];
var position = light.position; let position_vec4 = u_camera.view * vec4<f32>(light.position, 1.0);
var radius = light.range; let position = position_vec4.xyz;
let radius = light.range;
if (light.light_ty == LIGHT_TY_DIRECTIONAL) { if (light.light_ty == LIGHT_TY_DIRECTIONAL) {
//add_light(light_index); add_light(light_index);
} else if (light.light_ty == LIGHT_TY_POINT } else if (light.light_ty == LIGHT_TY_POINT
&& sphere_inside_frustrum(wg_frustum_planes, position, radius)) { && sphere_inside_frustrum(wg_frustum_planes, position, radius)) {
// TODO: add the light to the transparent geometry list // TODO: add the light to the transparent geometry list
add_light(light_index); add_light(light_index);
// TODO: spotlights
if (!sphere_inside_plane(position, radius, wg_frustum_planes[4])) { if (!sphere_inside_plane(position, radius, wg_frustum_planes[4])) {
/*var offset: u32 = wg_visible_light_count;
if (offset < MAX_TILE_VISIBLE_LIGHTS) {
atomicAdd(&wg_visible_light_count, 1u);
wg_visible_light_indices[offset] = light_index;
}*/
} }
} }
// TODO: spotlights
} }
workgroupBarrier(); workgroupBarrier();
@ -183,41 +173,27 @@ fn cs_main(
// first update the light grid on the first thread // first update the light grid on the first thread
if (local_invocation_index == 0u) { if (local_invocation_index == 0u) {
var offset = u32(index) * MAX_TILE_VISIBLE_LIGHTS; // index in the global light list wg_light_index_start = atomicAdd(&u_light_index_counter, wg_visible_light_count);
textureStore(t_light_grid, workgroup_id.xy, vec4<u32>(offset, wg_visible_light_count, 0u, 1u)); textureStore(t_light_grid, workgroup_id.xy, vec4<u32>(wg_light_index_start, wg_visible_light_count, 0u, 1u));
// TODO: store light grid for transparent geometry
} }
workgroupBarrier(); workgroupBarrier();
// now update the light index list on all threads. // now update the light index list on all threads.
var indices_offset = u32(index) * MAX_TILE_VISIBLE_LIGHTS; for (var i = local_invocation_index; i < wg_visible_light_count; i += BLOCK_SIZE * BLOCK_SIZE) {
//var pass_count = (wg_visible_light_count + thread_count - 1) / thread_count; u_light_indices[wg_light_index_start + i] = wg_visible_light_indices[i];
for (var i = 0u; i < pass_count; i++) {
// find the light index to check on this thread, make sure we're not trying to test
// for more lights than we have.
//var light_index: u32 = i * thread_count + local_invocation_index;
/*if (light_index > u_lights.light_count) {
u_visible_light_indices
break;
}*/
var offset = indices_offset + i;
if (offset >= wg_visible_light_count) {
// stop if we're over the over the amount of lights we saw
break;
}
u_light_indices[offset] = wg_visible_light_indices[i];
} }
} }
/// Add a light to the visible light indicies list. /// Add a light to the visible light indicies list.
/// Returns a boolean indicating if the light was added. /// Returns a boolean indicating if the light was added.
fn add_light(light_index: u32) -> bool { fn add_light(light_index: u32) -> bool {
var offset: u32 = wg_visible_light_count; //var offset: u32 = wg_visible_light_count;
if (offset < MAX_TILE_VISIBLE_LIGHTS) { if (wg_visible_light_count < MAX_TILE_VISIBLE_LIGHTS) {
atomicAdd(&wg_visible_light_count, 1u); let offset = atomicAdd(&wg_visible_light_count, 1u);
wg_visible_light_indices[offset] = light_index; wg_visible_light_indices[offset] = light_index;
return true; return true;
} }
@ -232,7 +208,7 @@ fn sphere_inside_frustrum(frustum: array<vec4<f32>, 6>, sphere_origin: vec3<f32>
// only check the sides of the frustum // only check the sides of the frustum
for (var i = 0u; i < 4u; i++) { for (var i = 0u; i < 4u; i++) {
if (!sphere_inside_plane(sphere_origin, radius, frustum_v[i])) { if (sphere_inside_plane(sphere_origin, radius, frustum_v[i])) {
return false; return false;
} }
} }
@ -245,7 +221,38 @@ fn sphere_inside_frustrum(frustum: array<vec4<f32>, 6>, sphere_origin: vec3<f32>
/// Source: Real-time collision detection, Christer Ericson (2005) /// Source: Real-time collision detection, Christer Ericson (2005)
/// (https://www.3dgep.com/forward-plus/#light-culling-compute-shader) /// (https://www.3dgep.com/forward-plus/#light-culling-compute-shader)
fn sphere_inside_plane(sphere_origin: vec3<f32>, radius: f32, plane: vec4<f32>) -> bool { fn sphere_inside_plane(sphere_origin: vec3<f32>, radius: f32, plane: vec4<f32>) -> bool {
//return dot(plane.xyz, sphere_origin) - plane.w < -radius; return dot(plane.xyz, sphere_origin) - plane.w < -radius;
}
return dot(vec4<f32>(sphere_origin, 0.0), plane) + radius > 0.0; fn clip_to_view(clip: vec4<f32>) -> vec4<f32> {
// view space position
var view = u_camera.inverse_projection * clip;
// perspective projection
return view / view.w;
}
fn screen_to_view(screen: vec4<f32>) -> vec4<f32> {
// convert to normalized texture coordinates
let tex_coord = screen.xy / vec2<f32>(u_screen_size);
// convert to clip space
let clip = vec4<f32>( vec2<f32>(tex_coord.x, 1.0 - tex_coord.y) * 2.0 - 1.0, screen.z, screen.w);
return clip_to_view(clip);
}
/// Compute a plane from 3 noncollinear points that form a triangle.
/// This equation assumes a right-handed (counter-clockwise winding order)
/// coordinate system to determine the direction of the plane normal.
fn compute_plane(p0: vec3<f32>, p1: vec3<f32>, p2: vec3<f32>) -> vec4<f32> {
let v0 = p1 - p0;
let v2 = p2 - p0;
var plane = vec4<f32>(normalize(cross(v0, v2)), 0.0);
// find the distance to the origin
plane.w = dot(plane.xyz, p0);
return plane;
} }

View File

@ -13,6 +13,7 @@ mkShell rec {
mold mold
udev udev
lua5_4_compat lua5_4_compat
rustup
]; ];
buildInputs = [ buildInputs = [
udev alsa-lib libGL gcc udev alsa-lib libGL gcc