Tiled Forward Rendering #5

Merged
SeanOMik merged 15 commits from feature/tiled-forward-rendering into main 2024-03-23 14:38:43 +00:00
6 changed files with 131 additions and 88 deletions
Showing only changes of commit 014abcf7e6 - Show all commits

View File

@ -170,7 +170,10 @@ async fn main() {
specular: 1.3,
},
Transform::new(
Vec3::new(-5.0, 1.0, -1.28),
//Vec3::new(-5.0, 1.0, -1.28),
Vec3::new(-5.0, 1.0, -0.28),
//Vec3::new(-10.0, 0.94, -0.28),
Quat::IDENTITY,
Vec3::new(0.25, 0.25, 0.25),
),

View File

@ -247,7 +247,7 @@ impl LightUniform {
_padding3: 0,
color: light.color,
range: 2.0,
range: 1.5,
intensity: 1.0,
spot_cutoff: 0.0,

View File

@ -7,6 +7,7 @@ use winit::dpi::PhysicalSize;
use super::{light::LightUniformBuffers, render_buffer::{BindGroupPair, BufferWrapper}, texture::RenderTexture};
pub(crate) struct LightIndicesGridBuffer {
index_counter_buffer: wgpu::Buffer,
indices_buffer: wgpu::Buffer,
grid_texture: wgpu::Texture,
grid_texture_view: wgpu::TextureView,
@ -34,6 +35,12 @@ impl LightCullCompute {
usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
});
let light_index_counter_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("B_LightIndexCounter"),
contents: &bytemuck::cast_slice(&[0]),
usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
});
let light_indices_bg_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
entries: &[
wgpu::BindGroupLayoutEntry {
@ -57,15 +64,27 @@ impl LightCullCompute {
view_dimension: wgpu::TextureViewDimension::D2
},
count: None,
},
wgpu::BindGroupLayoutEntry {
binding: 2,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Buffer {
ty: wgpu::BufferBindingType::Storage {
read_only: false
},
has_dynamic_offset: false,
min_binding_size: None,
},
count: None,
}
],
label: Some("BGL_LightIndicesGrid"),
});
// TODO: shrink the texture to match the amount of grid cells that the shader actually uses
// TODO: resize texture when screen is resized
let size = wgpu::Extent3d {
width: screen_size.width,
height: screen_size.height,
width: workgroup_size.x,
height: workgroup_size.y,
depth_or_array_layers: 1,
};
let grid_texture = device.create_texture(
@ -108,12 +127,23 @@ impl LightCullCompute {
wgpu::BindGroupEntry {
binding: 1,
resource: wgpu::BindingResource::TextureView(&grid_texture_view)
}
},
wgpu::BindGroupEntry {
binding: 2,
resource: wgpu::BindingResource::Buffer(
wgpu::BufferBinding {
buffer: &light_index_counter_buffer,
offset: 0,
size: None, // the entire light buffer is needed
}
)
},
],
label: Some("BG_LightIndicesGrid"),
});
LightIndicesGridBuffer {
index_counter_buffer: light_index_counter_buffer,
indices_buffer: light_indices_buffer,
grid_texture,
grid_texture_view,

View File

@ -21,7 +21,7 @@ struct VertexOutput {
struct CameraUniform {
view: mat4x4<f32>,
projection: mat4x4<f32>,
inverse_projection: mat4x4<f32>,
view_projection: mat4x4<f32>,
position: vec3<f32>,
};
@ -126,6 +126,8 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4<f32> {
return vec4<f32>(light_object_res, object_color.a);*/
let tile_index = vec2<u32>(floor(in.clip_position.xy / 16.0));
let tile: vec2<u32> = textureLoad(t_light_grid, tile_index).xy;

View File

@ -1,21 +1,14 @@
const BLOCK_SIZE: i32 = 16;
const BLOCK_SIZE: u32 = 16u;
const MAX_TILE_VISIBLE_LIGHTS: u32 = 1024u;
const LIGHT_TY_DIRECTIONAL = 0u;
const LIGHT_TY_POINT = 1u;
const LIGHT_TY_SPOT = 2u;
// Possible computer shader inputs:
//
// local_invocation_id
// workgroup_id
// global_invocation_id
// num_workgroups
// local_invocation_index
struct CameraUniform {
view: mat4x4<f32>,
projection: mat4x4<f32>,
inverse_projection: mat4x4<f32>,
//projection: mat4x4<f32>,
view_projection: mat4x4<f32>,
position: vec3<f32>,
};
@ -42,14 +35,13 @@ struct Lights {
var<workgroup> wg_min_depth: atomic<u32>;
var<workgroup> wg_max_depth: atomic<u32>;
var<workgroup> wg_light_index_start: atomic<u32>;
var<workgroup> wg_frustum_planes: array<vec4<f32>, 6>;
// index list of visible light sources for this tile
var<workgroup> wg_visible_light_indices: array<u32, MAX_TILE_VISIBLE_LIGHTS>;
var<workgroup> wg_visible_light_count: atomic<u32>;
//var<workgroup> view_projection: mat4x4;
@group(0) @binding(0)
var t_depthmap: texture_depth_2d;
@group(0) @binding(1)
@ -65,6 +57,8 @@ var<storage, read> u_lights: Lights;
var<storage, read_write> u_light_indices: array<u32>;
@group(3) @binding(1)
var t_light_grid: texture_storage_2d<rg32uint, read_write>;
@group(3) @binding(2)
var<storage, read_write> u_light_index_counter: atomic<u32>;
@group(4) @binding(0)
var<uniform> u_screen_size: vec2<u32>;
@ -78,12 +72,6 @@ fn cs_main(
@builtin(num_workgroups) num_workgroups: vec3<u32>,
@builtin(local_invocation_index) local_invocation_index: u32,
) {
//var location = vec2<i32>(global_invocation_id.xy);
var item_id = vec2<i32>(local_invocation_id.xy);
var tile_id = vec2<i32>(workgroup_id.xy);
var tile_number = vec2<i32>(num_workgroups.xy);
var index = tile_id.y * tile_number.x + tile_id.x;
// Initialize some shared global values for depth and light count
if (local_invocation_index == 0u) {
wg_min_depth = 0xFFFFFFFu;
@ -112,28 +100,41 @@ fn cs_main(
// Create the frustum planes that will be used for this time
if (local_invocation_index == 0u) {
var negative_step = (2.0 * vec2<f32>(tile_id)) / vec2<f32>(tile_number);
var positive_step = (2.0 * vec2<f32>(tile_id) + vec2<f32>(1.0, 1.0)) / vec2<f32>(tile_number);
// Compute the 4 corner points on the far clipping plane to use as the frustum vertices.
var screen_space: array<vec4<f32>, 4>;
// z in the vec4 is the distance from the center of the tile
wg_frustum_planes[0] = vec4<f32>(1.0, 0.0, 0.0, 1.0 - negative_step.x); // left
wg_frustum_planes[1] = vec4<f32>(-1.0, 0.0, 0.0, -1.0 + positive_step.x); // right
wg_frustum_planes[2] = vec4<f32>(0.0, -1.0, 0.0, 1.0 - negative_step.y); // bottom
wg_frustum_planes[3] = vec4<f32>(0.0, -1.0, 0.0, -1.0 + positive_step.y); // top
wg_frustum_planes[4] = vec4<f32>(0.0, 0.0, -1.0, -min_depth); // near plane
wg_frustum_planes[5] = vec4<f32>(0.0, 0.0, 1.0, max_depth); // far plane
// top left point
var temp: vec2<u32> = workgroup_id.xy * BLOCK_SIZE;
screen_space[0] = vec4<f32>(f32(temp.x), f32(temp.y), -1.0, 1.0);
// convert the side and top planes from clip to view space
// top right point
var temp2 = vec2<f32>(f32(workgroup_id.x) + 1.0, f32(workgroup_id.y)) * f32(BLOCK_SIZE);
screen_space[1] = vec4<f32>(temp2.x, temp2.y, -1.0, 1.0);
// bottom left point
temp2 = vec2<f32>(f32(workgroup_id.x), f32(workgroup_id.y) + 1.0) * f32(BLOCK_SIZE);
screen_space[2] = vec4<f32>(temp2.x, temp2.y, -1.0, 1.0);
// bottom right point
temp2 = vec2<f32>(f32(workgroup_id.x) + 1.0, f32(workgroup_id.y) + 1.0) * f32(BLOCK_SIZE);
screen_space[3] = vec4<f32>(temp2.x, temp2.y, -1.0, 1.0);
// convert screenspace to view space
var view_space: array<vec3<f32>, 4>;
for (var i = 0u; i < 4u; i++) {
wg_frustum_planes[i] *= u_camera.view_projection;
wg_frustum_planes[i] /= length(wg_frustum_planes[i].xyz);
view_space[i] = screen_to_view(screen_space[i]).xyz;
}
// convert near and far planes from clip to view space
wg_frustum_planes[4] *= u_camera.view;
wg_frustum_planes[4] /= length(wg_frustum_planes[4].xyz);
wg_frustum_planes[5] *= u_camera.view;
wg_frustum_planes[5] /= length(wg_frustum_planes[5].xyz);
// View space eye is always at the origin
let eye_pos = vec3<f32>(0.0, 0.0, 0.0);
wg_frustum_planes[0] = compute_plane(eye_pos, view_space[2], view_space[0]); // left plane
wg_frustum_planes[1] = compute_plane(eye_pos, view_space[1], view_space[3]); // right plane
wg_frustum_planes[2] = compute_plane(eye_pos, view_space[0], view_space[1]); // top plane
wg_frustum_planes[3] = compute_plane(eye_pos, view_space[3], view_space[2]); // bottom plane
wg_frustum_planes[4] = vec4<f32>(0.0, 0.0, -1.0, -min_depth);
wg_frustum_planes[5] = vec4<f32>(0.0, 0.0, 1.0, -max_depth);
}
workgroupBarrier();
@ -143,38 +144,27 @@ fn cs_main(
// Process the lights detecting which ones to cull for this tile.
// Processes 256 lights simultaniously, each on a thread in the workgroup. Requires multiple
// iterations for more lights.
var thread_count = u32(BLOCK_SIZE * BLOCK_SIZE);
var pass_count = (u_lights.light_count + thread_count - 1u) / thread_count;
for (var i = 0u; i < pass_count; i++) {
// find the light index to check on this thread, make sure we're not trying to test
// for more lights than we have.
var light_index = i * thread_count + local_invocation_index;
if (light_index >= u_lights.light_count) {
break;
}
for (var i = local_invocation_index; i < u_lights.light_count; i += BLOCK_SIZE * BLOCK_SIZE) {
let light_index = i;
var light = u_lights.data[light_index];
var position = light.position;
var radius = light.range;
let light = u_lights.data[light_index];
let position_vec4 = u_camera.view * vec4<f32>(light.position, 1.0);
let position = position_vec4.xyz;
let radius = light.range;
if (light.light_ty == LIGHT_TY_DIRECTIONAL) {
//add_light(light_index);
add_light(light_index);
} else if (light.light_ty == LIGHT_TY_POINT
&& sphere_inside_frustrum(wg_frustum_planes, position, radius)) {
// TODO: add the light to the transparent geometry list
add_light(light_index);
// TODO: spotlights
if (!sphere_inside_plane(position, radius, wg_frustum_planes[4])) {
/*var offset: u32 = wg_visible_light_count;
if (offset < MAX_TILE_VISIBLE_LIGHTS) {
atomicAdd(&wg_visible_light_count, 1u);
wg_visible_light_indices[offset] = light_index;
}*/
}
}
// TODO: spotlights
}
workgroupBarrier();
@ -183,41 +173,27 @@ fn cs_main(
// first update the light grid on the first thread
if (local_invocation_index == 0u) {
var offset = u32(index) * MAX_TILE_VISIBLE_LIGHTS; // index in the global light list
textureStore(t_light_grid, workgroup_id.xy, vec4<u32>(offset, wg_visible_light_count, 0u, 1u));
wg_light_index_start = atomicAdd(&u_light_index_counter, wg_visible_light_count);
textureStore(t_light_grid, workgroup_id.xy, vec4<u32>(wg_light_index_start, wg_visible_light_count, 0u, 1u));
// TODO: store light grid for transparent geometry
}
workgroupBarrier();
// now update the light index list on all threads.
var indices_offset = u32(index) * MAX_TILE_VISIBLE_LIGHTS;
//var pass_count = (wg_visible_light_count + thread_count - 1) / thread_count;
for (var i = 0u; i < pass_count; i++) {
// find the light index to check on this thread, make sure we're not trying to test
// for more lights than we have.
//var light_index: u32 = i * thread_count + local_invocation_index;
/*if (light_index > u_lights.light_count) {
u_visible_light_indices
break;
}*/
var offset = indices_offset + i;
if (offset >= wg_visible_light_count) {
// stop if we're over the over the amount of lights we saw
break;
}
u_light_indices[offset] = wg_visible_light_indices[i];
for (var i = local_invocation_index; i < wg_visible_light_count; i += BLOCK_SIZE * BLOCK_SIZE) {
u_light_indices[wg_light_index_start + i] = wg_visible_light_indices[i];
}
}
/// Add a light to the visible light indicies list.
/// Returns a boolean indicating if the light was added.
fn add_light(light_index: u32) -> bool {
var offset: u32 = wg_visible_light_count;
//var offset: u32 = wg_visible_light_count;
if (offset < MAX_TILE_VISIBLE_LIGHTS) {
atomicAdd(&wg_visible_light_count, 1u);
if (wg_visible_light_count < MAX_TILE_VISIBLE_LIGHTS) {
let offset = atomicAdd(&wg_visible_light_count, 1u);
wg_visible_light_indices[offset] = light_index;
return true;
}
@ -232,7 +208,7 @@ fn sphere_inside_frustrum(frustum: array<vec4<f32>, 6>, sphere_origin: vec3<f32>
// only check the sides of the frustum
for (var i = 0u; i < 4u; i++) {
if (!sphere_inside_plane(sphere_origin, radius, frustum_v[i])) {
if (sphere_inside_plane(sphere_origin, radius, frustum_v[i])) {
return false;
}
}
@ -245,7 +221,38 @@ fn sphere_inside_frustrum(frustum: array<vec4<f32>, 6>, sphere_origin: vec3<f32>
/// Source: Real-time collision detection, Christer Ericson (2005)
/// (https://www.3dgep.com/forward-plus/#light-culling-compute-shader)
fn sphere_inside_plane(sphere_origin: vec3<f32>, radius: f32, plane: vec4<f32>) -> bool {
//return dot(plane.xyz, sphere_origin) - plane.w < -radius;
return dot(plane.xyz, sphere_origin) - plane.w < -radius;
}
return dot(vec4<f32>(sphere_origin, 0.0), plane) + radius > 0.0;
fn clip_to_view(clip: vec4<f32>) -> vec4<f32> {
// view space position
var view = u_camera.inverse_projection * clip;
// perspective projection
return view / view.w;
}
fn screen_to_view(screen: vec4<f32>) -> vec4<f32> {
// convert to normalized texture coordinates
let tex_coord = screen.xy / vec2<f32>(u_screen_size);
// convert to clip space
let clip = vec4<f32>( vec2<f32>(tex_coord.x, 1.0 - tex_coord.y) * 2.0 - 1.0, screen.z, screen.w);
return clip_to_view(clip);
}
/// Compute a plane from 3 noncollinear points that form a triangle.
/// This equation assumes a right-handed (counter-clockwise winding order)
/// coordinate system to determine the direction of the plane normal.
fn compute_plane(p0: vec3<f32>, p1: vec3<f32>, p2: vec3<f32>) -> vec4<f32> {
let v0 = p1 - p0;
let v2 = p2 - p0;
var plane = vec4<f32>(normalize(cross(v0, v2)), 0.0);
// find the distance to the origin
plane.w = dot(plane.xyz, p0);
return plane;
}

View File

@ -13,6 +13,7 @@ mkShell rec {
mold
udev
lua5_4_compat
rustup
];
buildInputs = [
udev alsa-lib libGL gcc