render: fix the tile frustum used for culling lights

2024-03-17 18:11:38 -04:00 · 2024-03-17 18:11:38 -04:00 · 014abcf7e6
commit 014abcf7e6
parent 76ec9606ec
6 changed files with 131 additions and 88 deletions
--- a/examples/testbed/src/main.rs
+++ b/examples/testbed/src/main.rs
@ -170,7 +170,10 @@ async fn main() {
                    specular: 1.3,
                },
                Transform::new(
-                    Vec3::new(-5.0, 1.0, -1.28), 
+                    //Vec3::new(-5.0, 1.0, -1.28), 
+                    Vec3::new(-5.0, 1.0, -0.28),
+                    //Vec3::new(-10.0, 0.94, -0.28), 
+
                    Quat::IDENTITY,
                    Vec3::new(0.25, 0.25, 0.25),
                ),
--- a/lyra-game/src/render/light/mod.rs
+++ b/lyra-game/src/render/light/mod.rs
@ -247,7 +247,7 @@ impl LightUniform {
            _padding3: 0,
            color: light.color,

-            range: 2.0,
+            range: 1.5,
            intensity: 1.0,

            spot_cutoff: 0.0,
--- a/lyra-game/src/render/light_cull_compute.rs
+++ b/lyra-game/src/render/light_cull_compute.rs
@ -7,6 +7,7 @@ use winit::dpi::PhysicalSize;
 use super::{light::LightUniformBuffers, render_buffer::{BindGroupPair, BufferWrapper}, texture::RenderTexture};

 pub(crate) struct LightIndicesGridBuffer {
+    index_counter_buffer: wgpu::Buffer,
    indices_buffer: wgpu::Buffer,
    grid_texture: wgpu::Texture,
    grid_texture_view: wgpu::TextureView,
@ -34,6 +35,12 @@ impl LightCullCompute {
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
        });

+        let light_index_counter_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
+            label: Some("B_LightIndexCounter"),
+            contents: &bytemuck::cast_slice(&[0]),
+            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
+        });
+
        let light_indices_bg_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
            entries: &[
                wgpu::BindGroupLayoutEntry {
@ -57,15 +64,27 @@ impl LightCullCompute {
                        view_dimension: wgpu::TextureViewDimension::D2
                    },
                    count: None,
+                },
+                wgpu::BindGroupLayoutEntry {
+                    binding: 2,
+                    visibility: wgpu::ShaderStages::COMPUTE,
+                    ty: wgpu::BindingType::Buffer {
+                        ty: wgpu::BufferBindingType::Storage {
+                            read_only: false
+                        },
+                        has_dynamic_offset: false,
+                        min_binding_size: None,
+                    },
+                    count: None,
                }
            ],
            label: Some("BGL_LightIndicesGrid"),
        });

-        // TODO: shrink the texture to match the amount of grid cells that the shader actually uses
+        // TODO: resize texture when screen is resized
        let size = wgpu::Extent3d {
-            width: screen_size.width,
-            height: screen_size.height,
+            width: workgroup_size.x,
+            height: workgroup_size.y,
            depth_or_array_layers: 1,
        };
        let grid_texture = device.create_texture(
@ -108,12 +127,23 @@ impl LightCullCompute {
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: wgpu::BindingResource::TextureView(&grid_texture_view)
+                },
+                wgpu::BindGroupEntry {
+                    binding: 2,
+                    resource: wgpu::BindingResource::Buffer(
+                            wgpu::BufferBinding {
+                            buffer: &light_index_counter_buffer,
+                            offset: 0,
+                            size: None, // the entire light buffer is needed
                        }
+                    )
+                },
            ],
            label: Some("BG_LightIndicesGrid"),
        });

        LightIndicesGridBuffer {
+            index_counter_buffer: light_index_counter_buffer,
            indices_buffer: light_indices_buffer,
            grid_texture,
            grid_texture_view,
--- a/lyra-game/src/render/shaders/base.wgsl
+++ b/lyra-game/src/render/shaders/base.wgsl
@ -21,7 +21,7 @@ struct VertexOutput {

 struct CameraUniform {
    view: mat4x4<f32>,
-    projection: mat4x4<f32>,
+    inverse_projection: mat4x4<f32>,
    view_projection: mat4x4<f32>,
    position: vec3<f32>,
 };
@ -126,6 +126,8 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4<f32> {

    return vec4<f32>(light_object_res, object_color.a);*/
    
+
+    
    let tile_index = vec2<u32>(floor(in.clip_position.xy / 16.0));
    let tile: vec2<u32> = textureLoad(t_light_grid, tile_index).xy;

--- a/lyra-game/src/render/shaders/light_cull.comp.wgsl
+++ b/lyra-game/src/render/shaders/light_cull.comp.wgsl
@ -1,21 +1,14 @@
-const BLOCK_SIZE: i32 = 16;
+const BLOCK_SIZE: u32 = 16u;
 const MAX_TILE_VISIBLE_LIGHTS: u32 = 1024u;

 const LIGHT_TY_DIRECTIONAL = 0u;
 const LIGHT_TY_POINT = 1u;
 const LIGHT_TY_SPOT = 2u;

-// Possible computer shader inputs:
-//
-// local_invocation_id
-// workgroup_id
-// global_invocation_id
-// num_workgroups
-// local_invocation_index
-
 struct CameraUniform {
    view: mat4x4<f32>,
-    projection: mat4x4<f32>,
+    inverse_projection: mat4x4<f32>,
+    //projection: mat4x4<f32>,
    view_projection: mat4x4<f32>,
    position: vec3<f32>,
 };
@ -42,14 +35,13 @@ struct Lights {

 var<workgroup> wg_min_depth: atomic<u32>;
 var<workgroup> wg_max_depth: atomic<u32>;
+var<workgroup> wg_light_index_start: atomic<u32>;
 var<workgroup> wg_frustum_planes: array<vec4<f32>, 6>;

 // index list of visible light sources for this tile
 var<workgroup> wg_visible_light_indices: array<u32, MAX_TILE_VISIBLE_LIGHTS>;
 var<workgroup> wg_visible_light_count: atomic<u32>;

-//var<workgroup> view_projection: mat4x4;
-
@group(0) @binding(0)
 var t_depthmap: texture_depth_2d;
@group(0) @binding(1)
@ -65,6 +57,8 @@ var<storage, read> u_lights: Lights;
 var<storage, read_write> u_light_indices: array<u32>;
@group(3) @binding(1)
 var t_light_grid: texture_storage_2d<rg32uint, read_write>;
+@group(3) @binding(2)
+var<storage, read_write> u_light_index_counter: atomic<u32>;

@group(4) @binding(0)
 var<uniform> u_screen_size: vec2<u32>;
@ -78,12 +72,6 @@ fn cs_main(
    @builtin(num_workgroups) num_workgroups: vec3<u32>,
    @builtin(local_invocation_index) local_invocation_index: u32,
 ) {
-    //var location = vec2<i32>(global_invocation_id.xy);
-    var item_id = vec2<i32>(local_invocation_id.xy);
-    var tile_id = vec2<i32>(workgroup_id.xy);
-    var tile_number = vec2<i32>(num_workgroups.xy);
-    var index = tile_id.y * tile_number.x + tile_id.x;
-
    // Initialize some shared global values for depth and light count
    if (local_invocation_index == 0u) {
        wg_min_depth = 0xFFFFFFFu;
@ -112,28 +100,41 @@ fn cs_main(
    
    // Create the frustum planes that will be used for this time
    if (local_invocation_index == 0u) {
-        var negative_step = (2.0 * vec2<f32>(tile_id)) / vec2<f32>(tile_number);
-        var positive_step = (2.0 * vec2<f32>(tile_id) + vec2<f32>(1.0, 1.0)) / vec2<f32>(tile_number);
+        // Compute the 4 corner points on the far clipping plane to use as the frustum vertices.
+        var screen_space: array<vec4<f32>, 4>;

-        // z in the vec4 is the distance from the center of the tile
-        wg_frustum_planes[0] = vec4<f32>(1.0, 0.0, 0.0, 1.0 - negative_step.x); // left
-        wg_frustum_planes[1] = vec4<f32>(-1.0, 0.0, 0.0, -1.0 + positive_step.x); // right
-        wg_frustum_planes[2] = vec4<f32>(0.0, -1.0, 0.0, 1.0 - negative_step.y); // bottom
-        wg_frustum_planes[3] = vec4<f32>(0.0, -1.0, 0.0, -1.0 + positive_step.y); // top
-        wg_frustum_planes[4] = vec4<f32>(0.0, 0.0, -1.0, -min_depth); // near plane
-        wg_frustum_planes[5] = vec4<f32>(0.0, 0.0, 1.0, max_depth); // far plane
+        // top left point
+        var temp: vec2<u32> = workgroup_id.xy * BLOCK_SIZE;
+        screen_space[0] = vec4<f32>(f32(temp.x), f32(temp.y), -1.0, 1.0);

-        // convert the side and top planes from clip to view space
+        // top right point
+        var temp2 = vec2<f32>(f32(workgroup_id.x) + 1.0, f32(workgroup_id.y)) * f32(BLOCK_SIZE);
+        screen_space[1] = vec4<f32>(temp2.x, temp2.y, -1.0, 1.0);
+        
+        // bottom left point
+        temp2 = vec2<f32>(f32(workgroup_id.x), f32(workgroup_id.y) + 1.0) * f32(BLOCK_SIZE);
+        screen_space[2] = vec4<f32>(temp2.x, temp2.y, -1.0, 1.0);
+        
+        // bottom right point
+        temp2 = vec2<f32>(f32(workgroup_id.x) + 1.0, f32(workgroup_id.y) + 1.0) * f32(BLOCK_SIZE);
+        screen_space[3] = vec4<f32>(temp2.x, temp2.y, -1.0, 1.0);
+
+        // convert screenspace to view space
+        var view_space: array<vec3<f32>, 4>;
        for (var i = 0u; i < 4u; i++) {
-            wg_frustum_planes[i] *= u_camera.view_projection;
-            wg_frustum_planes[i] /= length(wg_frustum_planes[i].xyz);
+            view_space[i] = screen_to_view(screen_space[i]).xyz;
        }

-        // convert near and far planes from clip to view space
-        wg_frustum_planes[4] *= u_camera.view;
-        wg_frustum_planes[4] /= length(wg_frustum_planes[4].xyz);
-        wg_frustum_planes[5] *= u_camera.view;
-        wg_frustum_planes[5] /= length(wg_frustum_planes[5].xyz);
+        // View space eye is always at the origin
+        let eye_pos = vec3<f32>(0.0, 0.0, 0.0);
+
+        wg_frustum_planes[0] = compute_plane(eye_pos, view_space[2], view_space[0]); // left plane
+        wg_frustum_planes[1] = compute_plane(eye_pos, view_space[1], view_space[3]); // right plane
+        wg_frustum_planes[2] = compute_plane(eye_pos, view_space[0], view_space[1]); // top plane
+        wg_frustum_planes[3] = compute_plane(eye_pos, view_space[3], view_space[2]); // bottom plane
+
+        wg_frustum_planes[4] = vec4<f32>(0.0, 0.0, -1.0, -min_depth);
+        wg_frustum_planes[5] = vec4<f32>(0.0, 0.0, 1.0, -max_depth);
    }

    workgroupBarrier();
@ -143,38 +144,27 @@ fn cs_main(
    // Process the lights detecting which ones to cull for this tile.
    // Processes 256 lights simultaniously, each on a thread in the workgroup. Requires multiple
    // iterations for more lights.
-    var thread_count = u32(BLOCK_SIZE * BLOCK_SIZE);
-    var pass_count = (u_lights.light_count + thread_count - 1u) / thread_count;
-    for (var i = 0u; i < pass_count; i++) {
-        // find the light index to check on this thread, make sure we're not trying to test
-        // for more lights than we have.
-        var light_index = i * thread_count + local_invocation_index;
-        if (light_index >= u_lights.light_count) {
-            break;
-        }
+    for (var i = local_invocation_index; i < u_lights.light_count; i += BLOCK_SIZE * BLOCK_SIZE) {
+        let light_index = i;

-        var light = u_lights.data[light_index];
-        var position = light.position;
-        var radius = light.range;
+        let light = u_lights.data[light_index];
+        let position_vec4 = u_camera.view * vec4<f32>(light.position, 1.0);
+        let position = position_vec4.xyz;
+        let radius = light.range;

        if (light.light_ty == LIGHT_TY_DIRECTIONAL) {
-            //add_light(light_index);
+            add_light(light_index);
        } else if (light.light_ty == LIGHT_TY_POINT
                && sphere_inside_frustrum(wg_frustum_planes, position, radius)) {
            // TODO: add the light to the transparent geometry list

            add_light(light_index);

-            // TODO: spotlights
            if (!sphere_inside_plane(position, radius, wg_frustum_planes[4])) {
-                /*var offset: u32 = wg_visible_light_count;
                
-                if (offset < MAX_TILE_VISIBLE_LIGHTS) {
-                    atomicAdd(&wg_visible_light_count, 1u);
-                    wg_visible_light_indices[offset] = light_index;
-                }*/
            }
        }
+        // TODO: spotlights
    }

    workgroupBarrier();
@ -183,41 +173,27 @@ fn cs_main(

    // first update the light grid on the first thread
    if (local_invocation_index == 0u) {
-        var offset = u32(index) * MAX_TILE_VISIBLE_LIGHTS; // index in the global light list
-        textureStore(t_light_grid, workgroup_id.xy, vec4<u32>(offset, wg_visible_light_count, 0u, 1u));
+        wg_light_index_start = atomicAdd(&u_light_index_counter, wg_visible_light_count);
+        textureStore(t_light_grid, workgroup_id.xy, vec4<u32>(wg_light_index_start, wg_visible_light_count, 0u, 1u));
+
+        // TODO: store light grid for transparent geometry
    }

    workgroupBarrier();

    // now update the light index list on all threads.
-    var indices_offset = u32(index) * MAX_TILE_VISIBLE_LIGHTS;
-    //var pass_count = (wg_visible_light_count + thread_count - 1) / thread_count;
-    for (var i = 0u; i < pass_count; i++) {
-        // find the light index to check on this thread, make sure we're not trying to test
-        // for more lights than we have.
-        //var light_index: u32 = i * thread_count + local_invocation_index;
-        /*if (light_index > u_lights.light_count) {
-            u_visible_light_indices
-            break;
-        }*/
-        
-        var offset = indices_offset + i;
-        if (offset >= wg_visible_light_count) {
-            // stop if we're over the over the amount of lights we saw
-            break;
-        }
-
-        u_light_indices[offset] = wg_visible_light_indices[i];
+    for (var i = local_invocation_index; i < wg_visible_light_count; i += BLOCK_SIZE * BLOCK_SIZE) {
+        u_light_indices[wg_light_index_start + i] = wg_visible_light_indices[i];
    }
 }

 /// Add a light to the visible light indicies list.
 /// Returns a boolean indicating if the light was added.
 fn add_light(light_index: u32) -> bool {
-    var offset: u32 = wg_visible_light_count;
+    //var offset: u32 = wg_visible_light_count;

-    if (offset < MAX_TILE_VISIBLE_LIGHTS) {
-        atomicAdd(&wg_visible_light_count, 1u);
+    if (wg_visible_light_count < MAX_TILE_VISIBLE_LIGHTS) {
+        let offset = atomicAdd(&wg_visible_light_count, 1u);
        wg_visible_light_indices[offset] = light_index;
        return true;
    }
@ -232,7 +208,7 @@ fn sphere_inside_frustrum(frustum: array<vec4<f32>, 6>, sphere_origin: vec3<f32>

    // only check the sides of the frustum
    for (var i = 0u; i < 4u; i++) {
-        if (!sphere_inside_plane(sphere_origin, radius, frustum_v[i])) {
+        if (sphere_inside_plane(sphere_origin, radius, frustum_v[i])) {
            return false;
        }
    }
@ -245,7 +221,38 @@ fn sphere_inside_frustrum(frustum: array<vec4<f32>, 6>, sphere_origin: vec3<f32>
 /// Source: Real-time collision detection, Christer Ericson (2005)
 ///   (https://www.3dgep.com/forward-plus/#light-culling-compute-shader)
 fn sphere_inside_plane(sphere_origin: vec3<f32>, radius: f32, plane: vec4<f32>) -> bool {
-    //return dot(plane.xyz, sphere_origin) - plane.w < -radius;
-
-    return dot(vec4<f32>(sphere_origin, 0.0), plane) + radius > 0.0;
+    return dot(plane.xyz, sphere_origin) - plane.w < -radius;
+}
+
+fn clip_to_view(clip: vec4<f32>) -> vec4<f32> {
+    // view space position
+    var view = u_camera.inverse_projection * clip;
+
+    // perspective projection
+    return view / view.w;
+}
+
+fn screen_to_view(screen: vec4<f32>) -> vec4<f32> {
+    // convert to normalized texture coordinates
+    let tex_coord = screen.xy / vec2<f32>(u_screen_size);
+
+    // convert to clip space
+    let clip = vec4<f32>( vec2<f32>(tex_coord.x, 1.0 - tex_coord.y) * 2.0 - 1.0, screen.z, screen.w);
+    
+    return clip_to_view(clip);
+}
+
+/// Compute a plane from 3 noncollinear points that form a triangle.
+/// This equation assumes a right-handed (counter-clockwise winding order) 
+/// coordinate system to determine the direction of the plane normal.
+fn compute_plane(p0: vec3<f32>, p1: vec3<f32>, p2: vec3<f32>) -> vec4<f32> {
+    let v0 = p1 - p0;
+    let v2 = p2 - p0;
+
+    var plane = vec4<f32>(normalize(cross(v0, v2)), 0.0);
+
+    // find the distance to the origin
+    plane.w = dot(plane.xyz, p0);
+
+    return plane;
 }
--- a/shell.nix
+++ b/shell.nix
@ -13,6 +13,7 @@ mkShell rec {
    mold
    udev
    lua5_4_compat
+    rustup
  ];
  buildInputs = [
    udev alsa-lib libGL gcc