diff --git a/examples/testbed/src/main.rs b/examples/testbed/src/main.rs index 3877252..f7e8e54 100644 --- a/examples/testbed/src/main.rs +++ b/examples/testbed/src/main.rs @@ -170,7 +170,10 @@ async fn main() { specular: 1.3, }, Transform::new( - Vec3::new(-5.0, 1.0, -1.28), + //Vec3::new(-5.0, 1.0, -1.28), + Vec3::new(-5.0, 1.0, -0.28), + //Vec3::new(-10.0, 0.94, -0.28), + Quat::IDENTITY, Vec3::new(0.25, 0.25, 0.25), ), diff --git a/lyra-game/src/render/light/mod.rs b/lyra-game/src/render/light/mod.rs index 1f5cfc9..eaebbaa 100644 --- a/lyra-game/src/render/light/mod.rs +++ b/lyra-game/src/render/light/mod.rs @@ -247,7 +247,7 @@ impl LightUniform { _padding3: 0, color: light.color, - range: 2.0, + range: 1.5, intensity: 1.0, spot_cutoff: 0.0, diff --git a/lyra-game/src/render/light_cull_compute.rs b/lyra-game/src/render/light_cull_compute.rs index 441ca16..089c313 100644 --- a/lyra-game/src/render/light_cull_compute.rs +++ b/lyra-game/src/render/light_cull_compute.rs @@ -7,6 +7,7 @@ use winit::dpi::PhysicalSize; use super::{light::LightUniformBuffers, render_buffer::{BindGroupPair, BufferWrapper}, texture::RenderTexture}; pub(crate) struct LightIndicesGridBuffer { + index_counter_buffer: wgpu::Buffer, indices_buffer: wgpu::Buffer, grid_texture: wgpu::Texture, grid_texture_view: wgpu::TextureView, @@ -34,6 +35,12 @@ impl LightCullCompute { usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST, }); + let light_index_counter_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { + label: Some("B_LightIndexCounter"), + contents: &bytemuck::cast_slice(&[0]), + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST, + }); + let light_indices_bg_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { entries: &[ wgpu::BindGroupLayoutEntry { @@ -57,15 +64,27 @@ impl LightCullCompute { view_dimension: wgpu::TextureViewDimension::D2 }, count: None, + }, + wgpu::BindGroupLayoutEntry { + binding: 2, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { + read_only: false + }, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, } ], label: Some("BGL_LightIndicesGrid"), }); - // TODO: shrink the texture to match the amount of grid cells that the shader actually uses + // TODO: resize texture when screen is resized let size = wgpu::Extent3d { - width: screen_size.width, - height: screen_size.height, + width: workgroup_size.x, + height: workgroup_size.y, depth_or_array_layers: 1, }; let grid_texture = device.create_texture( @@ -108,12 +127,23 @@ impl LightCullCompute { wgpu::BindGroupEntry { binding: 1, resource: wgpu::BindingResource::TextureView(&grid_texture_view) - } + }, + wgpu::BindGroupEntry { + binding: 2, + resource: wgpu::BindingResource::Buffer( + wgpu::BufferBinding { + buffer: &light_index_counter_buffer, + offset: 0, + size: None, // the entire light buffer is needed + } + ) + }, ], label: Some("BG_LightIndicesGrid"), }); LightIndicesGridBuffer { + index_counter_buffer: light_index_counter_buffer, indices_buffer: light_indices_buffer, grid_texture, grid_texture_view, diff --git a/lyra-game/src/render/shaders/base.wgsl b/lyra-game/src/render/shaders/base.wgsl index f025ca4..03c990b 100755 --- a/lyra-game/src/render/shaders/base.wgsl +++ b/lyra-game/src/render/shaders/base.wgsl @@ -21,7 +21,7 @@ struct VertexOutput { struct CameraUniform { view: mat4x4, - projection: mat4x4, + inverse_projection: mat4x4, view_projection: mat4x4, position: vec3, }; @@ -126,6 +126,8 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4 { return vec4(light_object_res, object_color.a);*/ + + let tile_index = vec2(floor(in.clip_position.xy / 16.0)); let tile: vec2 = textureLoad(t_light_grid, tile_index).xy; diff --git a/lyra-game/src/render/shaders/light_cull.comp.wgsl b/lyra-game/src/render/shaders/light_cull.comp.wgsl index e26f170..e45f27a 100644 --- a/lyra-game/src/render/shaders/light_cull.comp.wgsl +++ b/lyra-game/src/render/shaders/light_cull.comp.wgsl @@ -1,21 +1,14 @@ -const BLOCK_SIZE: i32 = 16; +const BLOCK_SIZE: u32 = 16u; const MAX_TILE_VISIBLE_LIGHTS: u32 = 1024u; const LIGHT_TY_DIRECTIONAL = 0u; const LIGHT_TY_POINT = 1u; const LIGHT_TY_SPOT = 2u; -// Possible computer shader inputs: -// -// local_invocation_id -// workgroup_id -// global_invocation_id -// num_workgroups -// local_invocation_index - struct CameraUniform { view: mat4x4, - projection: mat4x4, + inverse_projection: mat4x4, + //projection: mat4x4, view_projection: mat4x4, position: vec3, }; @@ -42,14 +35,13 @@ struct Lights { var wg_min_depth: atomic; var wg_max_depth: atomic; +var wg_light_index_start: atomic; var wg_frustum_planes: array, 6>; // index list of visible light sources for this tile var wg_visible_light_indices: array; var wg_visible_light_count: atomic; -//var view_projection: mat4x4; - @group(0) @binding(0) var t_depthmap: texture_depth_2d; @group(0) @binding(1) @@ -65,6 +57,8 @@ var u_lights: Lights; var u_light_indices: array; @group(3) @binding(1) var t_light_grid: texture_storage_2d; +@group(3) @binding(2) +var u_light_index_counter: atomic; @group(4) @binding(0) var u_screen_size: vec2; @@ -78,12 +72,6 @@ fn cs_main( @builtin(num_workgroups) num_workgroups: vec3, @builtin(local_invocation_index) local_invocation_index: u32, ) { - //var location = vec2(global_invocation_id.xy); - var item_id = vec2(local_invocation_id.xy); - var tile_id = vec2(workgroup_id.xy); - var tile_number = vec2(num_workgroups.xy); - var index = tile_id.y * tile_number.x + tile_id.x; - // Initialize some shared global values for depth and light count if (local_invocation_index == 0u) { wg_min_depth = 0xFFFFFFFu; @@ -112,28 +100,41 @@ fn cs_main( // Create the frustum planes that will be used for this time if (local_invocation_index == 0u) { - var negative_step = (2.0 * vec2(tile_id)) / vec2(tile_number); - var positive_step = (2.0 * vec2(tile_id) + vec2(1.0, 1.0)) / vec2(tile_number); + // Compute the 4 corner points on the far clipping plane to use as the frustum vertices. + var screen_space: array, 4>; - // z in the vec4 is the distance from the center of the tile - wg_frustum_planes[0] = vec4(1.0, 0.0, 0.0, 1.0 - negative_step.x); // left - wg_frustum_planes[1] = vec4(-1.0, 0.0, 0.0, -1.0 + positive_step.x); // right - wg_frustum_planes[2] = vec4(0.0, -1.0, 0.0, 1.0 - negative_step.y); // bottom - wg_frustum_planes[3] = vec4(0.0, -1.0, 0.0, -1.0 + positive_step.y); // top - wg_frustum_planes[4] = vec4(0.0, 0.0, -1.0, -min_depth); // near plane - wg_frustum_planes[5] = vec4(0.0, 0.0, 1.0, max_depth); // far plane + // top left point + var temp: vec2 = workgroup_id.xy * BLOCK_SIZE; + screen_space[0] = vec4(f32(temp.x), f32(temp.y), -1.0, 1.0); - // convert the side and top planes from clip to view space + // top right point + var temp2 = vec2(f32(workgroup_id.x) + 1.0, f32(workgroup_id.y)) * f32(BLOCK_SIZE); + screen_space[1] = vec4(temp2.x, temp2.y, -1.0, 1.0); + + // bottom left point + temp2 = vec2(f32(workgroup_id.x), f32(workgroup_id.y) + 1.0) * f32(BLOCK_SIZE); + screen_space[2] = vec4(temp2.x, temp2.y, -1.0, 1.0); + + // bottom right point + temp2 = vec2(f32(workgroup_id.x) + 1.0, f32(workgroup_id.y) + 1.0) * f32(BLOCK_SIZE); + screen_space[3] = vec4(temp2.x, temp2.y, -1.0, 1.0); + + // convert screenspace to view space + var view_space: array, 4>; for (var i = 0u; i < 4u; i++) { - wg_frustum_planes[i] *= u_camera.view_projection; - wg_frustum_planes[i] /= length(wg_frustum_planes[i].xyz); + view_space[i] = screen_to_view(screen_space[i]).xyz; } - // convert near and far planes from clip to view space - wg_frustum_planes[4] *= u_camera.view; - wg_frustum_planes[4] /= length(wg_frustum_planes[4].xyz); - wg_frustum_planes[5] *= u_camera.view; - wg_frustum_planes[5] /= length(wg_frustum_planes[5].xyz); + // View space eye is always at the origin + let eye_pos = vec3(0.0, 0.0, 0.0); + + wg_frustum_planes[0] = compute_plane(eye_pos, view_space[2], view_space[0]); // left plane + wg_frustum_planes[1] = compute_plane(eye_pos, view_space[1], view_space[3]); // right plane + wg_frustum_planes[2] = compute_plane(eye_pos, view_space[0], view_space[1]); // top plane + wg_frustum_planes[3] = compute_plane(eye_pos, view_space[3], view_space[2]); // bottom plane + + wg_frustum_planes[4] = vec4(0.0, 0.0, -1.0, -min_depth); + wg_frustum_planes[5] = vec4(0.0, 0.0, 1.0, -max_depth); } workgroupBarrier(); @@ -143,38 +144,27 @@ fn cs_main( // Process the lights detecting which ones to cull for this tile. // Processes 256 lights simultaniously, each on a thread in the workgroup. Requires multiple // iterations for more lights. - var thread_count = u32(BLOCK_SIZE * BLOCK_SIZE); - var pass_count = (u_lights.light_count + thread_count - 1u) / thread_count; - for (var i = 0u; i < pass_count; i++) { - // find the light index to check on this thread, make sure we're not trying to test - // for more lights than we have. - var light_index = i * thread_count + local_invocation_index; - if (light_index >= u_lights.light_count) { - break; - } + for (var i = local_invocation_index; i < u_lights.light_count; i += BLOCK_SIZE * BLOCK_SIZE) { + let light_index = i; - var light = u_lights.data[light_index]; - var position = light.position; - var radius = light.range; + let light = u_lights.data[light_index]; + let position_vec4 = u_camera.view * vec4(light.position, 1.0); + let position = position_vec4.xyz; + let radius = light.range; if (light.light_ty == LIGHT_TY_DIRECTIONAL) { - //add_light(light_index); + add_light(light_index); } else if (light.light_ty == LIGHT_TY_POINT && sphere_inside_frustrum(wg_frustum_planes, position, radius)) { // TODO: add the light to the transparent geometry list add_light(light_index); - // TODO: spotlights if (!sphere_inside_plane(position, radius, wg_frustum_planes[4])) { - /*var offset: u32 = wg_visible_light_count; - - if (offset < MAX_TILE_VISIBLE_LIGHTS) { - atomicAdd(&wg_visible_light_count, 1u); - wg_visible_light_indices[offset] = light_index; - }*/ + } } + // TODO: spotlights } workgroupBarrier(); @@ -183,41 +173,27 @@ fn cs_main( // first update the light grid on the first thread if (local_invocation_index == 0u) { - var offset = u32(index) * MAX_TILE_VISIBLE_LIGHTS; // index in the global light list - textureStore(t_light_grid, workgroup_id.xy, vec4(offset, wg_visible_light_count, 0u, 1u)); + wg_light_index_start = atomicAdd(&u_light_index_counter, wg_visible_light_count); + textureStore(t_light_grid, workgroup_id.xy, vec4(wg_light_index_start, wg_visible_light_count, 0u, 1u)); + + // TODO: store light grid for transparent geometry } workgroupBarrier(); // now update the light index list on all threads. - var indices_offset = u32(index) * MAX_TILE_VISIBLE_LIGHTS; - //var pass_count = (wg_visible_light_count + thread_count - 1) / thread_count; - for (var i = 0u; i < pass_count; i++) { - // find the light index to check on this thread, make sure we're not trying to test - // for more lights than we have. - //var light_index: u32 = i * thread_count + local_invocation_index; - /*if (light_index > u_lights.light_count) { - u_visible_light_indices - break; - }*/ - - var offset = indices_offset + i; - if (offset >= wg_visible_light_count) { - // stop if we're over the over the amount of lights we saw - break; - } - - u_light_indices[offset] = wg_visible_light_indices[i]; + for (var i = local_invocation_index; i < wg_visible_light_count; i += BLOCK_SIZE * BLOCK_SIZE) { + u_light_indices[wg_light_index_start + i] = wg_visible_light_indices[i]; } } /// Add a light to the visible light indicies list. /// Returns a boolean indicating if the light was added. fn add_light(light_index: u32) -> bool { - var offset: u32 = wg_visible_light_count; + //var offset: u32 = wg_visible_light_count; - if (offset < MAX_TILE_VISIBLE_LIGHTS) { - atomicAdd(&wg_visible_light_count, 1u); + if (wg_visible_light_count < MAX_TILE_VISIBLE_LIGHTS) { + let offset = atomicAdd(&wg_visible_light_count, 1u); wg_visible_light_indices[offset] = light_index; return true; } @@ -232,7 +208,7 @@ fn sphere_inside_frustrum(frustum: array, 6>, sphere_origin: vec3 // only check the sides of the frustum for (var i = 0u; i < 4u; i++) { - if (!sphere_inside_plane(sphere_origin, radius, frustum_v[i])) { + if (sphere_inside_plane(sphere_origin, radius, frustum_v[i])) { return false; } } @@ -245,7 +221,38 @@ fn sphere_inside_frustrum(frustum: array, 6>, sphere_origin: vec3 /// Source: Real-time collision detection, Christer Ericson (2005) /// (https://www.3dgep.com/forward-plus/#light-culling-compute-shader) fn sphere_inside_plane(sphere_origin: vec3, radius: f32, plane: vec4) -> bool { - //return dot(plane.xyz, sphere_origin) - plane.w < -radius; + return dot(plane.xyz, sphere_origin) - plane.w < -radius; +} - return dot(vec4(sphere_origin, 0.0), plane) + radius > 0.0; +fn clip_to_view(clip: vec4) -> vec4 { + // view space position + var view = u_camera.inverse_projection * clip; + + // perspective projection + return view / view.w; +} + +fn screen_to_view(screen: vec4) -> vec4 { + // convert to normalized texture coordinates + let tex_coord = screen.xy / vec2(u_screen_size); + + // convert to clip space + let clip = vec4( vec2(tex_coord.x, 1.0 - tex_coord.y) * 2.0 - 1.0, screen.z, screen.w); + + return clip_to_view(clip); +} + +/// Compute a plane from 3 noncollinear points that form a triangle. +/// This equation assumes a right-handed (counter-clockwise winding order) +/// coordinate system to determine the direction of the plane normal. +fn compute_plane(p0: vec3, p1: vec3, p2: vec3) -> vec4 { + let v0 = p1 - p0; + let v2 = p2 - p0; + + var plane = vec4(normalize(cross(v0, v2)), 0.0); + + // find the distance to the origin + plane.w = dot(plane.xyz, p0); + + return plane; } \ No newline at end of file diff --git a/shell.nix b/shell.nix index 551dc4c..87fd52d 100755 --- a/shell.nix +++ b/shell.nix @@ -13,6 +13,7 @@ mkShell rec { mold udev lua5_4_compat + rustup ]; buildInputs = [ udev alsa-lib libGL gcc