render: fix the tile frustum used for culling lights
This commit is contained in:
parent
76ec9606ec
commit
014abcf7e6
|
@ -170,7 +170,10 @@ async fn main() {
|
|||
specular: 1.3,
|
||||
},
|
||||
Transform::new(
|
||||
Vec3::new(-5.0, 1.0, -1.28),
|
||||
//Vec3::new(-5.0, 1.0, -1.28),
|
||||
Vec3::new(-5.0, 1.0, -0.28),
|
||||
//Vec3::new(-10.0, 0.94, -0.28),
|
||||
|
||||
Quat::IDENTITY,
|
||||
Vec3::new(0.25, 0.25, 0.25),
|
||||
),
|
||||
|
|
|
@ -247,7 +247,7 @@ impl LightUniform {
|
|||
_padding3: 0,
|
||||
color: light.color,
|
||||
|
||||
range: 2.0,
|
||||
range: 1.5,
|
||||
intensity: 1.0,
|
||||
|
||||
spot_cutoff: 0.0,
|
||||
|
|
|
@ -7,6 +7,7 @@ use winit::dpi::PhysicalSize;
|
|||
use super::{light::LightUniformBuffers, render_buffer::{BindGroupPair, BufferWrapper}, texture::RenderTexture};
|
||||
|
||||
pub(crate) struct LightIndicesGridBuffer {
|
||||
index_counter_buffer: wgpu::Buffer,
|
||||
indices_buffer: wgpu::Buffer,
|
||||
grid_texture: wgpu::Texture,
|
||||
grid_texture_view: wgpu::TextureView,
|
||||
|
@ -34,6 +35,12 @@ impl LightCullCompute {
|
|||
usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
|
||||
});
|
||||
|
||||
let light_index_counter_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
|
||||
label: Some("B_LightIndexCounter"),
|
||||
contents: &bytemuck::cast_slice(&[0]),
|
||||
usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
|
||||
});
|
||||
|
||||
let light_indices_bg_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
|
||||
entries: &[
|
||||
wgpu::BindGroupLayoutEntry {
|
||||
|
@ -57,15 +64,27 @@ impl LightCullCompute {
|
|||
view_dimension: wgpu::TextureViewDimension::D2
|
||||
},
|
||||
count: None,
|
||||
},
|
||||
wgpu::BindGroupLayoutEntry {
|
||||
binding: 2,
|
||||
visibility: wgpu::ShaderStages::COMPUTE,
|
||||
ty: wgpu::BindingType::Buffer {
|
||||
ty: wgpu::BufferBindingType::Storage {
|
||||
read_only: false
|
||||
},
|
||||
has_dynamic_offset: false,
|
||||
min_binding_size: None,
|
||||
},
|
||||
count: None,
|
||||
}
|
||||
],
|
||||
label: Some("BGL_LightIndicesGrid"),
|
||||
});
|
||||
|
||||
// TODO: shrink the texture to match the amount of grid cells that the shader actually uses
|
||||
// TODO: resize texture when screen is resized
|
||||
let size = wgpu::Extent3d {
|
||||
width: screen_size.width,
|
||||
height: screen_size.height,
|
||||
width: workgroup_size.x,
|
||||
height: workgroup_size.y,
|
||||
depth_or_array_layers: 1,
|
||||
};
|
||||
let grid_texture = device.create_texture(
|
||||
|
@ -108,12 +127,23 @@ impl LightCullCompute {
|
|||
wgpu::BindGroupEntry {
|
||||
binding: 1,
|
||||
resource: wgpu::BindingResource::TextureView(&grid_texture_view)
|
||||
}
|
||||
},
|
||||
wgpu::BindGroupEntry {
|
||||
binding: 2,
|
||||
resource: wgpu::BindingResource::Buffer(
|
||||
wgpu::BufferBinding {
|
||||
buffer: &light_index_counter_buffer,
|
||||
offset: 0,
|
||||
size: None, // the entire light buffer is needed
|
||||
}
|
||||
)
|
||||
},
|
||||
],
|
||||
label: Some("BG_LightIndicesGrid"),
|
||||
});
|
||||
|
||||
LightIndicesGridBuffer {
|
||||
index_counter_buffer: light_index_counter_buffer,
|
||||
indices_buffer: light_indices_buffer,
|
||||
grid_texture,
|
||||
grid_texture_view,
|
||||
|
|
|
@ -21,7 +21,7 @@ struct VertexOutput {
|
|||
|
||||
struct CameraUniform {
|
||||
view: mat4x4<f32>,
|
||||
projection: mat4x4<f32>,
|
||||
inverse_projection: mat4x4<f32>,
|
||||
view_projection: mat4x4<f32>,
|
||||
position: vec3<f32>,
|
||||
};
|
||||
|
@ -126,6 +126,8 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4<f32> {
|
|||
|
||||
return vec4<f32>(light_object_res, object_color.a);*/
|
||||
|
||||
|
||||
|
||||
let tile_index = vec2<u32>(floor(in.clip_position.xy / 16.0));
|
||||
let tile: vec2<u32> = textureLoad(t_light_grid, tile_index).xy;
|
||||
|
||||
|
|
|
@ -1,21 +1,14 @@
|
|||
const BLOCK_SIZE: i32 = 16;
|
||||
const BLOCK_SIZE: u32 = 16u;
|
||||
const MAX_TILE_VISIBLE_LIGHTS: u32 = 1024u;
|
||||
|
||||
const LIGHT_TY_DIRECTIONAL = 0u;
|
||||
const LIGHT_TY_POINT = 1u;
|
||||
const LIGHT_TY_SPOT = 2u;
|
||||
|
||||
// Possible computer shader inputs:
|
||||
//
|
||||
// local_invocation_id
|
||||
// workgroup_id
|
||||
// global_invocation_id
|
||||
// num_workgroups
|
||||
// local_invocation_index
|
||||
|
||||
struct CameraUniform {
|
||||
view: mat4x4<f32>,
|
||||
projection: mat4x4<f32>,
|
||||
inverse_projection: mat4x4<f32>,
|
||||
//projection: mat4x4<f32>,
|
||||
view_projection: mat4x4<f32>,
|
||||
position: vec3<f32>,
|
||||
};
|
||||
|
@ -42,14 +35,13 @@ struct Lights {
|
|||
|
||||
var<workgroup> wg_min_depth: atomic<u32>;
|
||||
var<workgroup> wg_max_depth: atomic<u32>;
|
||||
var<workgroup> wg_light_index_start: atomic<u32>;
|
||||
var<workgroup> wg_frustum_planes: array<vec4<f32>, 6>;
|
||||
|
||||
// index list of visible light sources for this tile
|
||||
var<workgroup> wg_visible_light_indices: array<u32, MAX_TILE_VISIBLE_LIGHTS>;
|
||||
var<workgroup> wg_visible_light_count: atomic<u32>;
|
||||
|
||||
//var<workgroup> view_projection: mat4x4;
|
||||
|
||||
@group(0) @binding(0)
|
||||
var t_depthmap: texture_depth_2d;
|
||||
@group(0) @binding(1)
|
||||
|
@ -65,6 +57,8 @@ var<storage, read> u_lights: Lights;
|
|||
var<storage, read_write> u_light_indices: array<u32>;
|
||||
@group(3) @binding(1)
|
||||
var t_light_grid: texture_storage_2d<rg32uint, read_write>;
|
||||
@group(3) @binding(2)
|
||||
var<storage, read_write> u_light_index_counter: atomic<u32>;
|
||||
|
||||
@group(4) @binding(0)
|
||||
var<uniform> u_screen_size: vec2<u32>;
|
||||
|
@ -78,12 +72,6 @@ fn cs_main(
|
|||
@builtin(num_workgroups) num_workgroups: vec3<u32>,
|
||||
@builtin(local_invocation_index) local_invocation_index: u32,
|
||||
) {
|
||||
//var location = vec2<i32>(global_invocation_id.xy);
|
||||
var item_id = vec2<i32>(local_invocation_id.xy);
|
||||
var tile_id = vec2<i32>(workgroup_id.xy);
|
||||
var tile_number = vec2<i32>(num_workgroups.xy);
|
||||
var index = tile_id.y * tile_number.x + tile_id.x;
|
||||
|
||||
// Initialize some shared global values for depth and light count
|
||||
if (local_invocation_index == 0u) {
|
||||
wg_min_depth = 0xFFFFFFFu;
|
||||
|
@ -112,28 +100,41 @@ fn cs_main(
|
|||
|
||||
// Create the frustum planes that will be used for this time
|
||||
if (local_invocation_index == 0u) {
|
||||
var negative_step = (2.0 * vec2<f32>(tile_id)) / vec2<f32>(tile_number);
|
||||
var positive_step = (2.0 * vec2<f32>(tile_id) + vec2<f32>(1.0, 1.0)) / vec2<f32>(tile_number);
|
||||
// Compute the 4 corner points on the far clipping plane to use as the frustum vertices.
|
||||
var screen_space: array<vec4<f32>, 4>;
|
||||
|
||||
// z in the vec4 is the distance from the center of the tile
|
||||
wg_frustum_planes[0] = vec4<f32>(1.0, 0.0, 0.0, 1.0 - negative_step.x); // left
|
||||
wg_frustum_planes[1] = vec4<f32>(-1.0, 0.0, 0.0, -1.0 + positive_step.x); // right
|
||||
wg_frustum_planes[2] = vec4<f32>(0.0, -1.0, 0.0, 1.0 - negative_step.y); // bottom
|
||||
wg_frustum_planes[3] = vec4<f32>(0.0, -1.0, 0.0, -1.0 + positive_step.y); // top
|
||||
wg_frustum_planes[4] = vec4<f32>(0.0, 0.0, -1.0, -min_depth); // near plane
|
||||
wg_frustum_planes[5] = vec4<f32>(0.0, 0.0, 1.0, max_depth); // far plane
|
||||
// top left point
|
||||
var temp: vec2<u32> = workgroup_id.xy * BLOCK_SIZE;
|
||||
screen_space[0] = vec4<f32>(f32(temp.x), f32(temp.y), -1.0, 1.0);
|
||||
|
||||
// convert the side and top planes from clip to view space
|
||||
// top right point
|
||||
var temp2 = vec2<f32>(f32(workgroup_id.x) + 1.0, f32(workgroup_id.y)) * f32(BLOCK_SIZE);
|
||||
screen_space[1] = vec4<f32>(temp2.x, temp2.y, -1.0, 1.0);
|
||||
|
||||
// bottom left point
|
||||
temp2 = vec2<f32>(f32(workgroup_id.x), f32(workgroup_id.y) + 1.0) * f32(BLOCK_SIZE);
|
||||
screen_space[2] = vec4<f32>(temp2.x, temp2.y, -1.0, 1.0);
|
||||
|
||||
// bottom right point
|
||||
temp2 = vec2<f32>(f32(workgroup_id.x) + 1.0, f32(workgroup_id.y) + 1.0) * f32(BLOCK_SIZE);
|
||||
screen_space[3] = vec4<f32>(temp2.x, temp2.y, -1.0, 1.0);
|
||||
|
||||
// convert screenspace to view space
|
||||
var view_space: array<vec3<f32>, 4>;
|
||||
for (var i = 0u; i < 4u; i++) {
|
||||
wg_frustum_planes[i] *= u_camera.view_projection;
|
||||
wg_frustum_planes[i] /= length(wg_frustum_planes[i].xyz);
|
||||
view_space[i] = screen_to_view(screen_space[i]).xyz;
|
||||
}
|
||||
|
||||
// convert near and far planes from clip to view space
|
||||
wg_frustum_planes[4] *= u_camera.view;
|
||||
wg_frustum_planes[4] /= length(wg_frustum_planes[4].xyz);
|
||||
wg_frustum_planes[5] *= u_camera.view;
|
||||
wg_frustum_planes[5] /= length(wg_frustum_planes[5].xyz);
|
||||
// View space eye is always at the origin
|
||||
let eye_pos = vec3<f32>(0.0, 0.0, 0.0);
|
||||
|
||||
wg_frustum_planes[0] = compute_plane(eye_pos, view_space[2], view_space[0]); // left plane
|
||||
wg_frustum_planes[1] = compute_plane(eye_pos, view_space[1], view_space[3]); // right plane
|
||||
wg_frustum_planes[2] = compute_plane(eye_pos, view_space[0], view_space[1]); // top plane
|
||||
wg_frustum_planes[3] = compute_plane(eye_pos, view_space[3], view_space[2]); // bottom plane
|
||||
|
||||
wg_frustum_planes[4] = vec4<f32>(0.0, 0.0, -1.0, -min_depth);
|
||||
wg_frustum_planes[5] = vec4<f32>(0.0, 0.0, 1.0, -max_depth);
|
||||
}
|
||||
|
||||
workgroupBarrier();
|
||||
|
@ -143,38 +144,27 @@ fn cs_main(
|
|||
// Process the lights detecting which ones to cull for this tile.
|
||||
// Processes 256 lights simultaniously, each on a thread in the workgroup. Requires multiple
|
||||
// iterations for more lights.
|
||||
var thread_count = u32(BLOCK_SIZE * BLOCK_SIZE);
|
||||
var pass_count = (u_lights.light_count + thread_count - 1u) / thread_count;
|
||||
for (var i = 0u; i < pass_count; i++) {
|
||||
// find the light index to check on this thread, make sure we're not trying to test
|
||||
// for more lights than we have.
|
||||
var light_index = i * thread_count + local_invocation_index;
|
||||
if (light_index >= u_lights.light_count) {
|
||||
break;
|
||||
}
|
||||
for (var i = local_invocation_index; i < u_lights.light_count; i += BLOCK_SIZE * BLOCK_SIZE) {
|
||||
let light_index = i;
|
||||
|
||||
var light = u_lights.data[light_index];
|
||||
var position = light.position;
|
||||
var radius = light.range;
|
||||
let light = u_lights.data[light_index];
|
||||
let position_vec4 = u_camera.view * vec4<f32>(light.position, 1.0);
|
||||
let position = position_vec4.xyz;
|
||||
let radius = light.range;
|
||||
|
||||
if (light.light_ty == LIGHT_TY_DIRECTIONAL) {
|
||||
//add_light(light_index);
|
||||
add_light(light_index);
|
||||
} else if (light.light_ty == LIGHT_TY_POINT
|
||||
&& sphere_inside_frustrum(wg_frustum_planes, position, radius)) {
|
||||
// TODO: add the light to the transparent geometry list
|
||||
|
||||
add_light(light_index);
|
||||
|
||||
// TODO: spotlights
|
||||
if (!sphere_inside_plane(position, radius, wg_frustum_planes[4])) {
|
||||
/*var offset: u32 = wg_visible_light_count;
|
||||
|
||||
if (offset < MAX_TILE_VISIBLE_LIGHTS) {
|
||||
atomicAdd(&wg_visible_light_count, 1u);
|
||||
wg_visible_light_indices[offset] = light_index;
|
||||
}*/
|
||||
|
||||
}
|
||||
}
|
||||
// TODO: spotlights
|
||||
}
|
||||
|
||||
workgroupBarrier();
|
||||
|
@ -183,41 +173,27 @@ fn cs_main(
|
|||
|
||||
// first update the light grid on the first thread
|
||||
if (local_invocation_index == 0u) {
|
||||
var offset = u32(index) * MAX_TILE_VISIBLE_LIGHTS; // index in the global light list
|
||||
textureStore(t_light_grid, workgroup_id.xy, vec4<u32>(offset, wg_visible_light_count, 0u, 1u));
|
||||
wg_light_index_start = atomicAdd(&u_light_index_counter, wg_visible_light_count);
|
||||
textureStore(t_light_grid, workgroup_id.xy, vec4<u32>(wg_light_index_start, wg_visible_light_count, 0u, 1u));
|
||||
|
||||
// TODO: store light grid for transparent geometry
|
||||
}
|
||||
|
||||
workgroupBarrier();
|
||||
|
||||
// now update the light index list on all threads.
|
||||
var indices_offset = u32(index) * MAX_TILE_VISIBLE_LIGHTS;
|
||||
//var pass_count = (wg_visible_light_count + thread_count - 1) / thread_count;
|
||||
for (var i = 0u; i < pass_count; i++) {
|
||||
// find the light index to check on this thread, make sure we're not trying to test
|
||||
// for more lights than we have.
|
||||
//var light_index: u32 = i * thread_count + local_invocation_index;
|
||||
/*if (light_index > u_lights.light_count) {
|
||||
u_visible_light_indices
|
||||
break;
|
||||
}*/
|
||||
|
||||
var offset = indices_offset + i;
|
||||
if (offset >= wg_visible_light_count) {
|
||||
// stop if we're over the over the amount of lights we saw
|
||||
break;
|
||||
}
|
||||
|
||||
u_light_indices[offset] = wg_visible_light_indices[i];
|
||||
for (var i = local_invocation_index; i < wg_visible_light_count; i += BLOCK_SIZE * BLOCK_SIZE) {
|
||||
u_light_indices[wg_light_index_start + i] = wg_visible_light_indices[i];
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a light to the visible light indicies list.
|
||||
/// Returns a boolean indicating if the light was added.
|
||||
fn add_light(light_index: u32) -> bool {
|
||||
var offset: u32 = wg_visible_light_count;
|
||||
//var offset: u32 = wg_visible_light_count;
|
||||
|
||||
if (offset < MAX_TILE_VISIBLE_LIGHTS) {
|
||||
atomicAdd(&wg_visible_light_count, 1u);
|
||||
if (wg_visible_light_count < MAX_TILE_VISIBLE_LIGHTS) {
|
||||
let offset = atomicAdd(&wg_visible_light_count, 1u);
|
||||
wg_visible_light_indices[offset] = light_index;
|
||||
return true;
|
||||
}
|
||||
|
@ -232,7 +208,7 @@ fn sphere_inside_frustrum(frustum: array<vec4<f32>, 6>, sphere_origin: vec3<f32>
|
|||
|
||||
// only check the sides of the frustum
|
||||
for (var i = 0u; i < 4u; i++) {
|
||||
if (!sphere_inside_plane(sphere_origin, radius, frustum_v[i])) {
|
||||
if (sphere_inside_plane(sphere_origin, radius, frustum_v[i])) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -245,7 +221,38 @@ fn sphere_inside_frustrum(frustum: array<vec4<f32>, 6>, sphere_origin: vec3<f32>
|
|||
/// Source: Real-time collision detection, Christer Ericson (2005)
|
||||
/// (https://www.3dgep.com/forward-plus/#light-culling-compute-shader)
|
||||
fn sphere_inside_plane(sphere_origin: vec3<f32>, radius: f32, plane: vec4<f32>) -> bool {
|
||||
//return dot(plane.xyz, sphere_origin) - plane.w < -radius;
|
||||
return dot(plane.xyz, sphere_origin) - plane.w < -radius;
|
||||
}
|
||||
|
||||
return dot(vec4<f32>(sphere_origin, 0.0), plane) + radius > 0.0;
|
||||
fn clip_to_view(clip: vec4<f32>) -> vec4<f32> {
|
||||
// view space position
|
||||
var view = u_camera.inverse_projection * clip;
|
||||
|
||||
// perspective projection
|
||||
return view / view.w;
|
||||
}
|
||||
|
||||
fn screen_to_view(screen: vec4<f32>) -> vec4<f32> {
|
||||
// convert to normalized texture coordinates
|
||||
let tex_coord = screen.xy / vec2<f32>(u_screen_size);
|
||||
|
||||
// convert to clip space
|
||||
let clip = vec4<f32>( vec2<f32>(tex_coord.x, 1.0 - tex_coord.y) * 2.0 - 1.0, screen.z, screen.w);
|
||||
|
||||
return clip_to_view(clip);
|
||||
}
|
||||
|
||||
/// Compute a plane from 3 noncollinear points that form a triangle.
|
||||
/// This equation assumes a right-handed (counter-clockwise winding order)
|
||||
/// coordinate system to determine the direction of the plane normal.
|
||||
fn compute_plane(p0: vec3<f32>, p1: vec3<f32>, p2: vec3<f32>) -> vec4<f32> {
|
||||
let v0 = p1 - p0;
|
||||
let v2 = p2 - p0;
|
||||
|
||||
var plane = vec4<f32>(normalize(cross(v0, v2)), 0.0);
|
||||
|
||||
// find the distance to the origin
|
||||
plane.w = dot(plane.xyz, p0);
|
||||
|
||||
return plane;
|
||||
}
|
Loading…
Reference in New Issue