// based on https://github.com/m-schuetz/compute_rasterizer
//
// Compute-shader point rasterizer. The kernels are written to be run in this
// order (NOTE(review): dispatch order is decided by the host script, which is
// not part of this file - confirm):
//   clear       - seed DepthBuffer from the camera depth texture, zero ColorBuffer
//   splat_depth - per point, keep the smallest depth per pixel
//   splat_color - per point, accumulate color for points close to that depth
//   expand      - fill each pixel from its 4-neighbourhood into the *2 buffers
//   blit        - resolve accumulated color and composite over `source`

#pragma kernel clear
#pragma kernel splat_depth
#pragma kernel splat_color
#pragma kernel expand
#pragma kernel blit

// #pragma use_dxc
// #pragma require Int64BufferAtomics

#include "UnityCG.cginc"

// One element of the `points` buffer.
struct Particle
{
    float4 position; // only .xyz is read by the kernels in this file
    float4 color;    // only .rgb is read by the kernels in this file
};

RWStructuredBuffer<Particle> points;

// Raster target dimensions in pixels; size.x is also the row stride of the
// linear per-pixel buffers (see index()).
uint2 size;
// Depth tolerance used by splat_color when comparing against DepthBuffer.
float epsilonColor;
// Depth tolerance used by expand when selecting contributing pixels.
float epsilonExpand;

Texture2D<float> CameraDepth;
// Per-pixel depth stored as the raw bit pattern of a float (asuint/asfloat).
RWStructuredBuffer<uint> DepthBuffer;
// Per-pixel accumulator: rgb = sum of (color * 256), a = number of samples.
RWStructuredBuffer<uint4> ColorBuffer;

float4x4 CameraVP;

// Maps 2D pixel coordinates to an offset into the linear per-pixel buffers.
uint index(uint2 coords)
{
    return coords.x + coords.y * size.x;
}

// True when `coords` addresses a pixel inside the raster target. Needed
// because index() would otherwise alias coords.x >= size.x onto the next row.
bool insideTarget(uint2 coords)
{
    return all(coords < size);
}

// Projects a point with CameraVP and maps it to a pixel.
//   pixelIdx - receives the linear buffer offset of the covered pixel
//   clipW    - receives the w component of the projected position, which the
//              splat kernels use as the depth value
// Returns false (outputs zeroed) when the point is culled.
bool projectToPixel(float3 pointPos, out uint pixelIdx, out float clipW)
{
    pixelIdx = 0;
    clipW = 0;

    float4 position = mul(CameraVP, float4(pointPos, 1));
    float3 pos = position.xyz / position.w;

    // Keep only points strictly inside the (-1, 1) cube. Phrased as
    // "not all inside" so that NaN results (e.g. w == 0) are culled too.
    if (!all(abs(pos) < 1))
    {
        return false;
    }

    // The float product can round up to exactly `size` for coordinates just
    // below 1, so clamp to the last valid pixel.
    uint2 pixel = min(uint2((pos.xy * 0.5 + 0.5) * size), size - uint2(1, 1));

    pixelIdx = index(pixel);
    clipW = position.w;
    return true;
}

// Resets one pixel: depth comes from the camera depth texture (converted with
// LinearEyeDepth, presumably provided by UnityCG.cginc), color is zeroed.
[numthreads(32, 32, 1)]
void clear (uint3 id : SV_DispatchThreadID)
{
    // Thread groups are 32x32, so edge groups may extend past the target.
    if (!insideTarget(id.xy))
    {
        return;
    }

    uint pixelIdx = index(id.xy);
    float depth = CameraDepth[id.xy];

    DepthBuffer[pixelIdx] = asuint(LinearEyeDepth(depth));
    ColorBuffer[pixelIdx] = uint4(0, 0, 0, 0);
}

// Depth pass: one thread per point, keeps the minimum depth per pixel.
// NOTE(review): id.x is not checked against the number of points; the caller
// is assumed to dispatch over a fully populated buffer - confirm.
[numthreads(1024, 1, 1)]
void splat_depth (uint3 id : SV_DispatchThreadID)
{
    Particle particle = points[id.x];

    uint pixelIdx;
    float clipW;
    if (!projectToPixel(particle.position.xyz, pixelIdx, clipW))
    {
        return;
    }

    // Depths are compared as raw bit patterns; this ordering only matches the
    // float ordering for non-negative values (assumed here - TODO confirm).
    uint depth = asuint(clipW);
    uint oldDepth = DepthBuffer[pixelIdx];

    // Plain read first so the atomic is only issued when it can win.
    if (depth < oldDepth)
    {
        InterlockedMin(DepthBuffer[pixelIdx], depth);
    }
}

// Color pass: one thread per point, accumulates the color of every point whose
// depth is within epsilonColor of the depth stored for its pixel.
[numthreads(1024, 1, 1)]
void splat_color (uint3 id : SV_DispatchThreadID)
{
    Particle particle = points[id.x];

    uint pixelIdx;
    float depth;
    if (!projectToPixel(particle.position.xyz, pixelIdx, depth))
    {
        return;
    }

    float oldDepth = asfloat(DepthBuffer[pixelIdx]);

    if (depth <= oldDepth + epsilonColor)
    {
        // Fixed-point color (x256) so it can be summed with integer atomics;
        // blit divides by 256 and by the sample count in .a.
        uint3 color = uint3(particle.color.rgb * 256);

        InterlockedAdd(ColorBuffer[pixelIdx].r, color.r);
        InterlockedAdd(ColorBuffer[pixelIdx].g, color.g);
        InterlockedAdd(ColorBuffer[pixelIdx].b, color.b);
        InterlockedAdd(ColorBuffer[pixelIdx].a, 1);
    }
}

RWStructuredBuffer<uint> DepthBuffer2;
RWStructuredBuffer<uint4> ColorBuffer2;

// Hole filling: reads DepthBuffer/ColorBuffer and writes DepthBuffer2/
// ColorBuffer2. Each pixel takes the minimum depth of itself and its four
// direct neighbours; if its own depth is not within epsilonExpand of that
// minimum, its color is replaced by the sum of the neighbours that are.
[numthreads(32, 32, 1)]
void expand (uint3 id : SV_DispatchThreadID)
{
    if (!insideTarget(id.xy))
    {
        return;
    }

    // Neighbour coordinates are computed in signed space and clamped so that
    // border pixels reuse themselves instead of wrapping into another row or
    // reading outside the buffers. A clamped neighbour equals the centre pixel
    // and therefore never passes the "neighbour in range and centre not in
    // range" test below.
    int2 p = int2(id.xy);
    int2 lo = int2(0, 0);
    int2 hi = int2(size) - int2(1, 1);

    uint idx = index(id.xy);
    uint idl = index(uint2(clamp(p + int2(-1, 0), lo, hi)));
    uint idr = index(uint2(clamp(p + int2(+1, 0), lo, hi)));
    uint idu = index(uint2(clamp(p + int2(0, -1), lo, hi)));
    uint idd = index(uint2(clamp(p + int2(0, +1), lo, hi)));

    float depth = asfloat(DepthBuffer[idx]);
    float4 n = float4(asfloat(DepthBuffer[idl]),
                      asfloat(DepthBuffer[idr]),
                      asfloat(DepthBuffer[idu]),
                      asfloat(DepthBuffer[idd]));

    float minDepth = min(depth, min(min(n.x, n.y), min(n.z, n.w)));
    float expandDepth = minDepth + epsilonExpand;

    // The centre wins outright when it is in range; neighbours only
    // contribute when it is not.
    bool dIn = depth <= expandDepth;
    bool4 nIn = bool4((n.x <= expandDepth) && !dIn,
                      (n.y <= expandDepth) && !dIn,
                      (n.z <= expandDepth) && !dIn,
                      (n.w <= expandDepth) && !dIn);
    uint sum = uint(dIn) + uint(nIn.x) + uint(nIn.y) + uint(nIn.z) + uint(nIn.w);

    DepthBuffer2[idx] = asuint(minDepth);
    // Alternative kept from the original: average the contributing depths.
    // DepthBuffer2[idx] = sum == 0 ? depth :
    //     (((dIn ? depth : 0)
    //     + (nIn.x ? n.x : 0)
    //     + (nIn.y ? n.y : 0)
    //     + (nIn.z ? n.z : 0)
    //     + (nIn.w ? n.w : 0)) / sum);

    uint4 color = ColorBuffer[idx];
    uint4 accum = uint4(0, 0, 0, 0);
    if (dIn)   { accum += color; }
    if (nIn.x) { accum += ColorBuffer[idl]; }
    if (nIn.y) { accum += ColorBuffer[idr]; }
    if (nIn.z) { accum += ColorBuffer[idu]; }
    if (nIn.w) { accum += ColorBuffer[idd]; }

    // With nothing selected the pixel keeps its own accumulator unchanged.
    ColorBuffer2[idx] = (sum == 0) ? color : accum;
}

Texture2D<float4> source;
RWTexture2D<float4> res;

// Resolve: converts the accumulated fixed-point color of a pixel to its
// average and writes it over `source` into `res`. Pixels with no samples
// (.a == 0) pass `source` through unchanged.
[numthreads(32, 32, 1)]
void blit (uint3 id : SV_DispatchThreadID)
{
    // Outside the raster target there is no accumulator entry; treat it as
    // empty so the pixel falls through to `source`.
    float4 raw = float4(0, 0, 0, 0);
    if (insideTarget(id.xy))
    {
        raw = ColorBuffer[index(id.xy)];
    }

    // smoothing
    // raw += (ColorBuffer[index(id.xy + uint2(1, 0))]
    //     + ColorBuffer[index(id.xy + uint2(-1, 0))]
    //     + ColorBuffer[index(id.xy + uint2(0, 1))]
    //     + ColorBuffer[index(id.xy + uint2(0, -1))]) * 0.5;
    // raw += (ColorBuffer[index(id.xy + uint2(1, 1))]
    //     + ColorBuffer[index(id.xy + uint2(-1, 1))]
    //     + ColorBuffer[index(id.xy + uint2(1, -1))]
    //     + ColorBuffer[index(id.xy + uint2(-1, -1))]) * 0.25;

    // Undo the x256 fixed-point scale and divide by the sample count.
    float3 color = raw.w == 0
        ? float3(0, 0, 0)
        : (raw.rgb * (1.0 / 256.0)) / (float)raw.w;

    // min(1, raw.w) is 0 for empty pixels and 1 otherwise, so this selects
    // between the source pixel and the resolved point color.
    res[id.xy] = lerp(source[id.xy], float4(color, 1), min(1, raw.w));
}