// Crest Water System
// Copyright © 2024 Wave Harmonic. All rights reserved.

// Inspired by https://github.com/speps/GX-EncinoWaves

// One pair of kernels is declared per supported resolution. Each entry sets
// the macros consumed further down in this file:
//   SIZE    - length of every groupshared buffer.
//   PASSES  - number of butterfly passes run by ComputeFFT (log2 of SIZE).
//   CHANNEL - component of the dispatch thread ID used as the 1D coordinate.
//   TX, TY  - thread group dimensions passed to numthreads.
//   FINAL   - selects which resources are declared and how results are stored.
// Within a pair, the CHANNEL=x kernel has FINAL=0 and the CHANNEL=y kernel has
// FINAL=1.
// First SIZE constant must match FFT_KERNEL_0_RESOLUTION in FFTCompute.cs
#pragma kernel ComputeFFT SIZE=8 PASSES=3 CHANNEL=x TX=8 TY=1 FINAL=0
#pragma kernel ComputeFFT SIZE=8 PASSES=3 CHANNEL=y TX=1 TY=8 FINAL=1
#pragma kernel ComputeFFT SIZE=16 PASSES=4 CHANNEL=x TX=16 TY=1 FINAL=0
#pragma kernel ComputeFFT SIZE=16 PASSES=4 CHANNEL=y TX=1 TY=16 FINAL=1
#pragma kernel ComputeFFT SIZE=32 PASSES=5 CHANNEL=x TX=32 TY=1 FINAL=0
#pragma kernel ComputeFFT SIZE=32 PASSES=5 CHANNEL=y TX=1 TY=32 FINAL=1
#pragma kernel ComputeFFT SIZE=64 PASSES=6 CHANNEL=x TX=64 TY=1 FINAL=0
#pragma kernel ComputeFFT SIZE=64 PASSES=6 CHANNEL=y TX=1 TY=64 FINAL=1
#pragma kernel ComputeFFT SIZE=128 PASSES=7 CHANNEL=x TX=128 TY=1 FINAL=0
#pragma kernel ComputeFFT SIZE=128 PASSES=7 CHANNEL=y TX=1 TY=128 FINAL=1
#pragma kernel ComputeFFT SIZE=256 PASSES=8 CHANNEL=x TX=256 TY=1 FINAL=0
#pragma kernel ComputeFFT SIZE=256 PASSES=8 CHANNEL=y TX=1 TY=256 FINAL=1
#pragma kernel ComputeFFT SIZE=512 PASSES=9 CHANNEL=x TX=512 TY=1 FINAL=0
#pragma kernel ComputeFFT SIZE=512 PASSES=9 CHANNEL=y TX=1 TY=512 FINAL=1

// Must match CASCADE_COUNT in FFTCompute.cs
#define CASCADE_COUNT 16

// Butterfly table. ComputeFFT reads it at (coord, passIndex) and uses .xy as
// the complex weight for that pass.
Texture2D _Crest_InputButterfly;

#if !FINAL
// Non-final kernels read these three textures, transform them and store the
// result back to the same texel. Output1/2/3 feed the H/X/Z buffers
// respectively.
RWTexture2DArray _Crest_Output1;
RWTexture2DArray _Crest_Output2;
RWTexture2DArray _Crest_Output3;
#else
// Final kernels read the three inputs separately and pack one component of
// each result into a single output texel.
Texture2DArray _Crest_InputH;
Texture2DArray _Crest_InputX;
Texture2DArray _Crest_InputZ;
// Write zero to W to clear garbage.
RWTexture2DArray _Crest_Output;
#endif

// Two buffers per signal (H, X, Z). ButterflyPass reads from one and writes to
// the other: even passes read Intermediates and write Scratch, odd passes do
// the reverse.
groupshared float2 _Crest_IntermediatesH[SIZE];
groupshared float2 _Crest_ScratchH[SIZE];
groupshared float2 _Crest_IntermediatesX[SIZE];
groupshared float2 _Crest_ScratchX[SIZE];
groupshared float2 _Crest_IntermediatesZ[SIZE];
groupshared float2 _Crest_ScratchZ[SIZE];

// This was required for Intel machines, but not required for Apple Silicon.
#if defined(SHADER_API_METAL)
// reversebits function appears to not make it across to Metal
// The guard below only skips this definition when a preprocessor macro named
// reversebits already exists.
#if !defined(reversebits)
// Reverses the bit order of a 32-bit value by swapping progressively larger
// groups: single bits, pairs, nibbles, bytes and finally the two 16-bit halves.
uint reversebits( uint x )
{
    x = ((x >> 1) & 0x55555555u) | ((x & 0x55555555u) << 1);
    x = ((x >> 2) & 0x33333333u) | ((x & 0x33333333u) << 2);
    x = ((x >> 4) & 0x0f0f0f0fu) | ((x & 0x0f0f0f0fu) << 4);
    x = ((x >> 8) & 0x00ff00ffu) | ((x & 0x00ff00ffu) << 8);
    x = ((x >> 16) & 0xffffu) | ((x & 0xffffu) << 16);
    return x;
}
#endif
#endif

// Runs one butterfly pass for a single element of the H, X and Z signals.
//
// butterfly - complex weight for this element and pass (x = real, y = imaginary).
// coord     - index of the element this thread produces.
// passIndex - zero-based pass number. It selects the pair distance and which
//             groupshared buffer is the source.
// cascade   - not used by this function.
//
// The caller must synchronise the thread group between passes, because each
// pass reads values that other threads wrote in the previous one.
void ButterflyPass(float2 butterfly, uint coord, uint passIndex, uint cascade)
{
    uint indexA, indexB;

    // Elements are paired at a distance of 2^passIndex. The element in the
    // upper half of a pair looks back, the one in the lower half looks forward.
    const uint offset = 1 << passIndex;
    if ((coord / offset) % 2 == 1)
    {
        indexA = coord - offset;
        indexB = coord;
    }
    else
    {
        indexA = coord;
        indexB = coord + offset;
    }

    // The first pass reads its inputs in bit-reversed order. The shift keeps
    // only the PASSES significant bits of the reversed 32-bit index.
    if (passIndex == 0)
    {
        indexA = reversebits(indexA) >> (32 - PASSES);
        indexB = reversebits(indexB) >> (32 - PASSES);
    }

    // Even passes read Intermediates and write Scratch. Odd passes do the reverse.
    const bool pingpong = (passIndex % 2) == 0;

    float2 valueA_H, valueB_H;
    float2 valueA_X, valueB_X;
    float2 valueA_Z, valueB_Z;

    if (pingpong)
    {
        valueA_H = _Crest_IntermediatesH[indexA];
        valueB_H = _Crest_IntermediatesH[indexB];

        valueA_X = _Crest_IntermediatesX[indexA];
        valueB_X = _Crest_IntermediatesX[indexB];

        valueA_Z = _Crest_IntermediatesZ[indexA];
        valueB_Z = _Crest_IntermediatesZ[indexB];
    }
    else
    {
        valueA_H = _Crest_ScratchH[indexA];
        valueB_H = _Crest_ScratchH[indexB];

        valueA_X = _Crest_ScratchX[indexA];
        valueB_X = _Crest_ScratchX[indexB];

        valueA_Z = _Crest_ScratchZ[indexA];
        valueB_Z = _Crest_ScratchZ[indexB];
    }

    // Complex product weight * valueB:
    // (w.x * b.x - w.y * b.y, w.y * b.x + w.x * b.y).
    const float2 weight = butterfly.xy;
    const float2 weightedValueH = weight * valueB_H.r + weight.gr * valueB_H.g * float2(-1.0, 1.0);
    const float2 weightedValueX = weight * valueB_X.r + weight.gr * valueB_X.g * float2(-1.0, 1.0);
    const float2 weightedValueZ = weight * valueB_Z.r + weight.gr * valueB_Z.g * float2(-1.0, 1.0);

    const float2 resultH = valueA_H + weightedValueH;
    const float2 resultX = valueA_X + weightedValueX;
    const float2 resultZ = valueA_Z + weightedValueZ;

    if (pingpong)
    {
        _Crest_ScratchH[coord] = resultH;
        _Crest_ScratchX[coord] = resultX;
        _Crest_ScratchZ[coord] = resultZ;
    }
    else
    {
        _Crest_IntermediatesH[coord] = resultH;
        _Crest_IntermediatesX[coord] = resultX;
        _Crest_IntermediatesZ[coord] = resultZ;
    }
}

// Complex conjugate of v (x = real, y = imaginary).
float2 conj(float2 v)
{
    return float2(v.x, -v.y);
}

// Kernel entry point. Each thread loads one element of the H, X and Z signals
// into groupshared memory, takes part in PASSES butterfly passes, then stores
// its element of the result.
//
// id - dispatch thread ID. id.CHANNEL is the position along the transformed
//      axis, and id.z is forwarded to ButterflyPass as the cascade.
[numthreads(TX,TY,1)]
void ComputeFFT(const uint3 id : SV_DispatchThreadID)
{
    const uint coord = id.CHANNEL;

#if !FINAL
    // Non-final kernels work in place on the output textures and conjugate
    // each value on load.
    _Crest_IntermediatesH[coord] = conj(_Crest_Output1[id]);
    _Crest_IntermediatesX[coord] = conj(_Crest_Output2[id]);
    _Crest_IntermediatesZ[coord] = conj(_Crest_Output3[id]);
#else
    _Crest_IntermediatesH[coord] = _Crest_InputH[id];
    _Crest_IntermediatesX[coord] = _Crest_InputX[id];
    _Crest_IntermediatesZ[coord] = _Crest_InputZ[id];
#endif

    [unroll(PASSES)]
    for (uint passIndex = 0; passIndex < PASSES; ++passIndex)
    {
        // Make the previous pass (or the initial load) visible to the whole
        // group before any thread reads it.
        GroupMemoryBarrierWithGroupSync();
        ButterflyPass(_Crest_InputButterfly[uint2(coord, passIndex)].xy, coord, passIndex, id.z);
    }

    GroupMemoryBarrierWithGroupSync();

    // The last pass has index PASSES - 1. When PASSES is even that pass was
    // odd and wrote Intermediates, otherwise it wrote Scratch.
    const bool pingpong = (PASSES % 2) == 0;
    const float2 resultH = pingpong ? _Crest_IntermediatesH[coord] : _Crest_ScratchH[coord];
    const float2 resultX = pingpong ? _Crest_IntermediatesX[coord] : _Crest_ScratchX[coord];
    const float2 resultZ = pingpong ? _Crest_IntermediatesZ[coord] : _Crest_ScratchZ[coord];

#if !FINAL
    _Crest_Output1[id] = resultH;
    _Crest_Output2[id] = resultX;
    _Crest_Output3[id] = resultZ;
#else
    // Checkerboard sign flip: texels whose x + y is odd are negated.
    const float sign = ((id.x + id.y) % 2) == 1 ? -1.0 : 1.0;
    // Keep only the real parts, packed as (X, H, Z).
    const float3 res = float3(sign * resultX.x, sign * resultH.x, sign * resultZ.x);
    // Write zero to W to clear garbage.
    _Crest_Output[id] = float4(res, 0.0);
#endif
}