// Crest Water System
// Copyright © 2024 Wave Harmonic. All rights reserved.

// Inspired by https://github.com/speps/GX-EncinoWaves
|
|
|
|
// First SIZE constant must match FFT_KERNEL_0_RESOLUTION in FFTCompute.cs
|
|
#pragma kernel ComputeFFT SIZE=8 PASSES=3 CHANNEL=x TX=8 TY=1 FINAL=0
|
|
#pragma kernel ComputeFFT SIZE=8 PASSES=3 CHANNEL=y TX=1 TY=8 FINAL=1
|
|
#pragma kernel ComputeFFT SIZE=16 PASSES=4 CHANNEL=x TX=16 TY=1 FINAL=0
|
|
#pragma kernel ComputeFFT SIZE=16 PASSES=4 CHANNEL=y TX=1 TY=16 FINAL=1
|
|
#pragma kernel ComputeFFT SIZE=32 PASSES=5 CHANNEL=x TX=32 TY=1 FINAL=0
|
|
#pragma kernel ComputeFFT SIZE=32 PASSES=5 CHANNEL=y TX=1 TY=32 FINAL=1
|
|
#pragma kernel ComputeFFT SIZE=64 PASSES=6 CHANNEL=x TX=64 TY=1 FINAL=0
|
|
#pragma kernel ComputeFFT SIZE=64 PASSES=6 CHANNEL=y TX=1 TY=64 FINAL=1
|
|
#pragma kernel ComputeFFT SIZE=128 PASSES=7 CHANNEL=x TX=128 TY=1 FINAL=0
|
|
#pragma kernel ComputeFFT SIZE=128 PASSES=7 CHANNEL=y TX=1 TY=128 FINAL=1
|
|
#pragma kernel ComputeFFT SIZE=256 PASSES=8 CHANNEL=x TX=256 TY=1 FINAL=0
|
|
#pragma kernel ComputeFFT SIZE=256 PASSES=8 CHANNEL=y TX=1 TY=256 FINAL=1
|
|
#pragma kernel ComputeFFT SIZE=512 PASSES=9 CHANNEL=x TX=512 TY=1 FINAL=0
|
|
#pragma kernel ComputeFFT SIZE=512 PASSES=9 CHANNEL=y TX=1 TY=512 FINAL=1
|
|
|
|
// Must match CASCADE_COUNT in FFTCompute.cs
|
|
#define CASCADE_COUNT 16
|
|
|
|
// Butterfly lookup table. The kernel reads it at (coord, passIndex) and uses
// the xy value as the complex weight for that pass.
Texture2D<float2> _Crest_InputButterfly;

#if FINAL
// Final kernel variants: the three complex inputs are read-only and the
// results are combined into a single four-channel output.
Texture2DArray<float2> _Crest_InputH;
Texture2DArray<float2> _Crest_InputX;
Texture2DArray<float2> _Crest_InputZ;
// Only XYZ carry transform results; the kernel overwrites W with a constant
// so that no stale data is left in that channel.
RWTexture2DArray<float4> _Crest_Output;
#else
// Non-final kernel variants: each texture is read and then overwritten in
// place with the transformed values.
RWTexture2DArray<float2> _Crest_Output1;
RWTexture2DArray<float2> _Crest_Output2;
RWTexture2DArray<float2> _Crest_Output3;
#endif
|
|
|
|
// Per-group working storage, one complex value per thread (SIZE entries) for
// each of the H, X and Z signals. ButterflyPass alternates between the two
// sets: even passes read Intermediates and write Scratch, odd passes do the
// reverse.
groupshared float2 _Crest_IntermediatesH[SIZE];
groupshared float2 _Crest_IntermediatesX[SIZE];
groupshared float2 _Crest_IntermediatesZ[SIZE];

groupshared float2 _Crest_ScratchH[SIZE];
groupshared float2 _Crest_ScratchX[SIZE];
groupshared float2 _Crest_ScratchZ[SIZE];
|
|
|
|
// This was required for Intel machines, but not required for Apple Silicon.
|
|
#if defined(SHADER_API_METAL)
|
|
// reversebits function appears to not make it across to Metal
|
|
#if !defined(reversebits)
|
|
// Returns x with the order of its 32 bits reversed (bit 0 <-> bit 31, ...).
// Works by swapping neighbouring groups of bits, doubling the group width at
// every step: single bits, pairs, nibbles, bytes and finally half-words.
uint reversebits( uint x )
{
    uint bits = x;
    bits = ((bits & 0xaaaaaaaau) >> 1) | ((bits & 0x55555555u) << 1);
    bits = ((bits & 0xccccccccu) >> 2) | ((bits & 0x33333333u) << 2);
    bits = ((bits & 0xf0f0f0f0u) >> 4) | ((bits & 0x0f0f0f0fu) << 4);
    bits = ((bits & 0xff00ff00u) >> 8) | ((bits & 0x00ff00ffu) << 8);
    // Half-word swap: the shifts already discard the bits that must not survive.
    return (bits >> 16) | (bits << 16);
}
|
|
#endif
|
|
#endif
|
|
|
|
// Performs one butterfly pass for the calling thread on all three signals
// (H, X, Z) held in groupshared memory.
//
// butterfly: complex weight (x = real, y = imaginary) applied to the second operand.
// coord:     this thread's index into the groupshared arrays.
// passIndex: zero-based pass number; selects the operand distance (1 << passIndex)
//            and which buffer set is read versus written.
// cascade:   not used by this function.
//
// Even passes read from the Intermediates buffers and write to Scratch; odd
// passes read from Scratch and write to Intermediates. The caller is
// responsible for synchronising the group between passes.
void ButterflyPass(float2 butterfly, uint coord, uint passIndex, uint cascade)
{
    // The two operands differ only in bit `passIndex` of their index: the
    // first has the bit cleared, the second has it set.
    const uint stride = 1u << passIndex;
    uint firstIndex = coord & ~stride;
    uint secondIndex = coord | stride;

    // The very first pass fetches its operands in bit-reversed order.
    if (passIndex == 0)
    {
        const uint unusedBits = 32 - PASSES;
        firstIndex = reversebits(firstIndex) >> unusedBits;
        secondIndex = reversebits(secondIndex) >> unusedBits;
    }

    const bool readIntermediates = (passIndex & 1u) == 0u;

    float2 firstH, secondH;
    float2 firstX, secondX;
    float2 firstZ, secondZ;
    if (readIntermediates)
    {
        firstH = _Crest_IntermediatesH[firstIndex];
        firstX = _Crest_IntermediatesX[firstIndex];
        firstZ = _Crest_IntermediatesZ[firstIndex];

        secondH = _Crest_IntermediatesH[secondIndex];
        secondX = _Crest_IntermediatesX[secondIndex];
        secondZ = _Crest_IntermediatesZ[secondIndex];
    }
    else
    {
        firstH = _Crest_ScratchH[firstIndex];
        firstX = _Crest_ScratchX[firstIndex];
        firstZ = _Crest_ScratchZ[firstIndex];

        secondH = _Crest_ScratchH[secondIndex];
        secondX = _Crest_ScratchX[secondIndex];
        secondZ = _Crest_ScratchZ[secondIndex];
    }

    // Complex product twiddle * second, expanded as
    //   (tr * sr - ti * si, ti * sr + tr * si).
    const float2 twiddle = butterfly.xy;
    const float2 twiddleRotated = twiddle.yx * float2(-1.0, 1.0);

    const float2 outH = firstH + (twiddle * secondH.x + twiddleRotated * secondH.y);
    const float2 outX = firstX + (twiddle * secondX.x + twiddleRotated * secondX.y);
    const float2 outZ = firstZ + (twiddle * secondZ.x + twiddleRotated * secondZ.y);

    // Store into whichever buffer set was not read from in this pass.
    if (readIntermediates)
    {
        _Crest_ScratchH[coord] = outH;
        _Crest_ScratchX[coord] = outX;
        _Crest_ScratchZ[coord] = outZ;
    }
    else
    {
        _Crest_IntermediatesH[coord] = outH;
        _Crest_IntermediatesX[coord] = outX;
        _Crest_IntermediatesZ[coord] = outZ;
    }
}
|
|
|
|
// Complex conjugate of v, where x is the real part and y the imaginary part:
// the real part is kept and the imaginary part is negated.
float2 conj(float2 v)
{
    v.y = -v.y;
    return v;
}
|
|
|
|
// Kernel entry point: transforms one line of SIZE complex samples for each of
// the H, X and Z signals using PASSES butterfly passes in groupshared memory.
//
// The kernel variant (see the #pragma kernel list) fixes:
//   CHANNEL - which component of the thread id indexes along the line (x or y),
//   FINAL   - 0: load the conjugate of _Crest_Output1/2/3 and write the result
//                back in place;
//             1: load _Crest_InputH/X/Z and write the real parts, with an
//                alternating sign, to _Crest_Output.
//
// id: dispatch thread id; id.z is forwarded to ButterflyPass as the cascade.
[numthreads(TX,TY,1)]
void ComputeFFT(const uint3 id : SV_DispatchThreadID)
{
    const uint coord = id.CHANNEL;

    // Stage this thread's sample of each signal in groupshared memory.
    #if !FINAL
    _Crest_IntermediatesH[coord] = conj(_Crest_Output1[id]);
    _Crest_IntermediatesX[coord] = conj(_Crest_Output2[id]);
    _Crest_IntermediatesZ[coord] = conj(_Crest_Output3[id]);
    #else
    _Crest_IntermediatesH[coord] = _Crest_InputH[id];
    _Crest_IntermediatesX[coord] = _Crest_InputX[id];
    _Crest_IntermediatesZ[coord] = _Crest_InputZ[id];
    #endif

    [unroll(PASSES)]
    for (uint passIndex = 0; passIndex < PASSES; ++passIndex)
    {
        // Every thread reads values written by other threads in the staging
        // step or the previous pass, so the whole group must sync first.
        GroupMemoryBarrierWithGroupSync();
        ButterflyPass(_Crest_InputButterfly[uint2(coord, passIndex)].xy, coord, passIndex, id.z);
    }

    // Make the last pass's writes visible before reading the results.
    GroupMemoryBarrierWithGroupSync();

    // ButterflyPass writes Scratch on even passes and Intermediates on odd
    // ones, so an even pass count leaves the final values in Intermediates.
    const bool pingpong = (PASSES % 2) == 0;
    const float2 resultH = pingpong ? _Crest_IntermediatesH[coord] : _Crest_ScratchH[coord];
    const float2 resultX = pingpong ? _Crest_IntermediatesX[coord] : _Crest_ScratchX[coord];
    const float2 resultZ = pingpong ? _Crest_IntermediatesZ[coord] : _Crest_ScratchZ[coord];

    #if !FINAL
    _Crest_Output1[id] = resultH;
    _Crest_Output2[id] = resultX;
    _Crest_Output3[id] = resultZ;
    #else
    // Flip the sign in a checkerboard pattern over (x, y).
    const float sign = ((id.x + id.y) % 2) == 1 ? -1.0 : 1.0;
    // Only the real parts are kept, packed as (X, H, Z).
    const float3 res = float3(sign * resultX.x, sign * resultH.x, sign * resultZ.x);
    // Write zero to W to clear garbage.
    _Crest_Output[id] = float4(res, 0.0);
    #endif
}
|