Files
2026-03-05 00:14:42 +08:00

177 lines
6.0 KiB
Plaintext

// Crest Water System
// Copyright © 2024 Wave Harmonic. All rights reserved.
// Inspired by https://github.com/speps/GX-EncinoWaves
// First SIZE constant must match FFT_KERNEL_0_RESOLUTION in FFTCompute.cs
#pragma kernel ComputeFFT SIZE=8 PASSES=3 CHANNEL=x TX=8 TY=1 FINAL=0
#pragma kernel ComputeFFT SIZE=8 PASSES=3 CHANNEL=y TX=1 TY=8 FINAL=1
#pragma kernel ComputeFFT SIZE=16 PASSES=4 CHANNEL=x TX=16 TY=1 FINAL=0
#pragma kernel ComputeFFT SIZE=16 PASSES=4 CHANNEL=y TX=1 TY=16 FINAL=1
#pragma kernel ComputeFFT SIZE=32 PASSES=5 CHANNEL=x TX=32 TY=1 FINAL=0
#pragma kernel ComputeFFT SIZE=32 PASSES=5 CHANNEL=y TX=1 TY=32 FINAL=1
#pragma kernel ComputeFFT SIZE=64 PASSES=6 CHANNEL=x TX=64 TY=1 FINAL=0
#pragma kernel ComputeFFT SIZE=64 PASSES=6 CHANNEL=y TX=1 TY=64 FINAL=1
#pragma kernel ComputeFFT SIZE=128 PASSES=7 CHANNEL=x TX=128 TY=1 FINAL=0
#pragma kernel ComputeFFT SIZE=128 PASSES=7 CHANNEL=y TX=1 TY=128 FINAL=1
#pragma kernel ComputeFFT SIZE=256 PASSES=8 CHANNEL=x TX=256 TY=1 FINAL=0
#pragma kernel ComputeFFT SIZE=256 PASSES=8 CHANNEL=y TX=1 TY=256 FINAL=1
#pragma kernel ComputeFFT SIZE=512 PASSES=9 CHANNEL=x TX=512 TY=1 FINAL=0
#pragma kernel ComputeFFT SIZE=512 PASSES=9 CHANNEL=y TX=1 TY=512 FINAL=1
// Must match CASCADE_COUNT in FFTCompute.cs
#define CASCADE_COUNT 16
// Butterfly weights. ComputeFFT samples this at (coord, passIndex) and passes the
// .xy value to ButterflyPass as the complex weight for that element and pass.
Texture2D<float2> _Crest_InputButterfly;
#if !FINAL
// Non-final kernels transform these three textures in place: each is read
// (conjugated) at the start of ComputeFFT and overwritten with the result at the end.
RWTexture2DArray<float2> _Crest_Output1;
RWTexture2DArray<float2> _Crest_Output2;
RWTexture2DArray<float2> _Crest_Output3;
#else
// Final kernels read the three signals from these read-only inputs...
Texture2DArray<float2> _Crest_InputH;
Texture2DArray<float2> _Crest_InputX;
Texture2DArray<float2> _Crest_InputZ;
// ...and pack the real parts into a single float4 target as (X, H, Z, W).
// Write zero to W to clear garbage.
RWTexture2DArray<float4> _Crest_Output;
#endif
// Ping-pong working buffers, one pair per signal (H, X, Z), each holding SIZE values.
// Even-numbered passes read *Intermediates* and write *Scratch*; odd-numbered
// passes read *Scratch* and write *Intermediates* (see ButterflyPass).
groupshared float2 _Crest_IntermediatesH[SIZE];
groupshared float2 _Crest_ScratchH[SIZE];
groupshared float2 _Crest_IntermediatesX[SIZE];
groupshared float2 _Crest_ScratchX[SIZE];
groupshared float2 _Crest_IntermediatesZ[SIZE];
groupshared float2 _Crest_ScratchZ[SIZE];
// This was required for Intel machines, but not required for Apple Silicon.
#if defined(SHADER_API_METAL)
// reversebits function appears to not make it across to Metal
#if !defined(reversebits)
// Fallback bit reversal: returns x with the order of its 32 bits reversed.
// Works by exchanging neighbouring bits, then bit pairs, nibbles, bytes and
// finally the two 16-bit halves.
uint reversebits(uint x)
{
    uint bits = x;
    bits = ((bits & 0xaaaaaaaau) >> 1) | ((bits & 0x55555555u) << 1);
    bits = ((bits & 0xccccccccu) >> 2) | ((bits & 0x33333333u) << 2);
    bits = ((bits & 0xf0f0f0f0u) >> 4) | ((bits & 0x0f0f0f0fu) << 4);
    bits = ((bits & 0xff00ff00u) >> 8) | ((bits & 0x00ff00ffu) << 8);
    bits = ((bits & 0xffff0000u) >> 16) | ((bits & 0x0000ffffu) << 16);
    return bits;
}
#endif
#endif
// Returns a + w * b, where w and b are multiplied as complex numbers
// (x = real part, y = imaginary part).
float2 ButterflyCombine(float2 a, float2 b, float2 w)
{
    const float2 weighted = w * b.x + w.yx * b.y * float2(-1.0, 1.0);
    return a + weighted;
}

// Executes a single butterfly pass for element `coord` of the H, X and Z signals.
//   butterfly - complex weight applied to the second operand.
//   coord     - index of the element this thread produces.
//   passIndex - zero-based pass number; selects the operand stride and which
//               groupshared buffer is read and which is written.
//   cascade   - not used by this function.
// The caller is responsible for synchronising the thread group between passes.
void ButterflyPass(float2 butterfly, uint coord, uint passIndex, uint cascade)
{
    // The two operands sit `stride` elements apart; this thread is either the
    // lower or the upper member of its pair.
    const uint stride = 1 << passIndex;
    const bool isUpperOfPair = ((coord / stride) % 2) == 1;
    uint first = isUpperOfPair ? coord - stride : coord;
    uint second = isUpperOfPair ? coord : coord + stride;

    // The first pass reads its operands in bit-reversed order, keeping only
    // the low PASSES bits of the reversed index.
    if (passIndex == 0)
    {
        const uint shift = 32 - PASSES;
        first = reversebits(first) >> shift;
        second = reversebits(second) >> shift;
    }

    // Ping-pong: even passes go Intermediates -> Scratch, odd passes go
    // Scratch -> Intermediates, so reads and writes never touch the same buffer.
    if ((passIndex % 2) == 0)
    {
        _Crest_ScratchH[coord] = ButterflyCombine(_Crest_IntermediatesH[first], _Crest_IntermediatesH[second], butterfly.xy);
        _Crest_ScratchX[coord] = ButterflyCombine(_Crest_IntermediatesX[first], _Crest_IntermediatesX[second], butterfly.xy);
        _Crest_ScratchZ[coord] = ButterflyCombine(_Crest_IntermediatesZ[first], _Crest_IntermediatesZ[second], butterfly.xy);
    }
    else
    {
        _Crest_IntermediatesH[coord] = ButterflyCombine(_Crest_ScratchH[first], _Crest_ScratchH[second], butterfly.xy);
        _Crest_IntermediatesX[coord] = ButterflyCombine(_Crest_ScratchX[first], _Crest_ScratchX[second], butterfly.xy);
        _Crest_IntermediatesZ[coord] = ButterflyCombine(_Crest_ScratchZ[first], _Crest_ScratchZ[second], butterfly.xy);
    }
}
// Complex conjugate: keeps the real part (x) and negates the imaginary part (y).
float2 conj(float2 v)
{
    float2 conjugated = v;
    conjugated.y = -conjugated.y;
    return conjugated;
}
// Kernel entry point: runs PASSES butterfly passes over one line of the H, X and Z
// signals, along the axis selected by CHANNEL (id.x or id.y).
//  - FINAL=0: loads the conjugate of _Crest_Output1/2/3 and writes the transformed
//    complex values back to the same textures.
//  - FINAL=1: loads _Crest_InputH/X/Z and writes the sign-corrected real parts to
//    _Crest_Output as (X, H, Z, 0).
// NOTE(review): coord indexes groupshared arrays of length SIZE, so this assumes a
// single thread group covers the whole transformed axis - confirm against the
// dispatch in FFTCompute.cs.
[numthreads(TX,TY,1)]
void ComputeFFT(const uint3 id : SV_DispatchThreadID)
{
    // Position of this thread along the axis being transformed.
    const uint coord = id.CHANNEL;

    // Seed the "Intermediates" buffers; pass 0 reads from them.
#if !FINAL
    _Crest_IntermediatesH[coord] = conj(_Crest_Output1[id]);
    _Crest_IntermediatesX[coord] = conj(_Crest_Output2[id]);
    _Crest_IntermediatesZ[coord] = conj(_Crest_Output3[id]);
#else
    _Crest_IntermediatesH[coord] = _Crest_InputH[id];
    _Crest_IntermediatesX[coord] = _Crest_InputX[id];
    _Crest_IntermediatesZ[coord] = _Crest_InputZ[id];
#endif

    [unroll(PASSES)]
    for (uint passIndex = 0; passIndex < PASSES; ++passIndex)
    {
        // Every thread must have finished writing the previous stage before
        // any thread reads it.
        GroupMemoryBarrierWithGroupSync();
        ButterflyPass(_Crest_InputButterfly[uint2(coord, passIndex)].xy, coord, passIndex, id.z);
    }

    GroupMemoryBarrierWithGroupSync();

    // The last pass (index PASSES - 1) wrote to Intermediates when PASSES is even
    // and to Scratch when PASSES is odd.
    const bool pingpong = (PASSES % 2) == 0;
    const float2 resultH = pingpong ? _Crest_IntermediatesH[coord] : _Crest_ScratchH[coord];
    const float2 resultX = pingpong ? _Crest_IntermediatesX[coord] : _Crest_ScratchX[coord];
    const float2 resultZ = pingpong ? _Crest_IntermediatesZ[coord] : _Crest_ScratchZ[coord];

#if !FINAL
    _Crest_Output1[id] = resultH;
    _Crest_Output2[id] = resultX;
    _Crest_Output3[id] = resultZ;
#else
    // Flip the sign on texels where (x + y) is odd (checkerboard pattern).
    const float sign = ((id.x + id.y) % 2) == 1 ? -1.0 : 1.0;
    const float3 res = float3(sign * resultX.x, sign * resultH.x, sign * resultZ.x);
    // Write zero to W to clear garbage.
    _Crest_Output[id] = float4(res, 0.0);
#endif
}