-
-
Save ninekorn/c473225a635ab24b6eb7287cee4a6f88 to your computer and use it in GitHub Desktop.
Sum operation on DirectCompute ( SharpDX )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| using System; | |
| using System.Collections.Generic; | |
| using System.Linq; | |
| using System.Text; | |
| using System.Threading.Tasks; | |
| using SharpDX; | |
| using SharpDX.Direct3D11; | |
| using SharpDX.Direct3D; | |
| using Buffer = SharpDX.Direct3D11.Buffer; | |
namespace VectorSum
{
    /// <summary>
    /// Sums an array of floats on the GPU with a DirectCompute shader (via SharpDX)
    /// and prints the result next to a CPU reference sum.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Builds the input data, runs the "CS" entry point of vectorsum.hlsl repeatedly until
        /// a single partial sum is left, reads that value back and prints both sums.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Number of float elements to sum.
            const int DATASIZE = 10000;
            // Must match [numthreads(128, 1, 1)] and the shared array size in vectorsum.hlsl.
            const int ThreadsPerGroup = 128;
            // Size in bytes of one float element.
            const int FloatSize = 4;

            // Deterministic input data (fixed seed), values in [0, 10).
            var inputData = new float[DATASIZE];
            var random = new Random(0);
            for (int i = 0; i < DATASIZE; i++)
            {
                inputData[i] = (float)random.NextDouble() * 10;
            }

            // Everything below is released in the finally block, so it is declared up front.
            Device device = null;
            ComputeShader cs = null;
            Buffer constantBuffer = null;
            Buffer readBackBuffer = null;
            var buffers = new Buffer[2];
            var srvs = new ShaderResourceView[2];
            var uavs = new UnorderedAccessView[2];
            float result;

            try
            {
                // Create the DirectX device (the Debug flag requests the D3D11 debug layer).
                device = new Device(DriverType.Hardware, DeviceCreationFlags.Debug);

                // Compile the shader, create the compute shader, then release the bytecode.
                using (var bytecode = SharpDX.D3DCompiler.ShaderBytecode.CompileFromFile("vectorsum.hlsl", "CS", "cs_4_0"))
                {
                    cs = new ComputeShader(device, bytecode);
                }

                // Two structured buffers used alternately as shader input and output.
                var inputDesc = new BufferDescription()
                {
                    SizeInBytes = DATASIZE * FloatSize,
                    Usage = ResourceUsage.Default,
                    BindFlags = BindFlags.ShaderResource | BindFlags.UnorderedAccess,
                    OptionFlags = ResourceOptionFlags.BufferStructured,
                    StructureByteStride = FloatSize,
                };
                buffers[0] = Buffer.Create(device, inputData, inputDesc);
                buffers[1] = Buffer.Create(device, inputData, inputDesc);

                // Constant buffer holding the element count for the current pass (4 ints = 16 bytes).
                // Its contents are uploaded inside the loop right before every dispatch.
                var inputCBDesc = new BufferDescription()
                {
                    SizeInBytes = 16,
                    Usage = ResourceUsage.Default,
                    BindFlags = BindFlags.ConstantBuffer,
                    CpuAccessFlags = CpuAccessFlags.None,
                    OptionFlags = ResourceOptionFlags.None,
                    StructureByteStride = 0
                };
                constantBuffer = new Buffer(device, inputCBDesc);

                // Staging buffer used to copy the single result float back to the CPU.
                var readBackBufferDesc = new BufferDescription()
                {
                    SizeInBytes = FloatSize,
                    Usage = ResourceUsage.Staging,
                    BindFlags = BindFlags.None,
                    OptionFlags = ResourceOptionFlags.None,
                    StructureByteStride = FloatSize,
                    CpuAccessFlags = CpuAccessFlags.Read
                };
                readBackBuffer = new Buffer(device, readBackBufferDesc);

                // Shader resource views (shader input) over both buffers.
                var srvDesc = new ShaderResourceViewDescription()
                {
                    Format = SharpDX.DXGI.Format.Unknown,
                    Dimension = ShaderResourceViewDimension.Buffer,
                    Buffer = new ShaderResourceViewDescription.BufferResource()
                    {
                        // Number of elements in the view; the original set the aliased
                        // ElementWidth member, which hid the intent.
                        ElementCount = DATASIZE,
                    }
                };
                srvs[0] = new ShaderResourceView(device, buffers[0], srvDesc);
                srvs[1] = new ShaderResourceView(device, buffers[1], srvDesc);

                // Unordered access views (shader output) over both buffers.
                var uavDesc = new UnorderedAccessViewDescription()
                {
                    Format = SharpDX.DXGI.Format.Unknown,
                    Dimension = UnorderedAccessViewDimension.Buffer,
                    Buffer = new UnorderedAccessViewDescription.BufferResource()
                    {
                        ElementCount = DATASIZE,
                    }
                };
                uavs[0] = new UnorderedAccessView(device, buffers[0], uavDesc);
                uavs[1] = new UnorderedAccessView(device, buffers[1], uavDesc);

                // Run the GPU passes.
                var context = device.ImmediateContext;
                context.ComputeShader.Set(cs);
                context.ComputeShader.SetConstantBuffer(0, constantBuffer);

                int dataCount = DATASIZE;
                bool flag = false;
                do
                {
                    // Swap the roles of the two buffers for this pass.
                    flag = !flag;
                    // Unbind the SRV first so a buffer is never bound as input and output at once.
                    context.ComputeShader.SetShaderResource(0, null);
                    context.ComputeShader.SetUnorderedAccessView(0, uavs[flag ? 1 : 0]);
                    context.ComputeShader.SetShaderResource(0, srvs[flag ? 0 : 1]);

                    // Tell the shader how many input elements are valid in this pass.
                    context.UpdateSubresource(new int[] { dataCount, 0, 0, 0 }, constantBuffer);

                    // Each thread group writes one partial sum, so the next pass
                    // has one element per group.
                    int groupCount = (dataCount + ThreadsPerGroup - 1) / ThreadsPerGroup;
                    context.Dispatch(groupCount, 1, 1);
                    dataCount = groupCount;
                } while (dataCount > 1);

                // Copy the first float of the last output buffer into the staging buffer.
                context.CopySubresourceRegion(buffers[flag ? 1 : 0], 0, new ResourceRegion(0, 0, 0, FloatSize, 1, 1), readBackBuffer, 0);

                DataStream ds;
                context.MapSubresource(readBackBuffer, MapMode.Read, MapFlags.None, out ds);
                try
                {
                    using (ds)
                    {
                        result = ds.Read<float>();
                    }
                }
                finally
                {
                    context.UnmapSubresource(readBackBuffer, 0);
                }
            }
            finally
            {
                // Release every DirectX resource, also when an exception was thrown.
                if (device != null)
                {
                    device.ImmediateContext.ClearState();
                }
                Utilities.Dispose(ref srvs[0]);
                Utilities.Dispose(ref srvs[1]);
                Utilities.Dispose(ref uavs[0]);
                Utilities.Dispose(ref uavs[1]);
                Utilities.Dispose(ref buffers[0]);
                Utilities.Dispose(ref buffers[1]);
                Utilities.Dispose(ref constantBuffer);
                Utilities.Dispose(ref readBackBuffer);
                Utilities.Dispose(ref cs);
                Utilities.Dispose(ref device);
            }

            // Compare against a CPU reference sum.
            float sum = 0;
            for (int i = 0; i < DATASIZE; i++)
            {
                sum += inputData[i];
            }
            Console.WriteLine("CPU=" + sum);
            Console.WriteLine("GPU=" + result);
        }
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Constant block in slot b0. The host program uploads four 32-bit integers
// ({ count, 0, 0, 0 }) into a 16-byte buffer before every dispatch.
cbuffer CB : register(b0)
{
    uint g_iCount;  // number of valid elements in Input; threads at or past this index load 0
    uint dummy0;    // not read by the shader
    uint dummy1;    // not read by the shader
    uint dummy2;    // not read by the shader
}
// Per-thread-group scratch array: one float slot for each of the 128 threads in a group.
groupshared float shared_data[128];

// Source values for the current pass (slot t0).
StructuredBuffer<float> Input : register(t0);

// Destination for the current pass (slot u0); CS writes one partial sum per thread group.
RWStructuredBuffer<float> Result : register(u0);
// Sums up to 128 consecutive elements of Input per thread group and stores the
// group's total in Result[Gid.x].
//   Gid  : thread-group index; selects the output slot.
//   DTid : dispatch-wide thread index; selects the input element.
//   GTid : thread index inside the group (unused).
//   GI   : flattened thread index inside the group; selects the shared_data slot.
[numthreads(128, 1, 1)]
void CS(uint3 Gid : SV_GroupID,
        uint3 DTid : SV_DispatchThreadID,
        uint3 GTid : SV_GroupThreadID,
        uint GI : SV_GroupIndex)
{
    // Threads past the valid element count contribute 0 to the sum.
    float value = 0;
    if (DTid.x < g_iCount)
    {
        value = Input[DTid.x];
    }
    shared_data[GI] = value;

    // Halve the number of active threads each step (64, 32, ..., 1). The barrier before
    // every step makes the previous step's writes visible to the whole group.
    [unroll]
    for (uint stride = 64; stride > 0; stride >>= 1)
    {
        GroupMemoryBarrierWithGroupSync();
        if (GI < stride)
        {
            shared_data[GI] += shared_data[GI + stride];
        }
    }

    // Thread 0 performed the last addition itself, so it can publish the total directly.
    if (GI == 0)
    {
        Result[Gid.x] = shared_data[0];
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.