Skip to content

Instantly share code, notes, and snippets.

@ninekorn
Forked from oguna/Program.cs
Created January 1, 2023 16:17
Show Gist options
  • Select an option

  • Save ninekorn/c473225a635ab24b6eb7287cee4a6f88 to your computer and use it in GitHub Desktop.

Select an option

Save ninekorn/c473225a635ab24b6eb7287cee4a6f88 to your computer and use it in GitHub Desktop.
Sum operation on DirectCompute (SharpDX)
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using SharpDX;
using SharpDX.Direct3D11;
using SharpDX.Direct3D;
using Buffer = SharpDX.Direct3D11.Buffer;
namespace VectorSum
{
    /// <summary>
    /// Sums an array of floats on the GPU with a DirectCompute (SharpDX) compute
    /// shader and prints the result next to a sum computed on the CPU.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Fills an array with pseudo-random floats, repeatedly dispatches the "CS"
        /// entry point compiled from vectorsum.hlsl until a single partial sum is
        /// left, reads that value back and prints it together with the CPU sum.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Number of elements to sum.
            const int DataSize = 10000;
            // Number of inputs each thread group collapses into one partial sum.
            // Must stay equal to the shader's [numthreads(128, 1, 1)] declaration.
            const int ThreadGroupSize = 128;

            // Build the initial data (fixed seed, so every run sums the same values).
            var inputData = new float[DataSize];
            var random = new Random(0);
            for (int i = 0; i < DataSize; i++)
            {
                inputData[i] = (float)random.NextDouble() * 10;
            }

            // Everything below is declared up front so the finally block can
            // release whatever was created, even when a later step throws.
            Device device = null;
            ComputeShader cs = null;
            Buffer constantBuffer = null;
            Buffer readBackBuffer = null;
            var buffers = new Buffer[2];
            var srvs = new ShaderResourceView[2];
            var uavs = new UnorderedAccessView[2];
            float result;

            try
            {
                // Create the DirectX device.
                device = new Device(DriverType.Hardware, DeviceCreationFlags.Debug);

                // Compile the shader and create the compute shader; the bytecode
                // is released as soon as the shader object has been created.
                using (var bytecode = SharpDX.D3DCompiler.ShaderBytecode.CompileFromFile("vectorsum.hlsl", "CS", "cs_4_0"))
                {
                    cs = new ComputeShader(device, bytecode);
                }

                // Create the two structured buffers used as shader input/output.
                // Both start out holding the input data; they swap roles each pass.
                var inputDesc = new BufferDescription()
                {
                    SizeInBytes = DataSize * sizeof(float),
                    Usage = ResourceUsage.Default,
                    BindFlags = BindFlags.ShaderResource | BindFlags.UnorderedAccess,
                    OptionFlags = ResourceOptionFlags.BufferStructured,
                    StructureByteStride = sizeof(float),
                };
                buffers[0] = Buffer.Create(device, inputData, inputDesc);
                buffers[1] = Buffer.Create(device, inputData, inputDesc);

                // Create the constant buffer passed to the shader (four 32-bit values).
                // Its contents are uploaded before every dispatch in the loop below,
                // so no initial upload is needed here.
                var constantBufferDesc = new BufferDescription()
                {
                    SizeInBytes = 16,
                    Usage = ResourceUsage.Default,
                    BindFlags = BindFlags.ConstantBuffer,
                    CpuAccessFlags = CpuAccessFlags.None,
                    OptionFlags = ResourceOptionFlags.None,
                    StructureByteStride = 0
                };
                constantBuffer = new Buffer(device, constantBufferDesc);

                // Create the staging buffer used to copy the result back to the CPU.
                var readBackBufferDesc = new BufferDescription()
                {
                    SizeInBytes = sizeof(float),
                    Usage = ResourceUsage.Staging,
                    BindFlags = BindFlags.None,
                    OptionFlags = ResourceOptionFlags.None,
                    StructureByteStride = sizeof(float),
                    CpuAccessFlags = CpuAccessFlags.Read
                };
                readBackBuffer = new Buffer(device, readBackBufferDesc);

                // Create the SRVs through which the shader reads its input.
                var srvDesc = new ShaderResourceViewDescription()
                {
                    Format = SharpDX.DXGI.Format.Unknown,
                    Dimension = ShaderResourceViewDimension.Buffer,
                    Buffer = new ShaderResourceViewDescription.BufferResource()
                    {
                        ElementWidth = DataSize,
                    }
                };
                srvs[0] = new ShaderResourceView(device, buffers[0], srvDesc);
                srvs[1] = new ShaderResourceView(device, buffers[1], srvDesc);

                // Create the UAVs through which the shader writes its output.
                var uavDesc = new UnorderedAccessViewDescription()
                {
                    Format = SharpDX.DXGI.Format.Unknown,
                    Dimension = UnorderedAccessViewDimension.Buffer,
                    Buffer = new UnorderedAccessViewDescription.BufferResource()
                    {
                        ElementCount = DataSize,
                    }
                };
                uavs[0] = new UnorderedAccessView(device, buffers[0], uavDesc);
                uavs[1] = new UnorderedAccessView(device, buffers[1], uavDesc);

                // Run the GPU work.
                var context = device.ImmediateContext;
                context.ComputeShader.Set(cs);
                context.ComputeShader.SetConstantBuffer(0, constantBuffer);

                // Reused for every constant-buffer upload; only element 0 changes.
                var constants = new int[4];
                int dataCount = DataSize;
                bool flag = false;
                do
                {
                    // Swap the SRV and the UAV. The SRV slot is cleared first so a
                    // buffer is never bound for reading and writing at the same time.
                    flag = !flag;
                    context.ComputeShader.SetShaderResource(0, null);
                    context.ComputeShader.SetUnorderedAccessView(0, uavs[flag ? 1 : 0]);
                    context.ComputeShader.SetShaderResource(0, srvs[flag ? 0 : 1]);

                    // Update the constant buffer with the number of valid inputs.
                    constants[0] = dataCount;
                    context.UpdateSubresource(constants, constantBuffer);

                    // Dispatch; each group emits one partial sum, so the number of
                    // groups becomes the input count of the next pass.
                    int threadGroup = (dataCount + ThreadGroupSize - 1) / ThreadGroupSize;
                    context.Dispatch(threadGroup, 1, 1);
                    dataCount = threadGroup;
                } while (dataCount > 1);

                // Copy the first float of the buffer written last into the staging buffer.
                context.CopySubresourceRegion(buffers[flag ? 1 : 0], 0, new ResourceRegion(0, 0, 0, sizeof(float), 1, 1), readBackBuffer, 0);

                // Map the staging buffer and read the result on the CPU.
                DataStream ds;
                context.MapSubresource(readBackBuffer, MapMode.Read, MapFlags.None, out ds);
                try
                {
                    result = ds.Read<float>();
                }
                finally
                {
                    ds.Dispose();
                    context.UnmapSubresource(readBackBuffer, 0);
                }
            }
            finally
            {
                // Release every DirectX resource. Utilities.Dispose is called on
                // all of them; entries that were never created are still null.
                if (device != null)
                {
                    device.ImmediateContext.ClearState();
                }
                Utilities.Dispose(ref srvs[0]);
                Utilities.Dispose(ref srvs[1]);
                Utilities.Dispose(ref uavs[0]);
                Utilities.Dispose(ref uavs[1]);
                Utilities.Dispose(ref buffers[0]);
                Utilities.Dispose(ref buffers[1]);
                Utilities.Dispose(ref constantBuffer);
                Utilities.Dispose(ref readBackBuffer);
                Utilities.Dispose(ref cs);
                Utilities.Dispose(ref device);
            }

            // Compare against a sum computed on the CPU. The two values may differ
            // slightly because the additions are performed in a different order.
            float sum = 0;
            for (int i = 0; i < DataSize; i++)
            {
                sum += inputData[i];
            }
            Console.WriteLine("CPU=" + sum);
            Console.WriteLine("GPU=" + result);
        }
    }
}
// Constants supplied by the host through constant-buffer slot b0 (four 32-bit values).
cbuffer CB : register(b0)
{
    uint g_iCount;  // threads whose dispatch index is >= g_iCount load 0 instead of reading Input
    uint dummy0;    // dummy0..dummy2 are not read by the shader code in this file
    uint dummy1;
    uint dummy2;
}

// Read-only source values (SRV slot t0).
StructuredBuffer<float> Input : register(t0);

// Destination for the per-group sums (UAV slot u0), indexed by group ID.
RWStructuredBuffer<float> Result : register(u0);

// Per-group scratch storage: one float for each of the 128 threads in a group.
groupshared float shared_data[128];
// Compute-shader entry point. Each 128-thread group loads up to 128 values from
// Input, adds them together in shared_data, and thread 0 of the group stores
// the group's total in Result[Gid.x].
[numthreads(128, 1, 1)]
void CS(uint3 Gid : SV_GroupID,
        uint3 DTid : SV_DispatchThreadID,
        uint3 GTid : SV_GroupThreadID,
        uint GI : SV_GroupIndex)
{
    // Stage this thread's element; threads at or past g_iCount contribute 0
    // so they do not affect the sum.
    float value = 0;
    if (DTid.x < g_iCount)
    {
        value = Input[DTid.x];
    }
    shared_data[GI] = value;

    // Fold the upper half of the active range onto the lower half, with strides
    // 64, 32, 16, 8, 4, 2, 1. The barrier ahead of every step ensures the writes
    // of the previous step (or of the load above) are complete for the whole
    // group before any thread reads them.
    [unroll]
    for (uint stride = 64; stride > 0; stride >>= 1)
    {
        GroupMemoryBarrierWithGroupSync();
        if (GI < stride)
        {
            shared_data[GI] += shared_data[GI + stride];
        }
    }

    // Thread 0 performed the final addition itself, so it can publish the
    // group's total without a further barrier.
    if (GI == 0)
    {
        Result[Gid.x] = shared_data[0];
    }
}
@ninekorn
Copy link
Author

ninekorn commented Jan 1, 2023

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment