sylefeb · April 16, 2024 13:28
diff --git a/class_6_reduce.js b/class_6_reduce.js
 /**
  * Sums E = G*G*G integers on the GPU with a multi-pass parallel reduction.
  *
  * Every pass dispatches a P x P grid of 64-thread workgroups. Each workgroup
  * adds up 64 consecutive input values in workgroup memory and writes a single
  * partial sum, so one pass divides the number of live values by G. Buffers A
  * and B exchange their input/output roles after every pass. Once a single
  * value is left it is copied into a mappable buffer, read back and logged.
  *
  * The shader accumulates in i32, whose additions wrap around in WGSL; the
  * value read back through a Uint32Array is therefore the sum modulo 2^32.
  *
  * @returns {Promise<void>} settles after the result has been logged, or
  *   right after fail() was called when no WebGPU device is available.
  */
 async function main()
 {
  const adapter = await navigator.gpu?.requestAdapter();
  const device = await adapter?.requestDevice();
  if (!device) {
    fail('need a browser that supports WebGPU');
    return;
  }

  const module = device.createShaderModule({
    label: 'compute module',
    code: `
      struct Params {
        T : i32, // number of valid input elements for this pass
        P : i32, // side size of the square workgroup grid (P*P*64 >= T)
        S : i32, // step (currently unused)
      }

      @group(0) @binding(0) var<uniform>             param: Params;
      @group(0) @binding(1) var<storage, read      > E_in  : array<i32>;
      @group(0) @binding(2) var<storage, read_write> E_out : array<i32>;

      var<workgroup> temp : array<i32,64>;

      @compute @workgroup_size(64,1) fn computeSomething(
        @builtin(global_invocation_id) u_gid: vec3<u32>,
        @builtin(local_invocation_id)  u_lid: vec3<u32>,
        @builtin(workgroup_id)         u_wid: vec3<u32>,
      ) {
          let wid = vec3<i32>(u_wid);
          let lid = vec3<i32>(u_lid);
          // linear workgroup index in the P x P grid, then element index
          let wg  = wid.x + wid.y * param.P;
          let id  = wg * 64 + lid.x;

          // one value per thread; threads past the valid range contribute 0
          // so that padding workgroups/threads never add stale data
          if (id < param.T) {
            temp[lid.x] = E_in[id];
          } else {
            temp[lid.x] = 0;
          }

          workgroupBarrier();

          // tree reduction inside the workgroup
          var S = 64/2;
          while (S > 0) {
            // Only the lower S threads are active. They read slots
            // [S, 2S) and write slots [0, S), so within one step no slot
            // is read by one thread while another thread writes it.
            if (lid.x < S) {
              temp[lid.x] = temp[lid.x] + temp[lid.x + S];
            }
            workgroupBarrier();
            S = S / 2;
          }

          // a single thread publishes the partial sum of this workgroup
          if (lid.x == 0) {
            E_out[wg] = temp[0];
          }
      }
    `,
  });

  // explicit layout: uniform params, read-only input, writable output
  const piplayout_group0 = device.createBindGroupLayout({
    label: 'piplayout_group0',
    entries: [
      {
        binding: 0,
        visibility: GPUShaderStage.COMPUTE,
        buffer: {
          type: 'uniform',
        },
      },
      {
        binding: 1,
        visibility: GPUShaderStage.COMPUTE,
        buffer: {
          type: 'read-only-storage',
        },
      },
      {
        binding: 2,
        visibility: GPUShaderStage.COMPUTE,
        buffer: {
          type: 'storage',
        },
      },
    ],
  });

  const pipeline = device.createComputePipeline({
    label: 'compute pipeline',
    layout: device.createPipelineLayout({
      bindGroupLayouts: [piplayout_group0],
    }),
    compute: {
      module,
      entryPoint: 'computeSomething',
    },
  });

  // G must match the workgroup size (64) hard-coded in the shader.
  const G = 64;
  // number of values still to be reduced; G*G*G needs 3 passes
  let E = G*G*G;

  // GPU read/write buffer
  const bufferA = device.createBuffer({
    label: 'A',
    size: E * 4 /*u32*/,
    usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
  });
  // place some initial data inside
  const input = new Uint32Array(E);
  for (let n = 0; n < E; ++n) {
    input[n] = n;
  }
  // copy our init data from CPU to GPU
  device.queue.writeBuffer(bufferA, 0, input);
  // allocate a second buffer
  const bufferB = device.createBuffer({
    label: 'B',
    size: E * 4 /*u32*/,
    usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
  });

  // create a buffer on the GPU to get a copy of the results
  const resultBuffer = device.createBuffer({
    label: 'result buffer',
    size: 4 /*a single u32*/,
    usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST,
  });

  // create a buffer for the uniform parameters
  const params = device.createBuffer({
    label: 'uniform buffer',
    size: 3 * 4 /*u32*/,
    usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
  });
  console.log(params);

  console.log('==== MAIN LOOP ====');

  // swap == 0: A is the input and B the output of the next pass;
  // swap == 1: the roles are exchanged.
  let swap = 0;

  while (E > 1) {
    // compute P, the size of the PxP dispatch grid
    // to have at least Math.ceil(T/G) workgroups
    const T = E;
    const P = Math.ceil(Math.sqrt(Math.ceil(T/G)));
    console.log('E = ',E,'T = ',T,' P = ',P, ' P*P = ',P*P);
    if (P*P < T/G) {
      console.error('incorrect grid size');
    }

    // update uniform parameters (queue operations execute in order, so
    // this pass sees these values)
    device.queue.writeBuffer(params,0,new Uint32Array([T,P,0]));

    // one partial sum per workgroup remains; round up so that sizes that
    // are not a power of G still yield an integer element count
    E = Math.ceil(E / G);

    // Setup a bindGroup to tell the shader which
    // buffer to use for the computation
    const bindGroup = device.createBindGroup({
      label: 'bindGroup',
      layout: piplayout_group0,
      entries: [
        { binding: 0, resource: { buffer: params }, },
        { binding: 1 + swap, resource: { buffer: bufferA }, },
        { binding: 1 + (1-swap), resource: { buffer: bufferB }, },
      ],
    });
    swap = 1 - swap;
    // Encode commands to do the computation
    const passEncoder = device.createCommandEncoder();
    const pass = passEncoder.beginComputePass();
    pass.setPipeline(pipeline);
    pass.setBindGroup(0, bindGroup);
    pass.dispatchWorkgroups(P,P);
    pass.end();
    // Finish encoding and submit the commands for this step
    device.queue.submit([passEncoder.finish()]);
  }

  // After the last toggle, swap tells which buffer was written last:
  // 0 means the last pass wrote into A, 1 means it wrote into B.
  const finalBuffer = swap === 0 ? bufferA : bufferB;

  // Encode a command to copy the results to a mappable buffer.
  const copyEncoder = device.createCommandEncoder();
  copyEncoder.copyBufferToBuffer(finalBuffer, 0, resultBuffer, 0, resultBuffer.size);
  device.queue.submit([copyEncoder.finish()]);

  // Read the results (slice() copies the data out before unmapping)
  await resultBuffer.mapAsync(GPUMapMode.READ);
  const result = new Uint32Array(resultBuffer.getMappedRange().slice());
  resultBuffer.unmap();

  console.log('result = ', result[0]);

 }

 /**
  * Reports a fatal problem to the user through a blocking alert dialog.
  *
  * @param msg - message to display (a string at the visible call site).
  */
 function fail(msg) {
  // A modal dialog is deliberate here, hence the lint suppression.
  // eslint-disable-next-line no-alert
  globalThis.alert(msg);
 }

 // Start the demo; log any rejection instead of leaving the promise floating.
 main().catch((err) => {
  console.error(err);
 });
	/**
	 * Sums E = G*G*G integers on the GPU with a multi-pass parallel reduction.
	 *
	 * Every pass dispatches a P x P grid of 64-thread workgroups. Each workgroup
	 * adds up 64 consecutive input values in workgroup memory and writes a single
	 * partial sum, so one pass divides the number of live values by G. Buffers A
	 * and B exchange their input/output roles after every pass. Once a single
	 * value is left it is copied into a mappable buffer, read back and logged.
	 *
	 * The shader accumulates in i32, whose additions wrap around in WGSL; the
	 * value read back through a Uint32Array is therefore the sum modulo 2^32.
	 *
	 * @returns {Promise<void>} settles after the result has been logged, or
	 *   right after fail() was called when no WebGPU device is available.
	 */
	async function main()
	{
	  const adapter = await navigator.gpu?.requestAdapter();
	  const device = await adapter?.requestDevice();
	  if (!device) {
	    fail('need a browser that supports WebGPU');
	    return;
	  }

	  const module = device.createShaderModule({
	    label: 'compute module',
	    code: `
	      struct Params {
	        T : i32, // number of valid input elements for this pass
	        P : i32, // side size of the square workgroup grid (P*P*64 >= T)
	        S : i32, // step (currently unused)
	      }

	      @group(0) @binding(0) var<uniform>             param: Params;
	      @group(0) @binding(1) var<storage, read      > E_in  : array<i32>;
	      @group(0) @binding(2) var<storage, read_write> E_out : array<i32>;

	      var<workgroup> temp : array<i32,64>;

	      @compute @workgroup_size(64,1) fn computeSomething(
	        @builtin(global_invocation_id) u_gid: vec3<u32>,
	        @builtin(local_invocation_id)  u_lid: vec3<u32>,
	        @builtin(workgroup_id)         u_wid: vec3<u32>,
	      ) {
	          let wid = vec3<i32>(u_wid);
	          let lid = vec3<i32>(u_lid);
	          // linear workgroup index in the P x P grid, then element index
	          let wg  = wid.x + wid.y * param.P;
	          let id  = wg * 64 + lid.x;

	          // one value per thread; threads past the valid range contribute 0
	          // so that padding workgroups/threads never add stale data
	          if (id < param.T) {
	            temp[lid.x] = E_in[id];
	          } else {
	            temp[lid.x] = 0;
	          }

	          workgroupBarrier();

	          // tree reduction inside the workgroup
	          var S = 64/2;
	          while (S > 0) {
	            // Only the lower S threads are active. They read slots
	            // [S, 2S) and write slots [0, S), so within one step no slot
	            // is read by one thread while another thread writes it.
	            if (lid.x < S) {
	              temp[lid.x] = temp[lid.x] + temp[lid.x + S];
	            }
	            workgroupBarrier();
	            S = S / 2;
	          }

	          // a single thread publishes the partial sum of this workgroup
	          if (lid.x == 0) {
	            E_out[wg] = temp[0];
	          }
	      }
	    `,
	  });

	  // explicit layout: uniform params, read-only input, writable output
	  const piplayout_group0 = device.createBindGroupLayout({
	    label: 'piplayout_group0',
	    entries: [
	      {
	        binding: 0,
	        visibility: GPUShaderStage.COMPUTE,
	        buffer: {
	          type: 'uniform',
	        },
	      },
	      {
	        binding: 1,
	        visibility: GPUShaderStage.COMPUTE,
	        buffer: {
	          type: 'read-only-storage',
	        },
	      },
	      {
	        binding: 2,
	        visibility: GPUShaderStage.COMPUTE,
	        buffer: {
	          type: 'storage',
	        },
	      },
	    ],
	  });

	  const pipeline = device.createComputePipeline({
	    label: 'compute pipeline',
	    layout: device.createPipelineLayout({
	      bindGroupLayouts: [piplayout_group0],
	    }),
	    compute: {
	      module,
	      entryPoint: 'computeSomething',
	    },
	  });

	  // G must match the workgroup size (64) hard-coded in the shader.
	  const G = 64;
	  // number of values still to be reduced; G*G*G needs 3 passes
	  let E = G*G*G;

	  // GPU read/write buffer
	  const bufferA = device.createBuffer({
	    label: 'A',
	    size: E * 4 /*u32*/,
	    usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
	  });
	  // place some initial data inside
	  const input = new Uint32Array(E);
	  for (let n = 0; n < E; ++n) {
	    input[n] = n;
	  }
	  // copy our init data from CPU to GPU
	  device.queue.writeBuffer(bufferA, 0, input);
	  // allocate a second buffer
	  const bufferB = device.createBuffer({
	    label: 'B',
	    size: E * 4 /*u32*/,
	    usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
	  });

	  // create a buffer on the GPU to get a copy of the results
	  const resultBuffer = device.createBuffer({
	    label: 'result buffer',
	    size: 4 /*a single u32*/,
	    usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST,
	  });

	  // create a buffer for the uniform parameters
	  const params = device.createBuffer({
	    label: 'uniform buffer',
	    size: 3 * 4 /*u32*/,
	    usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
	  });
	  console.log(params);

	  console.log('==== MAIN LOOP ====');

	  // swap == 0: A is the input and B the output of the next pass;
	  // swap == 1: the roles are exchanged.
	  let swap = 0;

	  while (E > 1) {
	    // compute P, the size of the PxP dispatch grid
	    // to have at least Math.ceil(T/G) workgroups
	    const T = E;
	    const P = Math.ceil(Math.sqrt(Math.ceil(T/G)));
	    console.log('E = ',E,'T = ',T,' P = ',P, ' P*P = ',P*P);
	    if (P*P < T/G) {
	      console.error('incorrect grid size');
	    }

	    // update uniform parameters (queue operations execute in order, so
	    // this pass sees these values)
	    device.queue.writeBuffer(params,0,new Uint32Array([T,P,0]));

	    // one partial sum per workgroup remains; round up so that sizes that
	    // are not a power of G still yield an integer element count
	    E = Math.ceil(E / G);

	    // Setup a bindGroup to tell the shader which
	    // buffer to use for the computation
	    const bindGroup = device.createBindGroup({
	      label: 'bindGroup',
	      layout: piplayout_group0,
	      entries: [
	        { binding: 0, resource: { buffer: params }, },
	        { binding: 1 + swap, resource: { buffer: bufferA }, },
	        { binding: 1 + (1-swap), resource: { buffer: bufferB }, },
	      ],
	    });
	    swap = 1 - swap;
	    // Encode commands to do the computation
	    const passEncoder = device.createCommandEncoder();
	    const pass = passEncoder.beginComputePass();
	    pass.setPipeline(pipeline);
	    pass.setBindGroup(0, bindGroup);
	    pass.dispatchWorkgroups(P,P);
	    pass.end();
	    // Finish encoding and submit the commands for this step
	    device.queue.submit([passEncoder.finish()]);
	  }

	  // After the last toggle, swap tells which buffer was written last:
	  // 0 means the last pass wrote into A, 1 means it wrote into B.
	  const finalBuffer = swap === 0 ? bufferA : bufferB;

	  // Encode a command to copy the results to a mappable buffer.
	  const copyEncoder = device.createCommandEncoder();
	  copyEncoder.copyBufferToBuffer(finalBuffer, 0, resultBuffer, 0, resultBuffer.size);
	  device.queue.submit([copyEncoder.finish()]);

	  // Read the results (slice() copies the data out before unmapping)
	  await resultBuffer.mapAsync(GPUMapMode.READ);
	  const result = new Uint32Array(resultBuffer.getMappedRange().slice());
	  resultBuffer.unmap();

	  console.log('result = ', result[0]);

	}

	/**
	 * Reports a fatal problem to the user through a blocking alert dialog.
	 *
	 * @param msg - message to display (a string at the visible call site).
	 */
	function fail(msg) {
	  // A modal dialog is deliberate here, hence the lint suppression.
	  // eslint-disable-next-line no-alert
	  globalThis.alert(msg);
	}

	// Start the demo; log any rejection instead of leaving the promise floating.
	main().catch((err) => {
	  console.error(err);
	});
No results found