# Source: GitHub gist shunting314/fc9324dc9222d4a8c1fb641e8d56bb4a
# Author: @shunting314 -- created December 30, 2025 01:24
import torch
import os
def f(do_profile=True):
    """Run a small CUDA side-stream / ``record_stream`` experiment.

    Two times in a row the function:

    1. allocates a 10240x10240 bfloat16 matrix ``m`` and a 1024-element
       tensor ``t`` on the CUDA device and prints ``t.data_ptr()``;
    2. under a side stream, performs one ``m @ m`` and ``t.fill_(1)``;
    3. calls ``t.record_stream(...)`` for that side stream and deletes ``t``;
    4. allocates another 1024-element tensor ``u``, runs ten more
       ``m @ m`` products, and prints ``u.data_ptr()`` and ``u.sum()``.

    The two printed addresses can then be compared by eye (presumably to
    see whether the allocator hands ``t``'s memory to ``u`` -- confirm
    with the author).

    NOTE(review): the statement nesting was reconstructed from a copy of
    the script whose indentation had been lost.  In particular, confirm
    that only the single matmul and ``t.fill_(1)`` belong under
    ``torch.cuda.stream(...)``.

    Args:
        do_profile: When true (the default), the work is recorded with
            ``torch.profiler.profile()`` and a Chrome trace is exported to
            ``/tmp/trace.json``.  When false, no profiler is started.

    Returns:
        None.  Results are reported via ``print`` and the optional trace
        file.
    """
    import contextlib

    side_stream = torch.cuda.Stream()

    # Only start the profiler when a trace is actually wanted.  Entering it
    # unconditionally would make ``do_profile=False`` pay for profiling and
    # then throw the recording away.
    if do_profile:
        profiler_ctx = torch.profiler.profile()
    else:
        profiler_ctx = contextlib.nullcontext()

    with profiler_ctx as p:
        for _ in range(2):
            m = torch.randn(10240, 10240, dtype=torch.bfloat16, device="cuda")
            t = torch.empty(1024, device="cuda")
            print(t.data_ptr())

            # Work issued on the side stream.
            with torch.cuda.stream(side_stream):
                for _ in range(1):
                    m = m @ m
                t.fill_(1)

            # Tell the allocator that ``t`` was used on the side stream
            # before dropping the last reference to it.
            t.record_stream(side_stream)
            del t

            # New allocation plus more matmuls, issued outside the
            # side-stream context.
            u = torch.full((1024,), 2, device="cuda")
            for _ in range(10):
                m = m @ m
            print(u.data_ptr())
            print(u.sum())

    # Export after the profiler context has exited.
    if do_profile:
        path = "/tmp/trace.json"
        p.export_chrome_trace(path)
        print(f"Trace written to {path}")
# Run the experiment immediately with the default do_profile=True.
# There is no __main__ guard, so this also runs when the file is imported.
f()
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment