@Chillee · Created April 12, 2024
attention_dim_bench.py
import torch
from torch.utils.flop_counter import FlopCounterMode
from triton.testing import do_bench

torch.set_default_device('cuda')


def get_flops_achieved(f):
    # Count the FLOPs performed by one call to f ...
    flop_counter = FlopCounterMode(display=False)
    with flop_counter:
        f()
    total_flops = flop_counter.get_total_flops()
    # ... then benchmark its wall-clock time and report achieved TFLOP/s.
    ms_per_iter = do_bench(f)
    iters_per_second = 1e3 / ms_per_iter
    print(f"{iters_per_second * total_flops / 1e12} TF/s")


def attention(q, k, v):
    # Plain (non-fused) attention: the S x S score matrix is materialized.
    return torch.softmax(q @ k.T, dim=-1) @ v


S = 4096
# Sweep the head dimension to see how achieved throughput changes with D.
for D in [64, 128, 256, 512, 1024]:
    q = torch.randn(S, D, dtype=torch.bfloat16)
    k = torch.randn(S, D, dtype=torch.bfloat16)
    v = torch.randn(S, D, dtype=torch.bfloat16)
    print(f"D={D}")
    get_flops_achieved(lambda: attention(q, k, v))
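
As a rough sanity check on the counted FLOPs, the matmul cost of this attention can also be worked out by hand: both q @ k.T and scores @ v are matrix multiplies over shapes (S, D) x (D, S) and (S, S) x (S, D), so the forward pass is roughly 4·S²·D FLOPs (the softmax is a comparatively small elementwise cost and is typically not counted). A minimal sketch, not part of the original gist, using the same S and D values as above:

# Hypothetical sanity check: analytic matmul FLOPs for one forward pass
# of the attention function benchmarked above.
S = 4096
for D in [64, 128, 256, 512, 1024]:
    qk_flops = 2 * S * S * D      # q @ k.T: (S, D) x (D, S)
    av_flops = 2 * S * S * D      # softmax(scores) @ v: (S, S) x (S, D)
    total = qk_flops + av_flops   # softmax itself is ignored here
    print(f"D={D}: ~{total / 1e9:.1f} GFLOP per forward pass")

Dividing this figure by the measured milliseconds per iteration should land close to the TF/s numbers printed by get_flops_achieved, assuming FlopCounterMode attributes essentially all FLOPs to the two matmuls.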