Last active
April 16, 2025 16:04
-
-
Save geohot/0cad05378fcbaeb0dceec3e89e0d4d7b to your computer and use it in GitHub Desktop.
Revisions
-
geohot revised this gist
Oct 19, 2023. No changes. There are no files selected for viewing
-
geohot created this gist
Oct 19, 2023. There are no files selected for viewing
/*
 * matmul: each work-group computes one 2x2 tile of data0 = data1 x data2.
 *
 * All three buffers are indexed as row-major float arrays with a row stride
 * of 1024 elements. The reduction runs for 512 iterations and consumes two
 * k-values per iteration, i.e. 1024 k-values in total.
 *
 *   data0  output; the tile covers rows 2*g1, 2*g1+1 and columns 2*g0, 2*g0+1,
 *          where g1 = get_group_id(1) and g0 = get_group_id(0)
 *   data1  left operand, read along rows 2*g1 and 2*g1+1
 *   data2  right operand, read along columns 2*g0 and 2*g0+1
 *
 * NOTE(review): the generated code annotated both group ids with "512", which
 * presumably means a 512x512 grid of work-groups (1024x1024 matrices) --
 * confirm against the host-side launch.
 * NOTE(review): indexing uses the group id only, so every work-item of a group
 * would compute and store the same tile; presumably launched with a local size
 * of 1 -- confirm.
 * NOTE(review): the float2 loads/stores assume the buffers are at least
 * 8-byte aligned (all element offsets used here are even) -- confirm.
 */
__kernel void matmul(__global float* data0, const __global float* data1, const __global float* data2) {
  const int row_stride = 1024;

  const int tile_row = get_group_id(1); /* 512 */
  const int tile_col = get_group_id(0); /* 512 */

  /* Element offsets of the tile's first row and first column. */
  const int row_base = tile_row * (2 * row_stride);
  const int col_base = tile_col * 2;

  const __global float* lhs = data1 + row_base;
  const __global float* rhs = data2 + col_base;

  /* One float2 per output row; .x/.y are the tile's two columns. */
  float2 out_top = (float2)(0.0f, 0.0f);
  float2 out_bottom = (float2)(0.0f, 0.0f);

  for (int step = 0; step < 512; ++step) {
    /* Two consecutive k-values from each of the two left-hand rows. */
    const float2 lhs_top = *((const __global float2*)(lhs + (step * 2)));
    const float2 lhs_bottom = *((const __global float2*)(lhs + (step * 2) + row_stride));

    /* The matching two right-hand rows (k and k+1), two columns each. */
    const float2 rhs_k0 = *((const __global float2*)(rhs + (step * (2 * row_stride))));
    const float2 rhs_k1 = *((const __global float2*)(rhs + (step * (2 * row_stride)) + row_stride));

    /*
     * Scalar-times-vector multiply-adds: each component accumulates the k
     * term first and the k+1 term second, matching the per-component order
     * of the scalar formulation.
     */
    out_top = (lhs_top.x * rhs_k0) + out_top;
    out_top = (lhs_top.y * rhs_k1) + out_top;
    out_bottom = (lhs_bottom.x * rhs_k0) + out_bottom;
    out_bottom = (lhs_bottom.y * rhs_k1) + out_bottom;
  }

  /* Store the two rows of the tile, one row stride apart. */
  __global float* dst = data0 + row_base + col_base;
  *((__global float2*)dst) = out_top;
  *((__global float2*)(dst + row_stride)) = out_bottom;
}