Skip to content

Instantly share code, notes, and snippets.

@geohot
Last active April 16, 2025 16:04
Show Gist options
  • Select an option

  • Save geohot/0cad05378fcbaeb0dceec3e89e0d4d7b to your computer and use it in GitHub Desktop.

Select an option

Save geohot/0cad05378fcbaeb0dceec3e89e0d4d7b to your computer and use it in GitHub Desktop.

Revisions

  1. geohot revised this gist Oct 19, 2023. No changes.
  2. geohot created this gist Oct 19, 2023.
    22 changes: 22 additions & 0 deletions matmul.cl
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,22 @@
    /*
     * matmul: data0 = data1 x data2 for row-major float matrices addressed with
     * a row stride of 1024 elements and a reduction length of 1024.
     *
     * Each work-group produces one 2x2 tile of data0. With g = get_group_id(1)
     * and h = get_group_id(0), the tile covers rows 2*g and 2*g+1 and columns
     * 2*h and 2*h+1. The inline annotations expect 512 groups per dimension.
     *
     *   data0  output matrix (written)
     *   data1  left operand (read only)
     *   data2  right operand (read only)
     *
     * All accesses go through float2 pointers at even element offsets, so the
     * buffers are presumably expected to be float2-aligned -- confirm with the
     * host code.
     */
    __kernel void matmul(__global float* data0, const __global float* data1, const __global float* data2) {
      int tile_row = get_group_id(1); /* 512 */
      int tile_col = get_group_id(0); /* 512 */

      /* Loop-invariant bases: first of the two data1 rows, first of the two
         data2 columns, and the top-left element of the output tile. */
      const __global float* lhs = data1 + (tile_row*2048);
      const __global float* rhs = data2 + (tile_col*2);
      __global float* out = data0 + (tile_row*2048) + (tile_col*2);

      /* .x / .y hold the left / right column of the tile. */
      float2 sum_top = (float2)(0.0f, 0.0f);
      float2 sum_bot = (float2)(0.0f, 0.0f);

      /* Every iteration consumes two consecutive reduction indices (2*kk, 2*kk+1). */
      for (int kk = 0; kk < 512; ++kk) {
        float2 a_top  = *((const __global float2*)(lhs + (kk*2)));
        float2 a_bot  = *((const __global float2*)(lhs + (kk*2) + 1024));
        float2 b_even = *((const __global float2*)(rhs + (kk*2048)));
        float2 b_odd  = *((const __global float2*)(rhs + (kk*2048) + 1024));

        /* Scalar-times-vector updates both tile columns at once; each
           component still accumulates the even-k term before the odd-k term. */
        sum_top = (a_top.x * b_even) + sum_top;
        sum_top = (a_top.y * b_odd) + sum_top;
        sum_bot = (a_bot.x * b_even) + sum_bot;
        sum_bot = (a_bot.y * b_odd) + sum_bot;
      }

      /* Store the two tile rows; the second one sits one row stride further on. */
      *((__global float2*)out) = sum_top;
      *((__global float2*)(out + 1024)) = sum_bot;
    }