Skip to content

Instantly share code, notes, and snippets.

@xoofx
Created December 11, 2021 19:45
Show Gist options
  • Select an option

  • Save xoofx/9b20c0628f51cee7a4219b35e61bac7b to your computer and use it in GitHub Desktop.

Select an option

Save xoofx/9b20c0628f51cee7a4219b35e61bac7b to your computer and use it in GitHub Desktop.

Revisions

  1. xoofx created this gist Dec 11, 2021.
    85 changes: 85 additions & 0 deletions BenchDuffsDeviceMemoryCopy.cs
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,85 @@
    // Twitter discussion https://twitter.com/nietras1/status/1469740763760181250
    // Benchmark of DuffsDevice mem copy vs plain Unsafe.CopyBlock
    // | Method | BufferSize | Mean | Error | StdDev |
    // |--------------- |----------- |-----------:|----------:|----------:|
    // | BenchDuffs | 8 | 2.627 ns | 0.0227 ns | 0.0213 ns |
    // | BenchBlockCopy | 8 | 2.608 ns | 0.0160 ns | 0.0150 ns |
    // | BenchDuffs | 32 | 2.889 ns | 0.0184 ns | 0.0172 ns |
    // | BenchBlockCopy | 32 | 2.394 ns | 0.0132 ns | 0.0123 ns |
    // | BenchDuffs | 128 | 6.083 ns | 0.0261 ns | 0.0231 ns |
    // | BenchBlockCopy | 128 | 3.904 ns | 0.0223 ns | 0.0208 ns |
    // | BenchDuffs | 1024 | 33.169 ns | 0.1734 ns | 0.1622 ns |
    // | BenchBlockCopy | 1024 | 10.607 ns | 0.0625 ns | 0.0584 ns |
    // | BenchDuffs | 4096 | 113.503 ns | 0.3058 ns | 0.2860 ns |
    // | BenchBlockCopy | 4096 | 31.632 ns | 0.0957 ns | 0.0895 ns |
    using System.Runtime.CompilerServices;
    using BenchmarkDotNet.Attributes;
    using BenchmarkDotNet.Running;

    /// <summary>
    /// BenchmarkDotNet harness comparing a hand-written switch/word-loop memory copy
    /// (<see cref="DuffsDevice"/>) against <see cref="Unsafe.CopyBlock(void*, void*, uint)"/>.
    /// </summary>
    public class Program
    {
        private int _bufferSize;
        private byte[] _bufferFrom;
        private byte[] _bufferTo;

        /// <summary>Entry point: runs every benchmark declared on <see cref="Program"/>.</summary>
        /// <param name="args">Command-line arguments (not used).</param>
        public static void Main(string[] args)
        {
            BenchmarkRunner.Run<Program>();
        }

        /// <summary>
        /// Number of bytes copied per benchmark invocation.
        /// Assigning a value allocates fresh source and destination buffers of that size.
        /// </summary>
        [Params(8, 32, 128, 1024, 4096)]
        public int BufferSize
        {
            get => _bufferSize;
            set
            {
                _bufferSize = value;
                _bufferFrom = new byte[value];
                _bufferTo = new byte[value];
            }
        }


        /// <summary>
        /// Copies <see cref="BufferSize"/> bytes from the source buffer to the destination
        /// buffer using <see cref="DuffsDevice"/>.
        /// </summary>
        [Benchmark]
        public unsafe void BenchDuffs()
        {
            fixed (byte* from = _bufferFrom)
            {
                fixed (byte* to = _bufferTo)
                {
                    DuffsDevice(to, from, (uint)BufferSize);
                }
            }
        }


        /// <summary>
        /// Copies <see cref="BufferSize"/> bytes from the source buffer to the destination
        /// buffer using <see cref="Unsafe.CopyBlock(void*, void*, uint)"/>.
        /// </summary>
        [Benchmark]
        public unsafe void BenchBlockCopy()
        {
            fixed (byte* from = _bufferFrom)
            {
                fixed (byte* to = _bufferTo)
                {
                    Unsafe.CopyBlock(to, from, (uint)BufferSize);
                }
            }
        }

        /// <summary>
        /// Copies <paramref name="count"/> bytes from <paramref name="from"/> to
        /// <paramref name="to"/>: first the leading <c>count % 8</c> bytes one at a time
        /// through a fall-through switch, then the remaining bytes as 8-byte words.
        /// </summary>
        /// <param name="to">Destination; must be writable for <paramref name="count"/> bytes.</param>
        /// <param name="from">Source; must be readable for <paramref name="count"/> bytes.</param>
        /// <param name="count">Number of bytes to copy.</param>
        /// <remarks>
        /// Overlapping regions are not specifically handled. The 8-byte accesses start at
        /// <c>from + count % 8</c>, so they are not guaranteed to be 8-byte aligned.
        /// </remarks>
        static unsafe void DuffsDevice(byte* to, byte* from, uint count)
        {
            // Step both pointers past the leading remainder; those r bytes are then
            // addressed with negative offsets -r..-1 relative to the advanced pointers.
            uint r = count & 7;
            from += r;
            to += r;
            switch (r)
            {
                case 7: to[-7] = from[-7]; goto case 6;
                case 6: to[-6] = from[-6]; goto case 5;
                case 5: to[-5] = from[-5]; goto case 4;
                case 4: to[-4] = from[-4]; goto case 3;
                case 3: to[-3] = from[-3]; goto case 2;
                case 2: to[-2] = from[-2]; goto case 1;
                case 1: to[-1] = from[-1]; break;
                case 0: break;
            }

            // Copy the remaining count / 8 words, highest index first.
            ulong* f = (ulong*)from;
            ulong* t = (ulong*)to;
            uint n = count / 8;
            while (n > 0)
            {
                // Decrement before indexing so the words touched are n-1..0;
                // indexing with n itself would skip word 0 and run 8 bytes past the end.
                --n;
                t[n] = f[n];
            }
        }
    }