Skip to content

Instantly share code, notes, and snippets.

@davidm
Created March 17, 2012 21:05
Show Gist options
  • Select an option

  • Save davidm/2065267 to your computer and use it in GitHub Desktop.

Select an option

Save davidm/2065267 to your computer and use it in GitHub Desktop.

Revisions

  1. davidm revised this gist Mar 23, 2012. 1 changed file with 95 additions and 14 deletions.
    109 changes: 95 additions & 14 deletions hamming_weight_test.lua
    Original file line number Diff line number Diff line change
    @@ -1,6 +1,12 @@
    --[[
    Correctness and benchmark tests of various hamming weight implementations.
    See also http://lua-users.org/wiki/HammingWeight .
    This is also called "popcount".
    See also
    http://lua-users.org/wiki/HammingWeight
    http://stackoverflow.com/questions/109023/best-algorithm-to-count-the-number-of-set-bits-in-a-32-bit-integer
    http://www.dalkescientific.com/writings/diary/archive/2008/07/03/hakmem_and_other_popcounts.html
    http://perso.citi.insa-lyon.fr/claurado/ham/overview.pdf
    http://chessprogramming.wikispaces.com/Population+Count
    David Manura, 2012-03.
    --]]

    @@ -78,12 +84,12 @@ end
    local bit = requireany('bit', 'bit32')
    local rshift = bit.rshift
    local band = bit.band
    local extract8 = bit.extract or function(n, field, _) -- https://github.com/davidm/lua-bit-numberlua
    local extract8 = bit.extract or function(n, field, _)
    return band(rshift(n, field), 0xFF)
    end
    end -- https://github.com/davidm/lua-bit-numberlua

    -- Hamming weight of 32-bit integer.
    -- Simple implementation.
    -- Simple (naive) implementation.
    local function hw_simple(x)
    local sum = 0
    while x ~= 0 do
    @@ -94,33 +100,96 @@ local function hw_simple(x)
    end

    -- Hamming weight of 32-bit integer.
    -- Implementation based on 32-bit version of popcount_2
    -- Implementation uses divide-and-conquer (tree), 32-bit version of popcount_2:
    -- http://en.wikipedia.org/wiki/Hamming_weight .
    -- Note: The modulo product (x * h01) in popcount_3 could overflow floating point.
    local MASK1 = 0x55555555 -- repeating 01 pattern
    local MASK2 = 0x33333333 -- repeating 0011 pattern
    local MASK4 = 0x0F0F0F0F -- repeating 00001111 pattern
    local function hw_pc2(x)
    local function hw_dc2bit(x)
    x = x - band(rshift(x, 1), MASK1) -- 2 bit sums
    x = band(x, MASK2) + band(rshift(x, 2), MASK2) -- 4 bit sums
    x = band(x + rshift(x, 4), MASK4) -- 8 bit sums
    return band(x + rshift(x, 8) + rshift(x, 16) + rshift(x, 24), 0xFF) -- sum of bytes
    end

    -- Hamming weight of 32-bit integer.
    -- Implementation based on 8-bit table lookups (memoized).
    -- Implementation uses divide-and-conguer (tree), 3-bit variant of hw_dc2bit.
    -- Implementation provided by Egor Skriptunoff.
    -- A similar implementation is on http://stackoverflow.com/questions/109023/ .
    -- Based on HAKMEM #169:
    -- http://home.pipeline.com/~hbaker1/hakmem/hacks.html#item169 .
    local MASK001 = 0x49249249 -- repeating 001 pattern
    local MASK011 = 0xDB6DB6DB -- repeating 011 pattern
    local MASK3 = 0xC71C71C7 -- repeating 000111 pattern
    local function hw_dc3bit(x)
    x = x - band(rshift(x, 1), MASK011)
    - band(rshift(x, 2), MASK001) -- 3 bit sums
    x = band(x + rshift(x, 3), MASK3) -- 6 bit sums
    x = x + rshift(x, 6) -- 12 bit sums
    return band(x + rshift(x, 12) + rshift(x, 24), 0x3F)
    end

    -- Hamming weight of 32-bit integer.
    -- Implementation uses divide-and-conquer (tree), similar to hw_dc3bit.
    -- Based on "Intel" solution in http://wiki.cs.pdx.edu/forge/popcount.html .
    local m1 = 0x55555555 -- repeating 01 pattern
    local m2 = 0xC30C30C3 -- repeating 000011 pattern
    local function hw_dci(x)
    x = x - band(rshift(x, 1), m1)
    x = band(x, m2) + band(rshift(x, 2), m2) + band(rshift(x, 4), m2)
    x = x + rshift(x, 6)
    return band(x + rshift(x, 12) + rshift(x, 24), 0x3f)
    end

    -- Hamming weight of 32-bit integer.
    -- Implementation uses divide-and-conquer (tree), 4-bit variant of hw_dc3bit.
    -- Based on
    -- http://www.necessaryandsufficient.net/2009/04/optimising-bit-counting-using-iterative-data-driven-development/
    -- but without the (i * 0x01010101)>>24 trick.
    local function hw_dc4bit(x)
    x = x - band(rshift(x, 1), 0x77777777)
    - band(rshift(x, 2), 0x33333333)
    - band(rshift(x, 3), 0x11111111)
    x = band(x + rshift(x, 4), 0xF0F0F0F)
    return band(x + rshift(x, 8) + rshift(x, 16) + rshift(x, 24), 0xFF)
    end

    -- Hamming weight of 32-bit integer.
    -- Implementation based on 8-bit lookup table (LUT), memoized.
    local HW = memoize(hw_simple)
    local function hw_t4(x)
    local function hw_lut8(x)
    local n0 = band(x, 255); local x = rshift(x, 8)
    local n1 = band(x, 255); local x = rshift(x, 8)
    local n2 = band(x, 255); local x = rshift(x, 8)
    local n3 = x
    return HW[n0] + HW[n1] + HW[n2] + HW[n3]
    end
    --[[ inferior on LuaJit
    local function hw_lut8(x)
    local n0 = extract8(x, 0, 8)
    local n1 = extract8(x, 8, 8)
    local n2 = extract8(x, 16, 8)
    local n3 = extract8(x, 24, 8)
    return HW[n0] + HW[n1] + HW[n2] + HW[n3]
    end
    --]]

    -- Hamming weight of 32-bit integer.
    -- Implementation based on 8-bit lookup table (LUT), memoized, without bitops
    local HW = memoize(hw_simple)
    local function hw_lut8a(x)
    local n0 = x % 256; local x = (x - n0) / 256
    local n1 = x % 256; local x = (x - n1) / 256
    local n2 = x % 256; local x = (x - n2) / 256
    local n3 = x
    return HW[n0] + HW[n1] + HW[n2] + HW[n3]
    end

    -- Hamming weight of 32-bit integer.
    -- Implementation based on
    -- Peter Wegner 1960 - http://dx.doi.org/10.1145/367236.367286
    -- (also referenced in K&R 1988).
    local function hw_wegner(x)
    local sum = 0
    while x ~= 0 do
    @@ -138,6 +207,7 @@ local function test_hw(f)
    for x=0,1E+3 do
    test_hw_value(f, x)
    test_hw_value(f, 0xFFFFFFFF - x)
    test_hw_value(f, math.random(2^31-1))
    end
    end

    @@ -164,17 +234,25 @@ local function bench_hw(f, name)
    end

    -- test correctness
    test_hw(hw_pc2)
    test_hw(hw_t4)
    test_hw(hw_dc2bit)
    test_hw(hw_dc3bit)
    test_hw(hw_dc4bit)
    test_hw(hw_dci)
    test_hw(hw_lut8)
    test_hw(hw_lut8a)
    test_hw(hw_wegner)


    -- test performance
    for i=1,3 do
    bench_hw(hw_simple, 'hw_simple')
    bench_hw(hw_pc2, 'hw_pc2')
    bench_hw(hw_t4, 'hw_t4')
    bench_hw(hw_wegner, 'hw_wegner')
    bench_hw(hw_simple, 'hw_simple')
    bench_hw(hw_dc2bit, 'hw_dc2bit')
    bench_hw(hw_dc3bit, 'hw_dc3bit')
    bench_hw(hw_dc4bit, 'hw_dc4bit')
    bench_hw(hw_dci, 'hw_dci')
    bench_hw(hw_lut8, 'hw_lut8')
    bench_hw(hw_lut8a, 'hw_lut8a')
    bench_hw(hw_wegner, 'hw_wegner')
    print '---'
    end

    @@ -201,4 +279,7 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    THE SOFTWARE.
    [Note: individual hw_* function implementations minus comments are considered
    public domain, but citation in source is recommended.]
    --]]
  2. davidm revised this gist Mar 17, 2012. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion hamming_weight_test.lua
    Original file line number Diff line number Diff line change
    @@ -96,7 +96,7 @@ end
    -- Hamming weight of 32-bit integer.
    -- Implementation based on 32-bit version of popcount_2
    -- http://en.wikipedia.org/wiki/Hamming_weight .
    -- The modulo product (x * h01) in popcount_4 could overflow floating point.
    -- Note: The modulo product (x * h01) in popcount_3 could overflow floating point.
    local MASK1 = 0x55555555 -- repeating 01 pattern
    local MASK2 = 0x33333333 -- repeating 0011 pattern
    local MASK4 = 0x0F0F0F0F -- repeating 00001111 pattern
  3. davidm created this gist Mar 17, 2012.
    204 changes: 204 additions & 0 deletions hamming_weight_test.lua
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,204 @@
    --[[
    Correctness and benchmark tests of various hamming weight implementations.
    See also http://lua-users.org/wiki/HammingWeight .
    David Manura, 2012-03.
    --]]

    -- utility section --

    -- https://gist.github.com/2064991
    local function memoize(f)
    local mt = {}
    local t = setmetatable({}, mt)
    function mt:__index(k)
    local v = f(k); t[k] = v
    return v
    end
    return t
    end

    -- https://gist.github.com/1414923
    local function requireany(...)
    local errs = {}
    for i = 1, select('#', ...) do local name = select(i, ...)
    if type(name) ~= 'string' then return name, nil end
    local ok, mod = pcall(require, name)
    if ok then return mod, name end
    errs[#errs+1] = mod
    end
    error(table.concat(errs, '\n'), 2)
    end

    -- testing utility
    local OP = {}
    OP['=='] = function(a, b) return a == b end
    local function check(compare, a, b, name)
    local comparef = OP[compare]
    if not comparef(a, b) then
    error(name..': '..compare..tostring(a)..' '..tostring(b))
    end
    end

    -- benchmarking utility.
    -- Note: uses CPU time (clock function).
    -- `bench` repeatedly calls `f` for at least `min_seconds` seconds (defaults to 2).
    -- `f_count` is number of operations performed in `f` (defaults to 1).
    -- Returns number of seconds per operation.
    local clock = os.clock
    local function bench(min_seconds, f, f_count)
    min_seconds = min_seconds or 2
    f_count = f_count or 1
    local t1 = clock()
    local ntimes = 1
    local count = 0
    while 1 do
    for i=1,ntimes do
    f()
    end
    count = count + ntimes
    local t2 = clock()
    if t2 - t1 >= min_seconds then
    local period = (t2 - t1) / (count * f_count)
    return period
    end
    ntimes = ntimes * 2 -- exponential increase
    end
    end
    local function dump_bench(name, seconds)
    local ts = {}
    for i=1,#seconds do
    ts[i] = ('%0.1E'):format(seconds[i])
    end
    print(('%10s: %s s'):format(name, table.concat(ts, ' ')))
    end


    -- main code section --

    local bit = requireany('bit', 'bit32')
    local rshift = bit.rshift
    local band = bit.band
    local extract8 = bit.extract or function(n, field, _) -- https://github.com/davidm/lua-bit-numberlua
    return band(rshift(n, field), 0xFF)
    end

    -- Hamming weight of 32-bit integer.
    -- Simple implementation.
    local function hw_simple(x)
    local sum = 0
    while x ~= 0 do
    sum = sum + band(x, 1)
    x = rshift(x, 1)
    end
    return sum
    end

    -- Hamming weight of 32-bit integer.
    -- Implementation based on 32-bit version of popcount_2
    -- http://en.wikipedia.org/wiki/Hamming_weight .
    -- The modulo product (x * h01) in popcount_4 could overflow floating point.
    local MASK1 = 0x55555555 -- repeating 01 pattern
    local MASK2 = 0x33333333 -- repeating 0011 pattern
    local MASK4 = 0x0F0F0F0F -- repeating 00001111 pattern
    local function hw_pc2(x)
    x = x - band(rshift(x, 1), MASK1) -- 2 bit sums
    x = band(x, MASK2) + band(rshift(x, 2), MASK2) -- 4 bit sums
    x = band(x + rshift(x, 4), MASK4) -- 8 bit sums
    return band(x + rshift(x, 8) + rshift(x, 16) + rshift(x, 24), 0xFF) -- sum of bytes
    end

    -- Hamming weight of 32-bit integer.
    -- Implementation based on 8-bit table lookups (memoized).
    local HW = memoize(hw_simple)
    local function hw_t4(x)
    local n0 = extract8(x, 0, 8)
    local n1 = extract8(x, 8, 8)
    local n2 = extract8(x, 16, 8)
    local n3 = extract8(x, 24, 8)
    return HW[n0] + HW[n1] + HW[n2] + HW[n3]
    end

    -- Hamming weight of 32-bit integer.
    -- Implementation based on
    -- Peter Wegner 1960 - http://dx.doi.org/10.1145/367236.367286
    local function hw_wegner(x)
    local sum = 0
    while x ~= 0 do
    x = band(x, x-1)
    sum = sum + 1
    end
    return sum
    end

    local function test_hw_value(f, x)
    check('==', hw_simple(x), f(x), x)
    end

    local function test_hw(f)
    for x=0,1E+3 do
    test_hw_value(f, x)
    test_hw_value(f, 0xFFFFFFFF - x)
    end
    end

    -- note: test both high and low numbers to avoid bias in Wegner version.
    local function make_test_func(f)
    local M = 0xFFFFFFFF
    local count = 0; for _=0,1E+3,7 do count = count + 10 end
    return function()
    local result = 0 -- guard against unused computations optimizing away.
    for i=0,1E+3,7 do
    result = result + f(i) + f(i) + f(i) + f(i) + f(i)
    local i = M-i
    result = result + f(i) + f(i) + f(i) + f(i) + f(i)
    end
    return result
    end, count
    end

    local results = {}
    local function bench_hw(f, name)
    results[name] = results[name] or {}
    table.insert(results[name], bench(2.0, make_test_func(f)))
    dump_bench(name, results[name])
    end

    -- test correctness
    test_hw(hw_pc2)
    test_hw(hw_t4)
    test_hw(hw_wegner)


    -- test performance
    for i=1,3 do
    bench_hw(hw_simple, 'hw_simple')
    bench_hw(hw_pc2, 'hw_pc2')
    bench_hw(hw_t4, 'hw_t4')
    bench_hw(hw_wegner, 'hw_wegner')
    print '---'
    end

    print 'DONE'


    --[[
    (c) 2012 David Manura.
    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to deal
    in the Software without restriction, including without limitation the rights
    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:
    The above copyright notice and this permission notice shall be included in
    all copies or substantial portions of the Software.
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    THE SOFTWARE.
    --]]