"""
one_bit_difference.py

Written September / October 2023 by Josiah Carlson - josiah.carlson@gmail.com
Released into the public domain. Please include this authorship note / reference
to this GIST if you use / derive this software. Thank you.

Been thinking about this algorithm for about 17 years, haven't seen it around.
I might not know the proper literature search to find relevant references, or if
this was already done.

This algorithm finds all 1-bit differences in provided integers in
O(num_items * num_bits) time. We directly examine every bit approximately once
during our algorithm, may subtract up to num_items of up to num_bits, and may
mask a total of num_items for up to num_bits. Overall, this gives us a
worst-case constant for potentially large-number bit examinations at 3. For
random numbers, and even chosen 1-bit difference groups, we usually see a total
of closer to 2.3-2.5 in practice for non-sequential data, as execution constants
vary depending on the data bits themselves.

Overall Algorithm:
1. Insert all items into a prefix trie
2. From leaf to root, perform the "find_difference_merge()" function defined
  below, with leaves having bpos=0, their result having bpos=1, etc.
3. your output should contain all pairs of 1-bit difference values

find_difference_merge:
1. if only one leaf has data, or there is only one leaf, return that one leaf
2. Given that zeroes and ones are already individually sorted, up to bpos bits,
  we can zipper-compare the lowest bpos bits
  a. if the lower bpos bits are the same, emit the match, advance to the next
     item in both sequences
  b. if the zeroes item is less, advance to the next zeroes item
  c. else the ones item is less, advance to the next ones item
^^^ each step in #2 advances at least 1 item in zeroes, ones, or both lists

-- we count "comparisons" as subtractions in our metrics, as most platforms get
   at least == 0 and <0 as easy checks for integer and infinite-precision
   integer operations.

Cost of creating trie: O(num_items * num_bits) bit comparisons
Cost of traversing trie from leaf to root: O(num_items * comparisons_per_item)
  comparisons_per_item <= num_bits - 1

In random inputs, sequential inputs, and explicitly generated 1-bit-difference
graphs, we see "subtractions_per_bit" never go above .75.
* for low-match scenarios, we will see masks / subtractions approach 1 from above
* for high-match scenarios, we will see masks / subtractions approach 2 + epsilon
  from below

"""


from itertools import count
import os


# metrics counters
check_bit = count()
masks = count()
subtractions = count()
control_flow = count()
traverse = count()


def find_difference_merge(output: list, zeroes: list, ones: list, bpos: int) -> list:
  """
  Args:
    output - list to produce pairs of 1-bit differences
    zeroes - list of items with a 0 in the bpos bit position
    ones - list of items with a 1 in the bpos bit position
    bpos - which bit position we are checking

  Given two lists of sorted items with identical prefixes of n-bpos-1 bits, we
  are to calculate all pairs of items with 1 total bit of difference.

  Side-effects:
    Appends (smaller, larger) pairs of values with 1 bit differences to output,
    and returns combined list of zeroes + ones.
  """
  next(control_flow)
  if not zeroes:
    return ones
  if not ones:
    return zeroes
  zp = 0
  op = 0
  lz = len(zeroes)
  lo = len(ones)
  mask = (1 << bpos) - 1
  next(masks)
  next(masks)
  zer = zeroes[zp] & mask
  one = ones[op] & mask
  while zp < lz and op < lo:
    next(control_flow)
    # Really want a 3-way comparison, there's an assembly instruction for that,
    # or even just I86 cmov I suspect.
    # print("comparing", bin(zer), zer, bin(one),one,  bin(mask))
    delta = zer - one
    next(subtractions)
    if delta == 0:
      output.append((zeroes[zp], ones[op]))
      zp += 1
      op += 1
      if zp < lz:
        zer = zeroes[zp] & mask
        next(masks)
      if op < lo:
        one = ones[op] & mask
        next(masks)
      next(control_flow) # op < lo and zp < lz
    elif delta < 0:
      zp += 1
      if zp < lz:
        zer = zeroes[zp] & mask
        next(masks)
      next(control_flow) # zp < lz
    else:
      op += 1
      if op < lo:
        one = ones[op] & mask
        next(masks)
      next(control_flow) # op < lo
    next(control_flow) # delta == 0 or delta < 0

  next(control_flow) # loop check exit
  # all zeroes come before all ones
  return zeroes + ones

def construct_trie(items: list, bits: int) -> tuple:
  """
  Args:
    items - list of items to partition into a bitwise trie
    bits - how many bits to partition this list of items  

  Returns:
    Recursively-partitioned trie structure.
    construct_trie([3,1,2], 2) -> (([], [1]), ([2], [3]))
  """
  mask = 1 << bits
  zo = [], []
  for item in items:
    # we are literally examining the bits
    next(check_bit)
    zo[bool(item & mask)].append(item)
  return tuple(
    (construct_trie(zo_i, bits-1) if zo_i and bits else zo_i) for zo_i in zo)


def _traverse_trie(output: list, this_part: tuple, bits: int) -> list:
  if isinstance(this_part, list):
    return this_part

  next(traverse)
  this_part = tuple(
      (_traverse_trie(output, part, bits - 1) if part else part) for part in this_part
  )
  
  return find_difference_merge(output, this_part[0], this_part[1], bits)


def find_difference_caller(items: list, bits: int, metrics=True) -> list:
  """
  Args:
    items - list of items to compare for 1-bit differences
    bits - number of bits to compare for differences

  Returns:
    [(low, high), ...] for all pairs low and high from items differing in 1 bit.
  """
  if metrics:
    names = 'check_bit masks subtractions control_flow traverse'.split()
    gl = globals()
    for name in names:
      gl[name] = count()
  
  trie = construct_trie(items, bits-1)
  output = []
  _traverse_trie(output, trie, bits-1)      
  if metrics:
    for name in names:
      print(name, "per bit", (next(gl[name]) - 1) / len(items) / bits)
  
  return output


def one_off_seq(count: int, bits: int, step: int=1) -> list:
  """
  Args:
    count - number of items to return
    bits - number of bits in each item
    step - how many bits to step (will check every bit, otherwise)

  Returns:
    list of items with as many entries having <bits> 1-bit differences as
    possible.
  """
  out = some_random_vals(1, bits)
  known = set(out)
  i = 0
  while len(out) < count:
    it = out[i]
    for j in range(0, bits, step):
      iit = it ^ (1 << ((j + i) % bits))
      if iit not in known:
        known.add(iit)
        out.append(iit)
    i += 1
  return out


def some_random_vals(count: int, bits: int) -> list:
  """
  Args:
    count - number of items wanted
    bits - number of bits in each item

  Returns:
    List of <count> unique items generated from os.urandom() of <bits> length.
  """ 
  mask = (2**bits)-1
  assert count <= (mask>>1)
  read = (7 + bits) >> 3
  out = set()
  while len(out) < count:
    it = int.from_bytes(os.urandom(read), 'big') & mask
    out.add(it)
  return list(out)


if __name__ == "__main__":
  bits = 24
  for cnt in (512, 4096, 65536):
    # average case
    items = some_random_vals(cnt, bits)
    print("random", len(find_difference_caller(items, bits, True)))

    # best case
    # adjacent items are super easy to process, and have a lot of 1-bit
    # differences
    items = list(range(cnt))
    print("sequential", len(find_difference_caller(items, bits, True)))

    # these actually have fewer 1-off differences than sequential items, which
    # is due to our latter construction of items that are 1 bit different from
    # earlier items, but the earliest items have 1-bit differences from <bits>
    # items
    items = one_off_seq(cnt, bits)
    print("explicit", len(find_difference_caller(items, bits, True)))