Skip to content

Instantly share code, notes, and snippets.

@pabloem
Forked from anonymous/gist:4615715
Last active August 29, 2015 14:07
Show Gist options
  • Select an option

  • Save pabloem/2b4e9f05fd26cfedc234 to your computer and use it in GitHub Desktop.

Select an option

Save pabloem/2b4e9f05fd26cfedc234 to your computer and use it in GitHub Desktop.

Revisions

  1. @invalid-email-address Anonymous created this gist Jan 23, 2013.
    417 changes: 417 additions & 0 deletions gistfile1.cpp
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,417 @@
    #pragma once

    #pragma warning(push)
    #pragma warning(disable:4996)

    #include <cassert>
    #include <memory>
    #include <vector>



    template<typename T>
    class compact_vector
    {
    std::vector<std::unique_ptr<T[]>> index_block; // Points to data blocks.

    // When we deallocate, we keep the last block around so we can reuse it.
    // This is required to get O(1) push/pop. Could leave it in index_block instead,
    // and store a bit to indicate this. Tracking it separately makes things cleaner.
    std::unique_ptr<T[]> spare_empty_db;

    unsigned int num_elems;
    unsigned int num_elems_in_last_data_block;

    // Super blocks have no physical representation, but tracking which one we're in
    // allows us to compute the size of data blocks easily.
    unsigned int num_super_blocks;

    // We could compute the number of DBs in all SBs, like in 'locate_data_block_and_elem',
    // and compute this next value as needed by subtracting from that index_block.size().
    // However, this is reasonably expensive, so we'll just track it as we go instead.
    unsigned int num_dbs_in_last_sb;

    // How many data blocks fit in a super block
    __forceinline static unsigned int super_block_size(unsigned int super_block_index)
    {
    return 1 << (super_block_index/2);
    }

    // How many elements fit in a data block for a super block
    __forceinline static unsigned int data_block_size(unsigned int super_block_index)
    {
    return 1 << ((super_block_index+1)/2);
    }

    // Compute the block index and element index within that block for a given "virtual" index.
    //__forceinline static void locate_data_block_and_elem2(unsigned int i, unsigned int& element_within_block, unsigned int& data_block_index)
    //{
    // const long r = i+1;
    // unsigned long super_block_index;
    // _BitScanReverse(&super_block_index, r);

    //
    // unsigned int num_data_blocks_before_super_block = 2 * ((1 << (super_block_index/2))-1);

    // if ( super_block_index&1 )
    // {
    // num_data_blocks_before_super_block += 1 << (super_block_index/2);
    // }

    // const unsigned int e_bit_count = (super_block_index+1)/2;

    // // the data block index within the superblock
    // element_within_block = r & ((1 << e_bit_count) - 1);

    // long r_msb_cleared = r;
    // _bittestandreset( &r_msb_cleared, super_block_index);
    // const unsigned int block_index_within_super_block = r_msb_cleared >> e_bit_count;
    //
    //

    // data_block_index = num_data_blocks_before_super_block + block_index_within_super_block;
    //}

    __forceinline static void locate_data_block_and_elem(unsigned int i, unsigned int& element_within_block, unsigned int& data_block_index)
    {
    int r = i+1;
    unsigned long k;
    _BitScanReverse(&k, r);

    // There's some intentional redundancy in these two branches
    // Ends up slightly faster this way.
    if (k & 1)
    {
    const int floorKO2 = k / 2;
    const int ceilKO2 = floorKO2 + 1;
    const int powFloorKO2 = (1 << floorKO2);
    const int p = 2 * (powFloorKO2 - 1);
    const int b = (r >> ceilKO2) & (powFloorKO2 - 1);

    element_within_block = r & ((1 << ceilKO2) - 1);
    data_block_index = p+powFloorKO2+b;
    }
    else
    {
    const int floorKO2 = k / 2;
    const int powFloorKO2 = (1 << floorKO2);
    const int p = 2 * (powFloorKO2 - 1);
    const int b = (r >> floorKO2) & (powFloorKO2 - 1);

    element_within_block = r & (powFloorKO2 - 1);
    data_block_index = p+b;
    }
    }

    __forceinline void allocate_data_block()
    {
    if (spare_empty_db)
    {
    index_block.push_back(std::move(spare_empty_db));
    }
    else
    {
    index_block.emplace_back(new T[data_block_size(num_super_blocks-1)]);
    }
    }

    // Reserves space for one more element
    __forceinline void grow()
    {
    // Treat the very first allocation specially.
    // This is so we don't have to keep checking for this special case in the two
    // size calculations later.
    if (num_super_blocks == 0)
    {
    allocate_data_block();
    num_super_blocks = 1;
    num_dbs_in_last_sb = 1;
    num_elems_in_last_data_block = 1;
    num_elems = 1;
    return;
    }

    // Do we need to allocate?
    if (num_elems_in_last_data_block == data_block_size(num_super_blocks-1))
    {
    if (num_dbs_in_last_sb == super_block_size(num_super_blocks-1)) // track super block count
    {
    ++num_super_blocks;
    num_dbs_in_last_sb = 0;
    }

    allocate_data_block();

    ++num_dbs_in_last_sb;
    num_elems_in_last_data_block = 0;
    }

    ++num_elems;
    ++num_elems_in_last_data_block;
    }

    // Relinquishes space for one element
    __forceinline void shrink()
    {
    --num_elems;
    --num_elems_in_last_data_block;

    if (num_elems_in_last_data_block==0)
    {
    // Last block is empty, store it in our spare.
    // This causes deallocation if we already had a spare.
    spare_empty_db = std::move(index_block.back());
    index_block.pop_back();

    --num_dbs_in_last_sb;

    if (num_dbs_in_last_sb == 0) // track super block count
    {
    --num_super_blocks;
    num_dbs_in_last_sb = super_block_size(num_super_blocks-1);
    }

    // The new last data block is completely full
    num_elems_in_last_data_block = data_block_size(num_super_blocks-1);
    }
    }

    public:


    compact_vector() :
    num_elems(0),
    num_super_blocks(0),
    num_elems_in_last_data_block(0),
    num_dbs_in_last_sb(0)
    {
    }

    compact_vector(compact_vector&& other) :
    num_elems(other.num_elems),
    num_super_blocks(other.num_super_blocks),
    num_elems_in_last_data_block(other.num_elems_in_last_data_block),
    num_dbs_in_last_sb(other.num_dbs_in_last_sb),
    spare_empty_db(std::move(other.spare_empty_db)),
    index_block(std::move(other.index_block))
    {
    }

    compact_vector(const compact_vector& other) :
    num_elems(other.num_elems),
    num_super_blocks(0),
    num_elems_in_last_data_block(other.num_elems_in_last_data_block),
    num_dbs_in_last_sb(other.num_dbs_in_last_sb)
    {

    // Step through super blocks, copying data blocks
    // of the appropriate size from the other array
    auto block = other.index_block.begin();
    while(block != other.index_block.end())
    {
    // Step through the book-keeping variable for super blocks. This lets us know how
    // many data blocks to copy for this super block, and how big each one is.
    ++num_super_blocks;
    const unsigned int last_super_block_size = super_block_size(num_super_blocks-1);
    const unsigned int last_data_block_size = data_block_size(num_super_blocks-1);

    // This inner loop will fill up a whole super block, unless we run out
    for (unsigned int db=0; db < last_super_block_size && block != other.index_block.end(); ++db )
    {
    // Allocate data block of correct size
    index_block.emplace_back(new T[last_data_block_size]);

    // Copy data block over.
    // TODO: This also copies unused elems in last db (harmless, but wasteful).
    std::copy_n(block->get(), last_data_block_size, index_block.back().get());
    ++block;
    }
    }

    assert(num_elems == other.num_elems);
    assert(index_block.size() == other.index_block.size());
    assert(num_super_blocks == other.num_super_blocks);
    assert(num_dbs_in_last_sb == other.num_dbs_in_last_sb);
    assert(num_elems_in_last_data_block == other.num_elems_in_last_data_block);
    }

    compact_vector& operator=(compact_vector&& other)
    {
    num_elems = other.num_elems;
    num_super_blocks = other.num_super_blocks;
    num_elems_in_last_data_block = other.num_elems_in_last_data_block;
    num_dbs_in_last_sb = other.num_dbs_in_last_sb;
    spare_empty_db = std::move(other.spare_empty_db);
    index_block = std::move(other.index_block);

    return *this;
    }

    compact_vector& operator=(const compact_vector& other)
    {
    if (this != &other)
    {
    *this = compact_vector(other); // copy constructor + move-assignment
    }
    return *this;
    }

    void clear()
    {
    num_elems=num_super_blocks=last_data_block_size=num_elems_in_last_data_block=last_super_block_size=num_dbs_in_last_sb=0;
    index_block.clear();
    spare_empty_db.reset();
    }

    unsigned int size() const
    {
    return num_elems;
    }

    T& back()
    {
    return index_block.back()[num_elems_in_last_data_block-1];
    }

    const T& back() const
    {
    return index_block.back()[num_elems_in_last_data_block-1];
    }

    void push_back( T&& x)
    {
    grow();
    // todo: allocate raw memory instead, and run constructor on last elem here
    back() = std::move(x);
    }

    void push_back(const T& x)
    {
    grow();
    // todo: allocate raw memory instead, and run constructor on last elem here
    back() = x;
    }

    void pop_back()
    {
    // todo: once we run constructors manually, run destructor on last elem here
    shrink();
    }

    __forceinline T& operator[](unsigned int i)
    {
    unsigned int e, db;
    locate_data_block_and_elem(i,e,db);
    return index_block[db][e];
    }

    __forceinline const T& operator[](unsigned int i) const
    {
    unsigned int e, db;
    locate_data_block_and_elem(i,e,db);
    return index_block[db][e];
    }



    // iterator stuff
    class iterator
    {
    T* elem;
    T* current_db_end;

    unsigned int current_db;
    unsigned int current_sb;
    unsigned int last_db_in_sb;
    compact_vector* arr;

    public:

    iterator() : elem(nullptr) {};

    iterator( compact_vector* the_array ) :
    current_sb(0),
    last_db_in_sb(0),
    current_db(0),
    arr(the_array)
    {
    if ( arr->index_block.size() > 0 )
    {
    elem = arr->index_block[0].get();
    current_db_end = elem+1; // first db is always of size 1
    }
    else
    {
    elem = current_db_end = nullptr;
    }
    }

    bool operator==( const iterator& other )
    {
    return elem == other.elem;
    }

    bool operator!=( const iterator& other )
    {
    return elem != other.elem;
    }

    T& operator*()
    {
    return *elem;
    }

    iterator& operator++()
    {
    if (++elem == current_db_end)
    {
    if (++current_db == arr->index_block.size() )
    {
    // we've reached the end of the whole array
    elem = nullptr;
    }
    else
    {
    elem = arr->index_block[current_db].get();

    // Track which super block we're in, so we can use this
    // to compute the size of the data block later on
    if(current_db > last_db_in_sb)
    {
    ++current_sb;
    last_db_in_sb += super_block_size(current_sb);
    }

    if ( elem == arr->index_block.back().get() )
    {
    // we're in the last block, it may not be completely full
    // so we need to treat it specially
    current_db_end = elem + arr->num_elems_in_last_data_block;
    }
    else
    {
    // Not the lats block, so it must be a full block, compute size
    // using superblock index from before
    current_db_end = elem + data_block_size(current_sb);
    }

    }
    }

    return *this;
    }
    };

    iterator begin()
    {
    return iterator(this);
    }

    iterator end()
    {
    return iterator();
    }
    };


    #pragma warning(pop)