#include #include #include #include #include #include #include #include #include const size_t kNumVerts = 20000; const size_t kNumTris = 40000; const size_t kMaxBoneIndexInTri = 30; const size_t kMaxNumBonesInSubset = 26; struct Bitset512 { uint64_t bits[8]; Bitset512() { memset(&bits[0], 0, sizeof(bits)); } void clear() { memset(&bits[0], 0, sizeof(bits)); } void setBit(size_t bitIndex) { if (bitIndex >= 512) return; // Out of bounds, do nothing size_t arrayIndex = bitIndex / 64; size_t bitPosition = bitIndex % 64; bits[arrayIndex] |= (1ULL << bitPosition); } bool getBit(size_t bitIndex) const { if (bitIndex >= 512) return false; // Out of bounds, return false size_t arrayIndex = bitIndex / 64; size_t bitPosition = bitIndex % 64; return (bits[arrayIndex] & (1ULL << bitPosition)) != 0; } void resetBit(size_t bitIndex) { if (bitIndex >= 512) return; // Out of bounds, do nothing size_t arrayIndex = bitIndex / 64; size_t bitPosition = bitIndex % 64; bits[arrayIndex] &= ~(1ULL << bitPosition); } size_t countBits() const { size_t count = 0; for (size_t i = 0; i < 8; ++i) { count += __popcnt64(bits[i]); } return count; } size_t countMerged(const Bitset512& other) const { size_t count = 0; for (size_t i = 0; i < 8; ++i) { uint64_t intersection = bits[i] | other.bits[i]; count += __popcnt64(intersection); } return count; } void mergeInto(const Bitset512& other) { for (size_t i = 0; i < 8; ++i) { bits[i] = bits[i] | other.bits[i]; } } }; struct Vertex { uint8_t boneIndices[4]; }; struct Tri { Bitset512 boneIndices; }; void get_tri_bitset(const Vertex& v0, const Vertex& v1, const Vertex& v2, Bitset512& out) { out.clear(); out.setBit(v0.boneIndices[0]); out.setBit(v0.boneIndices[1]); out.setBit(v0.boneIndices[2]); out.setBit(v0.boneIndices[3]); out.setBit(v1.boneIndices[0]); out.setBit(v1.boneIndices[1]); out.setBit(v1.boneIndices[2]); out.setBit(v1.boneIndices[3]); out.setBit(v2.boneIndices[0]); out.setBit(v2.boneIndices[1]); out.setBit(v2.boneIndices[2]); out.setBit(v2.boneIndices[3]); } struct Subset { std::vector triIndices; }; void print_subsets(const std::vector& subsets, const Tri* tris) { printf("Num subsets: %d\n", int(subsets.size())); for (size_t i = 0; i < subsets.size(); i++) { Bitset512 subsetBones; const Subset& subset = subsets[i]; for (size_t t = 0; t < subset.triIndices.size(); t++) { size_t triIndex = subset.triIndices[t]; subsetBones.mergeInto(tris[triIndex].boneIndices); } size_t numUniqueBones = subsetBones.countBits(); printf("Num tris = %d, Num unique bones = %d\n", int(subset.triIndices.size()), int(numUniqueBones)); } } void generate_subsets(const Tri* tris, std::vector& res) { res.clear(); Bitset512 workingSet; uint8_t triWasAddedToSubset[kNumTris]; memset(&triWasAddedToSubset[0], 0, kNumTris); for (;;) { // find the first subset triangle (TODO: improve?) size_t startTriIndex = 0xffffffff; for (size_t i = 0; i < kNumTris; i++) { if (triWasAddedToSubset[i] == 0) { startTriIndex = i; break; } } if (startTriIndex >= kNumTris) { printf("\n\nDone\n"); return; } //printf("Add subset..."); res.emplace_back(); Subset& subset = res.back(); subset.triIndices.emplace_back(startTriIndex); // add it to the working set workingSet = tris[startTriIndex].boneIndices; // and mark as added triWasAddedToSubset[startTriIndex] = 1; for (;;) { // find the best triangle to add to working set size_t numBonesInSet = workingSet.countBits(); Bitset512 bestTriangleBitSet; int bestTriangleIndex = -1; size_t numNewBonesAddedByBestTri = 10000; for (size_t triIndex = 0; triIndex < kNumTris; triIndex++) { if (triWasAddedToSubset[triIndex] != 0) { continue; } size_t newNumBonesCount = workingSet.countMerged(tris[triIndex].boneIndices); assert(newNumBonesCount >= numBonesInSet); if (newNumBonesCount < kMaxNumBonesInSubset) { #if 1 // Simple heuristic: If it fits, it's good enough bestTriangleBitSet = tris[triIndex].boneIndices; bestTriangleIndex = (int)triIndex; #else // Complex heuristic: try to fin "best" triangle (that adds less bones to the set) size_t numNewBonesAdded = newNumBonesCount - numBonesInSet; if (numNewBonesAdded < numNewBonesAddedByBestTri) { bestTriangleBitSet = triangleBitSet; bestTriangleIndex = (int)triIndex; numNewBonesAddedByBestTri = numNewBonesAdded; // optimization: found a FREE triangle no need to search further if (numNewBonesAdded == 0) { break; } } #endif } } if (bestTriangleIndex < 0) { // no more triangles left - need to start a new subset printf("%d tris\n", int(subset.triIndices.size())); break; } assert(bestTriangleIndex >= 0 && kMaxNumBonesInSubset < kNumTris); triWasAddedToSubset[bestTriangleIndex] = 1; workingSet.mergeInto(bestTriangleBitSet); subset.triIndices.emplace_back(bestTriangleIndex); } } assert(false); } int main() { std::vector vb; vb.resize(kNumVerts); for (size_t i = 0; i < kNumVerts; i++) { vb[i].boneIndices[0] = rand() % kMaxBoneIndexInTri; vb[i].boneIndices[1] = rand() % kMaxBoneIndexInTri; vb[i].boneIndices[2] = rand() % kMaxBoneIndexInTri; vb[i].boneIndices[3] = rand() % kMaxBoneIndexInTri; } std::vector tris; tris.resize(kNumTris); for (size_t i = 0; i < kNumTris; i++) { uint32_t i0 = rand() % kNumVerts; uint32_t i1 = rand() % kNumVerts; uint32_t i2 = rand() % kNumVerts; get_tri_bitset(vb[i0], vb[i1], vb[i2], tris[i].boneIndices); } std::vector subsets; clock_t start_time = clock(); generate_subsets(tris.data(), subsets); clock_t end_time = clock(); // Calculate the elapsed time in nanoseconds double elapsed_ns = (end_time - start_time) / double(CLOCKS_PER_SEC) * 1000.0; print_subsets(subsets, tris.data()); printf("Time = %f ms\n", elapsed_ns); printf("Using %3.2f Mb\n", (kNumTris * sizeof(Bitset512)) / (1024.0 * 1024.0)); return 0; }