/** * This implements the Shift-Or-Algorithm. * Text input is via STDIN, pattern is expected as cmd argument. * Example call: echo "12121321231" | ./a.out 123 * * The algorithm exits early as soon as a match was found. It can be easily modified * to count all pattern occurences or even to provide all start positions. * * Compilation: g++ -O3 shiftor.cpp -o program * Execution: ./program AA text.txt */ #include #include #include #include #include #include int main(int argc, char** argv) { if(argc != 3) { std::cerr << "Pattern and file containing the text expected as cmd argument.\n"; return -1; } const std::string pattern = argv[1]; std::string text; std::fstream infile(argv[2]); for(std::string str; std::getline(infile, str); text += str + "\n") { ; } // Setup alphabet hashset std::unordered_set alphabet; alphabet.insert(pattern.begin(), pattern.end()); alphabet.insert(text.begin(), text.end()); // We represent the pattern with a vector of 32 Bit unsigned ints // -> The number of ints is `m / 32 + parity` const std::size_t width = pattern.size() / 32 + (pattern.size() % 32 == 0 ? 0 : 1); std::vector masks; masks.resize(alphabet.size() * width); // Init masks for(auto it = alphabet.begin(); it != alphabet.end(); ++it) { const char letter = *it; const std::size_t id = std::distance(alphabet.begin(), it); for(std::size_t i = 0; i < pattern.size(); ++i) { auto& mask = masks[(i / 32) + id * width]; if(pattern[i] == letter) mask &= ~(1UL << (i % 32)); // unset bit else mask |= (1UL << (i % 32)); // set bit } } // Start pattern matching std::vector state; state.resize(width, 0xFFFFFFFF); std::vector positions; for(auto it = text.begin(); it != text.end(); ++it) { const char letter = *it; // Determine number used in to refer to the correct mask const std::size_t id = std::distance(alphabet.begin(), alphabet.find(letter)); // Perform shift int carry = 0; for(std::size_t i = 0; i < state.size(); ++i) { int new_carry = ((state[i] & (1U << 31U)) > 0 ? 1 : 0); // State update is technically just shift, or, and lookup // However, if we have a state consisting of 2 ints and the right one has 1 at position 31, // then the 1 would be lost upon shifting. So we still need to carry it over. state[i] = ((state[i] << 1U) + carry) | masks[i + id * width]; carry = new_carry; } // We have a matich if the last state in the NFA is active, which is represented as 0 in Shift-Or if((state.back() & (1U << ((pattern.size() - 1) % 32U))) == 0) positions.push_back(((it - text.begin()) - pattern.size()) + 1); } // Print the results to STDOUT if(!positions.empty()) { std::copy(positions.begin(), std::prev(positions.end()), std::ostream_iterator(std::cout, ",")); std::cout << positions.back() << "\n"; } else { std::cout << "Not Found.\n"; } return 0; }