// Word indexer: builds and queries a per-file word-count index.
//
// Usage:
//   <program> index  <directory_to_index> <index_file>
//   <program> search <index_file> <term>
//
// Index file format, one record per line:
//   <word>///<file path>///<occurrence count>
//
// File built using the following: (Boost required)
//   g++ -std=c++11 indexer.cpp -o indexer -lboost_system -lboost_filesystem
// When compiled as C++17 or newer the standard <filesystem> library is used
// instead of Boost:
//   g++ -std=c++17 indexer.cpp -o indexer

#include <cctype>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <forward_list>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>

#if __cplusplus >= 201703L
#include <filesystem>
namespace fs = std::filesystem;
#else
#include <boost/filesystem.hpp>
namespace fs = boost::filesystem;
#endif

using word = std::string;
using filename = std::string;
// word -> (file path -> number of occurrences of the word in that file)
using index_type = std::map<word, std::map<filename, unsigned long>>;

namespace {

// Field separator used in the index file.
const std::string kDelimiter = "///";

}  // namespace

// Prints every file recorded in the index for a search term.
//
// Expects argv[2] to be the index file and argv[3] the term to look up.
// Returns EXIT_SUCCESS when the index could be read (even if the term was not
// found) and EXIT_FAILURE on missing arguments or an unreadable index file.
int search(int argc, char** argv) {
    if (argc < 4) {
        std::cerr << "Index file and term not specified.\n";
        return EXIT_FAILURE;
    }

    const std::string given_search_term(argv[3]);
    std::ifstream index_file(argv[2]);
    if (!index_file) {
        std::cerr << "Could not open index file.\n";
        return EXIT_FAILURE;
    }

    bool found = false;
    std::string line;
    while (std::getline(index_file, line)) {
        const std::string::size_type first_delimiter = line.find(kDelimiter);
        // Lines without a delimiter are not index records; skip them.
        if (first_delimiter == std::string::npos)
            continue;
        if (line.compare(0, first_delimiter, given_search_term) != 0)
            continue;

        const std::string::size_type name_begin = first_delimiter + kDelimiter.size();
        // Words are purely alphabetic and counts purely numeric, so the first
        // and the last delimiter of a record are the real ones even when the
        // file path itself contains "///".
        std::string::size_type name_end = line.rfind(kDelimiter);
        if (name_end < name_begin)
            name_end = std::string::npos;  // no count field: print the remainder

        found = true;
        // With name_end == npos the length is huge and substr clamps it.
        std::cout << line.substr(name_begin, name_end - name_begin) << '\n';
    }

    if (!found)
        std::cout << "Search term not found in files.\n";
    return EXIT_SUCCESS;
}

// Returns the paths of the regular files directly inside `directory`.
//
// Sub-directories and other non-regular entries are left out because they
// cannot be read as text. The order of the returned list is unspecified.
std::forward_list<filename> build_file_list(const fs::path& directory) {
    std::forward_list<filename> files;
    const fs::directory_iterator last;
    for (fs::directory_iterator entry(directory); entry != last; ++entry) {
        if (!fs::is_regular_file(entry->status()))
            continue;
        // string() is a narrow string on every platform, unlike native().
        files.push_front(entry->path().string());
    }
    return files;
}

// Splits a file stream into lower-cased words, where a word is a maximal run
// of alphabetic characters.
class word_iterator {
public:
    explicit word_iterator(std::ifstream& file) : file_(file) {}

    // Stores the next word in `word` and returns true, or clears `word` and
    // returns false when the stream has no further words.
    bool next(std::string& word) {
        word.clear();

        // get() yields either eof() or an unsigned-char value, both of which
        // are valid arguments for the <cctype> functions.
        std::ifstream::int_type letter = file_.get();
        while (!is_end(letter) && !std::isalpha(letter))
            letter = file_.get();
        if (is_end(letter))
            return false;

        do {
            word += static_cast<char>(std::tolower(letter));
            letter = file_.get();
        } while (!is_end(letter) && std::isalpha(letter));
        return true;
    }

    // True once the stream cannot deliver more characters, either because the
    // end of the file was reached or because reading failed.
    inline bool end() const { return !file_.good(); }

private:
    // get() returns eof() both at the end of the file and on a read failure.
    static bool is_end(std::ifstream::int_type value) {
        return value == std::ifstream::traits_type::eof();
    }

    std::ifstream& file_;
};

// Counts, for every word, how often it occurs in each of the given files.
//
// Files that cannot be opened are reported on stderr and skipped.
index_type build_index(const std::forward_list<filename>& files_to_be_indexed) {
    index_type counts;
    for (const auto& file_path : files_to_be_indexed) {
        std::ifstream file(file_path);
        if (!file) {
            std::cerr << "Skipping unreadable file: " << file_path << '\n';
            continue;
        }
        word_iterator words(file);
        std::string current;
        while (words.next(current))
            ++counts[current][file_path];
    }
    return counts;
}

// Writes the index to `filename`, one "<word>///<file>///<count>" record per
// line, replacing any previous content.
//
// Returns true when every record was written successfully.
bool write_index_to_file(const index_type& index, const char* filename) {
    std::ofstream index_file(filename);
    if (!index_file)
        return false;

    for (const auto& word_entry : index) {
        for (const auto& file_entry : word_entry.second) {
            index_file << word_entry.first << kDelimiter
                       << file_entry.first << kDelimiter
                       << file_entry.second << '\n';
        }
    }
    index_file.flush();
    return index_file.good();
}

// Indexes the directory named by argv[2] and stores the result in the index
// file named by argv[3].
//
// Returns EXIT_SUCCESS when the index was written and EXIT_FAILURE otherwise.
int index(int argc, char** argv) {
    // Both argv[2] and argv[3] are read below, so four arguments are needed.
    if (argc < 4) {
        std::cerr << "Need to specify a directory and an index file to create." << '\n';
        return EXIT_FAILURE;
    }

    const fs::path directory(argv[2]);
    const char* index_file = argv[3];

    if (!fs::is_directory(directory)) {
        std::cerr << "Specified directory is invalid." << '\n';
        return EXIT_FAILURE;
    }

    const std::forward_list<filename> files_to_be_indexed =
        build_file_list(fs::absolute(directory));
    const index_type words_in_files = build_index(files_to_be_indexed);
    if (!write_index_to_file(words_in_files, index_file)) {
        std::cerr << "Could not write index file." << '\n';
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}

// Prints the command line synopsis to stderr.
void error_usage(const char* program_name) {
    std::cerr << "Incorrect command line usage\n";
    std::cerr << "Usage: " << program_name << " index directory_to_index index_file\n";
    std::cerr << "Usage: " << program_name << " search index_file term\n";
}

// Dispatches to the "search" or "index" sub-command named by argv[1].
int main(int argc, char** argv) {
    if (argc < 2) {
        error_usage(argv[0]);
        return EXIT_FAILURE;
    }

    try {
        if (std::strcmp(argv[1], "search") == 0)
            return search(argc, argv);
        if (std::strcmp(argv[1], "index") == 0)
            return index(argc, argv);
    } catch (const std::exception& error) {
        // Filesystem operations throw, e.g. on a permission error.
        std::cerr << "Error: " << error.what() << '\n';
        return EXIT_FAILURE;
    }

    error_usage(argv[0]);
    return EXIT_FAILURE;
}