Word Finding
The find_words function with a small demonstration driver.
/* * A small program that shows the find_words method and shows how to run it. */ #include <cctype> #include <iostream> #include <string> #include <utility> #include <list> using namespace std; /* * Take a string, and return a list of pairs. Each pair is the position of * the start of a alphabetic word, and the position of the first character * after the word. Like this: * * 111111111122222222223333333333444444444455555555556666666666 * 0123456789012345678901234567890123456789012345678901234567890123456789 * This is a line containing some words. * ^ ^ ^ ^ ^^ ^ ^^ ^^ ^ ^ ^ * [2,6] [9,11] [17,18] [24,28] [29,39] [40,44] [47,52] */ auto find_words(string s) { // All the lovely letters of the alphabet. static const string ALPHABET = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; // This variable will be returned, and the return statement // will be used by the auto to decide what type the function returns. // So it returns the type of this variable. list<pair<int,int>> ret; // First character of the first alphanumeric word. int start = s.find_first_of(ALPHABET); while(start != string::npos) { // Get the end of the word. If word extends to the end of // the string, the location one after it is also after the // string. int end = s.find_first_not_of(ALPHABET,start); if(end == string::npos) end = s.size(); // Push the pair onto the end of the return list. ret.push_back(make_pair(start,end)); // Find the start of the next word start = s.find_first_of(ALPHABET,end); } // See? The auto return value is controlled by this statement. // If I had multiple return statements, the compiler would insist // they return the same type of expression. return ret; } int main() { // 0000000000111111111122222222223333333333444444 // 0123456789012345678901234567890123456789012345 string t = " This is the #1 (and only) test string"; // Run the find_words method. auto res = find_words(t); // The res is a list of pairs, which are positions of the words // inside the string. cout << " 0000000000111111111122222222223333333333444444" << endl; cout << " 0123456789012345678901234567890123456789012345" << endl; cout << "For \"" << t << "\":" << endl; for(auto p: res) { cout << " Found \"" << t.substr(p.first, p.second - p.first) << "\" at position " << p.first << " up to " << p.second << endl; } }