// The find_words function with a small demonstration driver.
/*
* A small program that defines the find_words function and demonstrates
* how to call it.
*/
#include <cctype>
#include <iostream>
#include <string>
#include <utility>
#include <list>
using namespace std;
/*
* Take a string, and return a list of pairs. Each pair is the position of
* the start of an alphabetic word, and the position of the first character
* after the word (a half-open range [start, end)). A "word" is a maximal
* run of the ASCII letters a-z and A-Z; digits, punctuation and whitespace
* all act as separators. Like this:
*
*           111111111122222222223333333333444444444455555555556666666666
* 0123456789012345678901234567890123456789012345678901234567890123456789
*   This   is      a      line containing some   words.
*   ^   ^  ^ ^     ^^     ^   ^^         ^^   ^  ^    ^
*  [2,6] [9,11] [17,18] [24,28] [29,39] [40,44] [47,52]
*
* An input with no letters (including the empty string) yields an empty
* list. Positions are reported as int, so they are only meaningful for
* strings whose length fits in an int.
*/
auto find_words(const std::string& s)
{
    // All the lovely letters of the alphabet.
    static const std::string ALPHABET =
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

    // This variable will be returned, and the return statement
    // will be used by the auto to decide what type the function returns.
    // So it returns the type of this variable.
    std::list<std::pair<int,int>> ret;

    // Search positions stay in the string's own unsigned size_type so the
    // comparisons against npos are exact; they are narrowed to int only
    // when stored in the result.
    using pos_type = std::string::size_type;

    // First character of the first alphabetic word.
    pos_type start = s.find_first_of(ALPHABET);
    while (start != std::string::npos) {
        // Get the end of the word. If the word extends to the end of
        // the string, the location one after it is also after the
        // string.
        pos_type end = s.find_first_not_of(ALPHABET, start);
        if (end == std::string::npos) {
            end = s.size();
        }

        // Append the [start, end) pair to the return list.
        ret.emplace_back(static_cast<int>(start), static_cast<int>(end));

        // Find the start of the next word. When end == s.size() this
        // search returns npos and the loop terminates.
        start = s.find_first_of(ALPHABET, end);
    }

    // See? The auto return value is controlled by this statement.
    // If I had multiple return statements, the compiler would insist
    // they return the same type of expression.
    return ret;
}
/*
* Demonstration driver: runs find_words on one sample string and prints
* every word it found together with its start and one-past-the-end
* positions.
*/
int main()
{
    // The sample input, with a column ruler above it.
    // 0000000000111111111122222222223333333333444444
    // 0123456789012345678901234567890123456789012345
    const std::string text = " This is the #1 (and only) test string";

    // Each element of the result is a (start, one-past-end) position
    // pair for one word inside the sample string.
    const auto words = find_words(text);

    // Print a column ruler, then the input itself.
    std::cout << " 0000000000111111111122222222223333333333444444" << std::endl;
    std::cout << " 0123456789012345678901234567890123456789012345" << std::endl;
    std::cout << "For \"" << text << "\":" << std::endl;

    // Report each word by slicing it back out of the input.
    for (const auto& span : words) {
        const auto first = span.first;
        const auto past_end = span.second;
        const auto length = past_end - first;
        std::cout << " Found \"" << text.substr(first, length)
                  << "\" at position " << first << " up to "
                  << past_end << std::endl;
    }
}