#include <iostream>
#include <regex>
#include <string>
#include <vector>
using namespace std;
#define debug(exp) do { cout << #exp << ": " << (exp) << endl; } while (0)
// Question program: joins the dictionary words into one alternation regex and
// walks the successive matches over `text`, expecting each match to begin
// exactly where the previous one ended. It throws as soon as that expectation
// is (believed to be) violated.
int main (int argc, char *argv[])
{
vector<string> res;
string pattern (""),
text ("ilovechina");
vector<string> dict = { "i", "love", "china", "lovechina", "ilove" };
vector<string>::iterator dict_it = dict.begin ();
// An empty dictionary would leave the pattern empty; reject it up front.
if (dict_it == dict.end ()) throw invalid_argument (__func__);
// Build "w1|w2|...|wn"; breaking before the append avoids a trailing '|'.
while (true)
{
pattern += *dict_it;
if (++dict_it == dict.end ()) break;
else pattern += "|";
}
debug (pattern);
regex re (pattern);
// A default-constructed sregex_iterator serves as the end-of-sequence marker.
sregex_iterator ri (text.begin (), text.end (), re),
ri_end;
for (; ri != ri_end; ++ri)
{
ssub_match sm = (*ri)[0];
// Edit (author's note): this check is the bug.
// It was meant to detect a gap between the previous match and this one,
// but match_results::position is the offset from the start of the string
// being searched, not from the end of the previous match. Any match that
// does not start at offset 0 therefore takes this branch: "i" (offset 0)
// passes, then "love" (offset 1) throws.
if (ri->position (0)) // intended: "text before this match is not in the dictionary"
{
debug (ri->position (0));
debug (ri->length (0));
debug (string (sm.first, sm.second));
throw runtime_error ("not in dictionary");
}
// Edit (author's note): here position is used correctly, as an absolute
// offset: offset + length reaching text.length () means the match ends at
// the end of the text.
// NOTE(review): the break happens before push_back, so the final word is
// never stored in res.
if (ri->position (0) + ri->length (0) == text.length ()) // the last match ends the text
break;
res.push_back (sm.str ());
}
// NOTE(review): after the break above ri still differs from ri_end, so this
// throws on the success path, while running out of matches does not throw.
// The condition looks inverted; confirm the intent.
if (ri != ri_end) throw runtime_error ("not in dictionary");
}
I expect this code to delimit the words in `text` based on the elements of `dict`.
But why does it always skip the first word "i" and go straight to the second word "love", printing 1 as the first value of `ri->position (0)`?
My output:
pattern: i|love|china|lovechina|ilove
ri->position (0): 1
ri->length (0): 4
string (sm.first, sm.second): love
terminate called after throwing an instance of 'std::runtime_error'
what(): not in dictionary
Aborted (core dumped)
Probably just a stupid error.
Debugged version:
#include <iostream>
#include <regex>
#include <stdexcept>
#include <string>
#include <vector>
using namespace std;
#define debug(exp) do { cout << #exp << ": " << (exp) << endl; } while (0)
// Splits `text` into a sequence of words taken from `dict` such that the words,
// concatenated in order, reproduce `text` exactly.
//
// Dictionary entries are compared literally (characters such as '+' or '.' have
// no special meaning) and empty entries are ignored. When several splits exist,
// the word that appears earliest in `dict` is preferred at each position, as
// long as the remainder of the text can still be split.
//
// Parameters:
//   text - the string to split; an empty text yields an empty result.
//   dict - the allowed words, in order of preference.
// Returns: the words of the chosen split, in text order.
// Throws:  std::invalid_argument if `dict` is empty;
//          std::runtime_error ("search failed") if no split exists.
std::vector<std::string> delimit_words (const std::string &text,
                                        const std::vector<std::string> &dict)
{
  if (dict.empty ()) throw std::invalid_argument (__func__);

  const std::string::size_type n = text.length ();

  // True when `word` is non-empty and occurs in `text` starting exactly at `pos`.
  const auto matches_at = [&text, n] (const std::string &word,
                                      std::string::size_type pos)
  {
    return !word.empty ()
           && word.length () <= n - pos
           && text.compare (pos, word.length (), word) == 0;
  };

  // can_finish[pos] != 0 means text[pos, n) can be split completely into
  // dictionary words. Filled from the back so that every entry it depends on
  // is already known. (vector<char> rather than the packed vector<bool>.)
  std::vector<char> can_finish (n + 1, 0);
  can_finish[n] = 1;
  for (std::string::size_type pos = n; pos-- > 0;)
    {
      for (const std::string &word : dict)
        {
          if (matches_at (word, pos) && can_finish[pos + word.length ()])
            {
              can_finish[pos] = 1;
              break;
            }
        }
    }

  if (!can_finish[0]) throw std::runtime_error ("search failed");

  // Walk forward. Because can_finish[pos] holds at every step, a suitable word
  // always exists, so the walk never gets stuck and needs no backtracking.
  std::vector<std::string> res;
  std::string::size_type pos = 0;
  while (pos < n)
    {
      for (const std::string &word : dict)
        {
          if (matches_at (word, pos) && can_finish[pos + word.length ()])
            {
              res.push_back (word);
              pos += word.length ();
              break;
            }
        }
    }
  return res;
}
// Demo driver: splits the sample sentence with delimit_words and prints every
// resulting word followed by a single space, then ends the line.
int main (int argc, char *argv[])
{
  const std::string sentence = "ilovechina";
  const std::vector<std::string> vocabulary = { "i", "love", "china", "lovechina", "ilove" };
  const std::vector<std::string> pieces = delimit_words (sentence, vocabulary);
  for (const std::string &piece : pieces)
    std::cout << piece << ' ';
  std::cout << std::endl;
  return 0;
}