25 #include "cleanEntries.hpp" 28 using std::stringstream;
29 using std::ostream_iterator;
// Trim space at beginning and end of the range [sBegin, sEnd) and return the
// result as a new string.
// NOTE(review): this listing is an extract with gaps (the leading numbers are
// the original file's line numbers); the declarations of `ss`, `s` and
// `cBegin` are on lines that are not visible here.
39 string cleanGeneral(string::const_iterator sBegin, string::const_iterator sEnd)
// Copy the input into `ss`, replacing every run of two or more characters
// that are whitespace or in the range \x0-\x1f with a single space. A lone
// such character is left untouched by this pattern.
42 rx::regex_replace(ostream_iterator<char>(ss), sBegin, sEnd, rx::regex(
"[[:space:]\\x0-\\x1f]{2,}"),
" ");
// presumably `s` holds the contents of `ss` (its declaration is not visible) - TODO confirm
44 string::size_type len = s.length();
// Last character of `s`; indexing with len - 1 requires len > 0.
// NOTE(review): a length guard is presumably on one of the missing lines 45-47 - confirm.
48 char cEnd = s[len - 1];
// A space at both ends: drop one character from each end.
49 if (cBegin ==
' ' && cEnd ==
' ')
50 return s.substr(1, len - 2);
// NOTE(review): as extracted, a leading-space test is followed by a return
// that drops the LAST character. Listing lines 52-53 are missing, so the
// leading-space return and a separate trailing-space test were presumably
// lost in extraction - verify against the real file before changing anything.
51 else if (cBegin ==
' ')
54 return s.substr(0, len - 1);
// Final return: a string that is exactly one space becomes the empty string;
// anything else is returned unchanged.
57 return s ==
" " ?
"" :
s;
// Clean a copyright statement given as the range [sBegin, sEnd).
// NOTE(review): only the signature and the regex_replace call are visible in
// this extract; the declaration of `ss` and the return statement are on lines
// that are not shown.
67 string cleanStatement(string::const_iterator sBegin, string::const_iterator sEnd)
// Copy the input into `ss`, replacing each newline together with any run of
// whitespace/punctuation characters that directly follows it by one space.
70 rx::regex_replace(ostream_iterator<char>(ss), sBegin, sEnd, rx::regex(
"\n[[:space:][:punct:]]*"),
" ");
// Body fragment that converts text to UTF-8 and cleans it based on m.type.
// NOTE(review): the enclosing signature is not visible in this extract
// (presumably cleanMatch(const string &sText, const match &m) - confirm), and
// neither is the declaration of `unicodeStr`.
// Iterator to the start of the input text; no use of `it` is visible in this
// extract - TODO confirm whether it is used on the missing lines.
85 string::const_iterator it = sText.begin();
// Destination buffer for the UTF-8 form of the text.
88 string utfCompatibleText;
// Write `unicodeStr` as UTF-8 into utfCompatibleText.
// presumably `unicodeStr` is built from sText on a missing line - verify.
90 unicodeStr.toUTF8String(utfCompatibleText);
// Matches of type "statement" go through cleanStatement ...
92 if (m.type ==
"statement")
93 return cleanStatement(utfCompatibleText.begin(), utfCompatibleText.end());
// ... every other type goes through cleanGeneral.
95 return cleanGeneral(utfCompatibleText.begin(), utfCompatibleText.end());
Store the results of a regex match.
int s
The socket that the CLI will use to communicate.
string cleanGeneral(string::const_iterator sBegin, string::const_iterator sEnd)
Trim space at beginning and end.
string cleanStatement(string::const_iterator sBegin, string::const_iterator sEnd)
Clean copyright statements from special characters (comment characters in programming languages...
icu::UnicodeString recodeToUnicode(const std::string &input)
string cleanMatch(const string &sText, const match &m)
Clean the text based on type.