FOSSology  3.2.0rc1
Open Source License Compliance by Open Source Software
cleanEntries.cc
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2014-2015, Siemens AG
3  * Author: Johannes Najjar
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12  * See the GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
25 #include "cleanEntries.hpp"
26 #include <sstream>
27 #include <iterator>
28 using std::stringstream;
29 using std::ostream_iterator;
30 
39 string cleanGeneral(string::const_iterator sBegin, string::const_iterator sEnd)
40 {
41  stringstream ss;
42  rx::regex_replace(ostream_iterator<char>(ss), sBegin, sEnd, rx::regex("[[:space:]\\x0-\\x1f]{2,}"), " ");
43  string s = ss.str();
44  string::size_type len = s.length();
45  if (len > 1)
46  {
47  char cBegin = s[0];
48  char cEnd = s[len - 1];
49  if (cBegin == ' ' && cEnd == ' ')
50  return s.substr(1, len - 2);
51  else if (cBegin == ' ')
52  return s.substr(1);
53  else if (cEnd == ' ')
54  return s.substr(0, len - 1);
55  }
56  // Only one character/space??? Should not be possible
57  return s == " " ? "" : s;
58 }
59 
67 string cleanStatement(string::const_iterator sBegin, string::const_iterator sEnd)
68 {
69  stringstream ss;
70  rx::regex_replace(ostream_iterator<char>(ss), sBegin, sEnd, rx::regex("\n[[:space:][:punct:]]*"), " ");
71  string s = ss.str();
72  return cleanGeneral(s.begin(), s.end());
73 }
74 
83 string cleanMatch(const string& sText, const match& m)
84 {
85  string::const_iterator it = sText.begin();
86  icu::UnicodeString unicodeStr = fo::recodeToUnicode(string(it + m.start,
87  it + m.end));
88  string utfCompatibleText;
89 
90  unicodeStr.toUTF8String(utfCompatibleText);
91 
92  if (m.type == "statement")
93  return cleanStatement(utfCompatibleText.begin(), utfCompatibleText.end());
94  else
95  return cleanGeneral(utfCompatibleText.begin(), utfCompatibleText.end());
96 }
97 
const int start
Definition: scanners.hpp:46
Store the results of a regex match.
Definition: scanners.hpp:39
int s
The socket that the CLI will use to communicate.
Definition: fo_cli.c:48
string cleanGeneral(string::const_iterator sBegin, string::const_iterator sEnd)
Trim space at beginning and end.
Definition: cleanEntries.cc:39
const int end
Definition: scanners.hpp:46
string cleanStatement(string::const_iterator sBegin, string::const_iterator sEnd)
Clean copyright statements from special characters (comment characters in programming languages, multiple spaces etc.)
Definition: cleanEntries.cc:67
icu::UnicodeString recodeToUnicode(const std::string &input)
Definition: libfossUtils.cc:43
string cleanMatch(const string &sText, const match &m)
Clean the text based on type.
Definition: cleanEntries.cc:83