FOSSology  3.2.0rc1
Open Source License Compliance by Open Source Software
test_scanners.cc
1 /*********************************************************************
2 Copyright (C) 2014-2015, 2018 Siemens AG
3 
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License
6 version 2 as published by the Free Software Foundation.
7 
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12 
13 You should have received a copy of the GNU General Public License along
14 with this program; if not, write to the Free Software Foundation, Inc.,
15 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 *********************************************************************/
17 
18 #include <cppunit/TestFixture.h>
19 #include <cppunit/extensions/HelperMacros.h>
20 
21 #include "regex.hpp"
22 #include "regscan.hpp"
23 #include "copyrightUtils.hpp"
24 #include "cleanEntries.hpp"
25 #include <list>
26 #include <cstring>
27 #include <ostream>
28 
29 using namespace std;
30 
36 ostream& operator<<(ostream& out, const list<match>& l)
37 {
38  for (auto m = l.begin(); m != l.end(); ++m)
39  out << '[' << m->start << ':' << m->end << ':' << m->type << ']';
40  return out;
41 }
42 
// Shared input text for the scanner tests in this file: each test runs a
// scanner over it and compares the reported matches against substrings of it.
// Adjacent literals are concatenated by the compiler; the <author>/<head>
// literals carry no line breaks, so they run together with the "Apache" line.
46 const char testContent[] = "© 2007 Hugh Jackman\n\n"
47  "Copyright 2004 my company\n\n"
48  "Copyrights by any strange people\n\n"
49  "(C) copyright 2007-2011, 2013 my favourite company Google\n\n"
50  "(C) 2007-2011, 2013 my favourite company Google\n\n"
51  "if (c) { return -1 } \n\n"
52  "Written by: me, myself and Irene.\n\n"
53  "Authors all the people at ABC\n\n"
54  "<author>Author1</author>"
55  "<head>All the people</head>"
56  "<author>Author1 Author2 Author3</author>"
57  "<author>Author4</author><b>example</b>"
58  "Apache\n\n"
59  "This file is protected under pants 1 , 2 ,3\n\n"
60  "Do not modify this document\n\n"
61  "the shuttle is a space vehicle designed by NASA\n\n"
62  "visit http://mysite.org/FAQ or write to info@mysite.org\n\n"
63  "maintained by benjamin drieu <benj@debian.org>\n\n"
64  "* Copyright (c) 1989, 1993\n" // Really just one newline here!
65  "* The Regents of the University of California. All rights reserved.\n\n"
66  "to be licensed as a whole";
67 
68 class scannerTestSuite : public CPPUNIT_NS :: TestFixture {
69  CPPUNIT_TEST_SUITE (scannerTestSuite);
70  CPPUNIT_TEST (copyscannerTest);
71  CPPUNIT_TEST (regAuthorTest);
72  CPPUNIT_TEST (regEccTest);
73  CPPUNIT_TEST (regUrlTest);
74  CPPUNIT_TEST (regEmailTest);
75  CPPUNIT_TEST (regKeywordTest);
76  CPPUNIT_TEST (cleanEntries);
77 
78  CPPUNIT_TEST_SUITE_END ();
79 
80 private:
88  void scannerTest (const scanner& sc, const char* content, const string& type, list<const char*> expectedStrings)
89  {
90  list<match> matches;
91  list<match> expected;
92  sc.ScanString(content, matches);
93 
94  for (auto s = expectedStrings.begin(); s != expectedStrings.end(); ++s)
95  {
96  const char * p = strstr(content, *s);
97  if (p)
98  {
99  int pos = p - content;
100  expected.push_back(match(pos, pos+strlen(*s), type));
101  }
102  // else: expected string is not contained in original string
103  }
104  CPPUNIT_ASSERT_EQUAL(expected, matches);
105  }
106 
107 protected:
116  {
117  // Test copyright matcher
119 
120  scannerTest(sc, testContent, "statement", { "© 2007 Hugh Jackman",
121  "Copyright 2004 my company",
122  "Copyrights by any strange people",
123  "(C) copyright 2007-2011, 2013 my favourite company Google",
124  "(C) 2007-2011, 2013 my favourite company Google",
125  "Copyright (c) 1989, 1993\n* The Regents of the University of California. All rights reserved."
126  });
127  }
128 
137  {
138  regexScanner sc("author", "copyright");
139  scannerTest(sc, testContent, "author", {
140  "Written by: me, myself and Irene.",
141  "Authors all the people at ABC",
142  "Author1",
143  "Author1 Author2 Author3",
144  "Author4",
145  "maintained by benjamin drieu <benj@debian.org>"
146  });
147  }
148 
156  void regEccTest () {
157  regexScanner sc("ecc", "ecc");
158  scannerTest(sc, testContent, "ecc", { "space vehicle designed by NASA" });
159  }
160 
168  void regUrlTest () {
169  regexScanner sc("url", "copyright");
170  scannerTest(sc, testContent, "url", { "http://mysite.org/FAQ" });
171  }
172 
180  void regEmailTest () {
181  regexScanner sc("email", "copyright",1);
182  scannerTest(sc, testContent, "email", { "info@mysite.org", "benj@debian.org" });
183  }
184 
192  void regKeywordTest () {
193  regexScanner sc("keyword", "keyword");
194  scannerTest(sc, testContent, "keyword", {"patent", "licensed as"});
195  }
196 
205  void cleanEntries () {
206  // Binary content
207  string actualFileContent;
208  ReadFileToString("../testdata/testdata142", actualFileContent);
209 
210  vector<string> binaryStrings;
211  std::stringstream *ss = new std::stringstream(actualFileContent);
212  string temp;
213 
214  while (std::getline(*ss, temp)) {
215  binaryStrings.push_back(temp);
216  }
217 
218  // Simulate matches. Each line is a match
219  vector<match> matches;
220  int pos = 0;
221  int size = binaryStrings.size();
222  for (int i = 0; i < size; i++)
223  {
224  int length = binaryStrings[i].length();
225  matches.push_back(
226  match(pos, pos + length, "statement"));
227  pos += length + 1;
228  }
229 
230  // Expected data
231  string expectedFileContent;
232  ReadFileToString("../testdata/testdata142_exp", expectedFileContent);
233 
234  delete(ss);
235  ss = new std::stringstream(expectedFileContent);
236  vector<string> expectedStrings;
237  while (std::getline(*ss, temp)) {
238  expectedStrings.push_back(temp);
239  }
240 
241  vector<string> actualStrings;
242  for (size_t i = 0; i < matches.size(); i ++)
243  {
244  actualStrings.push_back(cleanMatch(actualFileContent, matches[i]));
245  }
246 
247  CPPUNIT_ASSERT(expectedStrings == actualStrings);
248  }
249 };
250 
251 CPPUNIT_TEST_SUITE_REGISTRATION( scannerTestSuite );
bool ReadFileToString(const string &fileName, string &out)
Utility: read file to string from scanners.h.
Definition: scanners.cc:32
Store the results of a regex match.
Definition: scanners.hpp:39
void regKeywordTest()
Test copyright scanner for keywords.
void regUrlTest()
Test copyright scanner for URL.
int s
The socket that the CLI will use to communicate.
Definition: fo_cli.c:48
virtual void ScanString(const string &s, list< match > &results) const =0
Scan the given string and add matches to results.
void regAuthorTest()
Test copyright scanner for author.
Provides a regex scanner using predefined regexs.
Definition: regscan.hpp:31
Abstract class to provide interface to scanners.
Definition: scanners.hpp:62
void scannerTest(const scanner &sc, const char *content, const string &type, list< const char * > expectedStrings)
Runs scanner on content and check matches against expectedStrings.
void copyscannerTest()
Test copyright scanner.
void cleanEntries()
Test cleanMatch() to remove non-UTF8 text and extra spaces.
void regEmailTest()
Test copyright scanner for email.
void regEccTest()
Test ECC scanner.
Implementation of scanner class for copyright.
Definition: copyscan.hpp:29
string cleanMatch(const string &sText, const match &m)
Clean the text based on type.
Definition: cleanEntries.cc:83