FOSSology  3.2.0rc1
Open Source License Compliance by Open Source Software
test_DoctoredBuffer.c
Go to the documentation of this file.
1 /*
2 Copyright (C) 2014, Siemens AG
3 
4 Author: Johannes Najjar, Steffen Weber
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License
7 version 2 as published by the Free Software Foundation.
8 
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13 
14 You should have received a copy of the GNU General Public License along
15 with this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
23 #include <stdbool.h>
24 
25 #include "nomos.h"
26 #include "doctorBuffer_utils.h"
27 #include <stdio.h> /* printf, scanf, NULL */
28 #include <stdlib.h> /* malloc, free, rand */
29 #include <CUnit/CUnit.h>
30 
31 #include <stdarg.h>
32 #include "nomos_utils.h"
33 
34 #include "nomos.h"
35 #include "util.h"
36 #include "list.h"
37 #include "licenses.h"
38 #include "process.h"
39 #include "nomos_regex.h"
40 #include "_autodefs.h"
41 
/*
 * NOTE(review): the function signature line is absent from this listing;
 * presumably this is the body of test_doctorBuffer() -- confirm against the
 * original file.
 *
 * Runs doctorBuffer() on a short string that carries line-comment markers and
 * hyphenated line breaks, then asserts that the modified buffer equals the
 * expected plain text.
 */
50 {
51  licenseInit();
52  char* buf, *fer;
53 
    /* fer is the input handed to doctorBuffer(); buf is the expected result. */
54  fer = g_strdup_printf("//Th- is is a li-\n// cence of the test string");
55  buf = g_strdup_printf("This is a li cence of the test string");
56  printf("%s \n", buf);
57  printf("%s \n", fer);
58  initializeCurScan(&cur);
    /* The three flag arguments (isML, isPS, isCR per the index below) are all off. */
59  doctorBuffer(fer, 0, 0, 0);
    /* Print both strings again so a failing comparison can be inspected in the log. */
60  printf("%s \n", buf);
61  printf("%s \n", fer);
62  CU_ASSERT_STRING_EQUAL(buf, fer);
63  freeAndClearScan(&cur);
64  g_free(buf);
65  g_free(fer);
66 }
67 
/*
 * NOTE(review): the function signature line is absent from this listing;
 * presumably this is the body of test_doctorBuffer_uncollapse() -- confirm.
 *
 * Doctors a copy of a string and checks, character by character, that
 * uncollapsePosition() maps every index of the doctored buffer back to an
 * index of the untouched copy that holds the same character.
 *
 * NOTE(review): unlike the neighbouring tests, the visible lines never call
 * freeAndClearScan(&cur) after initializeCurScan(&cur) -- confirm whether
 * this leaks per-scan data.
 */
76 {
77 
78  licenseInit();
79  char *fer, *cfer;
80  fer = g_strdup_printf("//This is the test string");
    /* cfer keeps the original text; fer is the one that gets doctored. */
81  cfer= g_strdup( fer);
82  printf("%s \n", fer);
83  initializeCurScan(&cur);
84  doctorBuffer(fer, 0, 0, 0);
85  printf("Before %d, after %d", (int) strlen(cfer), (int) strlen(fer));
86 
    /* strlen(fer) is re-evaluated on every iteration; fer is not modified in the loop. */
87  for (int i = 0; i < strlen(fer); i++)
88  {
89  CU_ASSERT_EQUAL(*(fer + i), *(cfer + uncollapsePosition(i, cur.docBufferPositionsAndOffsets)));
90  }
91 
92  g_free(cfer);
93  g_free(fer);
94 }
95 
/*
 * Print every recorded match of the current scan.
 *
 * Writes the number of entries in cur.theMatches, then for each entry and
 * each of its match positions prints the start/end offsets followed by the
 * characters of buf in the half-open range [start, end).
 *
 * buf: text whose characters are printed for each match range.
 *
 * NOTE(review): original lines 104 and 106 are missing from this listing
 * (the numbering jumps 102 -> 105 -> 107). The declarations of
 * licenceAndMatch and PaT are therefore not visible; presumably they come
 * from getLicenceAndMatchPositions() and getMatchfromHighlightInfo() --
 * confirm against the original file.
 */
99 static void report_Match(char* buf)
100 {
101  printf("I have %i matches \n", cur.theMatches->len);
102  for (int i = 0; i < cur.theMatches->len; ++i)
103  {
105  for (int k=0; k < licenceAndMatch->matchPositions->len; ++k) {
107  licenceAndMatch->matchPositions, k);
108  printf("Match from %d to %d: ", PaT->start, PaT->end);
109 
     /* Echo the matched text one character at a time; end is exclusive. */
110  for (int j = PaT->start; j < PaT->end; ++j)
111  {
112  printf("%c", *(buf + j));
113  }
114 
115  printf("\n");
116  }
117  }
118 }
119 
/*
 * NOTE(review): the function signature line is absent from this listing;
 * presumably this is the body of test_doctorBuffer_fromFile() -- confirm.
 *
 * Reads a license text from disk and runs the position-recording regex
 * search three times, printing the matches after each pass:
 *   1. idxGrep_recordPosition() on the untouched copy (undoc);
 *   2. idxGrep_recordPositionDoctored() on the doctored buffer, with the
 *      matches printed against the untouched copy;
 *   3. idxGrep_recordPosition() on the doctored buffer, with the matches
 *      printed against the doctored buffer.
 * The only CUnit assertion is on the number of bytes read.
 */
129 {
130 
131  licenseInit();
132  char* buf, *undoc;
133  buf = (char*) malloc(3000);
134 
     /* NOTE(review): the descriptor returned by open() is not checked before use. */
135  int f = open("../testdata/NomosTestfiles/WXwindows/WXwindows.txt", O_RDONLY);
136  int whatIread = read(f, buf, 3000);
137  close(f);
138 
     /*
      * NOTE(review): read() does not NUL-terminate and no terminator is written
      * here, yet buf is used as a C string below (g_strdup, printf) -- confirm.
      */
139  CU_ASSERT_EQUAL(whatIread, 2496);
140  undoc = g_strdup(buf);
141 
142  printf("\n%s\n", undoc);
143  fflush(stdout);
144  int licence_index = _PHR_WXWINDOWS;
145  int licence_index2 = _LT_LGPLref1;
146 
     /* Pass 1: record match positions on the undoctored copy. */
147  initializeCurScan(&cur);
148  cur.currentLicenceIndex=0;
149  g_array_append_val(cur.indexList, licence_index);
150  g_array_append_val(cur.indexList, licence_index2);
151  addLicence(cur.theMatches, "WXwindows");
152  idxGrep_recordPosition(licence_index, undoc, REG_ICASE | REG_EXTENDED);
153  idxGrep_recordPosition(licence_index2, undoc, REG_ICASE | REG_EXTENDED);
154  report_Match(undoc);
155  freeAndClearScan(&cur);
156 
     /*
      * NOTE(review): report_Match() reads cur.theMatches right after
      * freeAndClearScan(&cur) -- confirm that this access is still valid.
      */
157  report_Match(undoc);
158 
     /* Pass 2: doctor buf, then record positions with the "Doctored" variant. */
159  initializeCurScan(&cur);
160  cur.currentLicenceIndex=0;
161  g_array_append_val(cur.indexList, licence_index);
162  g_array_append_val(cur.indexList, licence_index2);
163  addLicence(cur.theMatches, "WXwindows");
164  doctorBuffer(buf, 0, 0, 0);
165 
166  printf("\n%s\n", buf);
167  idxGrep_recordPositionDoctored(licence_index, buf, REG_ICASE | REG_EXTENDED);
168  idxGrep_recordPositionDoctored(licence_index2, buf, REG_ICASE | REG_EXTENDED);
     /* Matches are printed against the undoctored text in this pass. */
169  report_Match(undoc);
170  freeAndClearScan(&cur);
171 
     /* Pass 3: plain position recording on the already doctored buffer. */
172  initializeCurScan(&cur);
173  cur.currentLicenceIndex=0;
174  g_array_append_val(cur.indexList, licence_index);
175  g_array_append_val(cur.indexList, licence_index2);
176  addLicence(cur.theMatches, "WXwindows");
177  idxGrep_recordPosition(licence_index, buf, REG_ICASE | REG_EXTENDED);
178  idxGrep_recordPosition(licence_index2, buf, REG_ICASE | REG_EXTENDED);
179  report_Match(buf);
180  freeAndClearScan(&cur);
181 
     /* buf came from malloc(), undoc from g_strdup(); each gets its matching free. */
182  free(buf);
183  g_free(undoc);
184 
185 }
186 
193 
/*
 * NOTE(review): the function signature line is absent from this listing;
 * presumably this is the body of test_1_removeHtmlComments() -- confirm.
 *
 * Applies removeHtmlComments() to a sample text and asserts that the modified
 * buffer equals the expected string te22Buffer.
 */
204 {
205  initializeCurScan(&cur);
     /* Input text; g_strdup_printf() is used only to obtain a heap copy of the literal. */
206  char* textBuffer = g_strdup_printf("&quot the big\t(C) and long\\n &quot\\s-1234,"
207  " test &copy; string \n con-\n// tains losts; of . <string test> &nbsp;"
208  " <body> \" compli-\n cated /* COMMENT s and funny */ Words as it \n "
209  "mimi-cs printf(\"Licence\"); and so on\n &quot \n ");
210  removeHtmlComments(textBuffer);
     /* Expected contents of textBuffer after the call. */
211  char* te22Buffer = g_strdup_printf(" quot the big\t(C) and long\\n quot\\s-1234,"
212  " test &copy string \n con-\n// tains losts; of . <string test nbsp "
213  " body \" compli-\n cated /* COMMENT s and funny */ Words as it \n "
214  "mimi-cs printf(\"Licence\"); and so on\n quot \n ");
215  CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
216  g_free(textBuffer);
217  g_free(te22Buffer);
218 }
219 
/*
 * NOTE(review): the function signature line is absent from this listing;
 * presumably this is the body of test_2_removeLineComments() -- confirm.
 *
 * Applies removeLineComments() to a sample text and asserts that the modified
 * buffer equals the expected string, in which the line-leading "//" has
 * become two \377 bytes.
 */
227 {
228  initializeCurScan(&cur);
229  char* textBuffer = g_strdup_printf(" quot the big\t(C) and long\\n quot\\s-1234,"
230  " test &copy string \n con-\n// tains losts; of . <string test nbsp "
231  " body \" compli-\n cated /* COMMENT s and funny */ Words as it \n "
232  "mimi-cs printf(\"Licence\"); and so on\n quot \n ");
233 
234  removeLineComments(textBuffer);
235 
     /* \377 is the octal escape for byte 0xFF; presumably the INVISIBLE placeholder -- confirm. */
236  char* te22Buffer = g_strdup_printf(" quot the big\t(C) and long\\n quot\\s-1234,"
237  " test &copy string \n con-\n\377\377 tains losts; of . <string test nbsp"
238  " body \" compli-\n cated /* COMMENT s and funny */ Words as it \n "
239  "mimi-cs printf(\"Licence\"); and so on\n quot \n ");
240  CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
241  g_free(textBuffer);
242  g_free(te22Buffer);
243 }
244 
/*
 * NOTE(review): the function signature line is absent from this listing;
 * presumably this is the body of test_3_cleanUpPostscript() -- confirm.
 *
 * Applies cleanUpPostscript() to a sample text and asserts that the modified
 * buffer equals the expected string.
 */
251 {
252  initializeCurScan(&cur);
     /* "%%" yields a single '%' because the literal is used as a printf-style format. */
253  char* textBuffer=g_strdup_printf("(8) (89) -9.- A %%!PS-Adobe-3.0 (12) EPSF-3.0 --8. -9.- A");
254 
255  cleanUpPostscript(textBuffer);
256 
257  char* te22Buffer = g_strdup_printf(" %%!PS-Adobe-3.0 (12) EPSF-3.0 ");
258  CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
259  g_free(textBuffer);
260  g_free(te22Buffer);
261 }
262 
263 
/*
 * NOTE(review): the function signature line is absent from this listing;
 * presumably this is the body of test_4_removeBackslashesAndGTroffIndicators()
 * -- confirm.
 *
 * Builds an input text and an expected text and asserts that they are equal.
 *
 * NOTE(review): original line 279 is missing from this listing (the numbering
 * jumps 278 -> 280), so the call that transforms textBuffer is not visible;
 * presumably removeBackslashesAndGTroffIndicators(textBuffer) -- confirm.
 */
272 {
273  initializeCurScan(&cur);
274  char* textBuffer= g_strdup_printf(" quot the big\t(C) and long\\n quot\\s-1234, test"
275  " &copy string \n con-\n\377\377 tains losts; of . <string test nbsp "
276  " body \" compli-\n cated /* COMMENT s and funny */ Words as it \n "
277  "mimi-cs printf(\"Licence\"); and so on\n quot \n ");
278 
280 
     /* Expected text: the literal backslash-n and the backslash-s-1234 sequence of the input are gone. */
281  char* te22Buffer = g_strdup_printf(" quot the big\t(C) and long quot , test"
282  " &copy string \n con-\n\377\377 tains losts; of . <string test nbsp "
283  " body \" compli-\n cated /* COMMENT s and funny */ Words as it \n "
284  "mimi-cs printf(\"Licence\"); and so on\n quot \n ");
285  CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
286  g_free(textBuffer);
287  g_free(te22Buffer);
288 }
289 
/*
 * NOTE(review): the function signature line is absent from this listing;
 * presumably this is the body of
 * test_5_convertWhitespaceToSpaceAndRemoveSpecialChars() -- confirm.
 *
 * Builds an input text and an expected text and asserts that they are equal.
 *
 * NOTE(review): original line 311 is missing from this listing (the numbering
 * jumps 310 -> 312), so the call that transforms textBuffer is not visible and
 * isCR appears unused; presumably
 * convertWhitespaceToSpaceAndRemoveSpecialChars(textBuffer, isCR) -- confirm.
 */
301 {
302  initializeCurScan(&cur);
303  char* textBuffer;
304  int isCR = NO; // isCR switches the replacement of ';' and '.', in the project it is always NO
305 
306  textBuffer = g_strdup_printf(" quot the big\t(C) and long quot , test"
307  " &copy string \n con-\n\377\377 tains losts; of . <string test nbsp "
308  " body \" compli-\n cated /* COMMENT s and funny */ Words as it \n "
309  "mimi-cs printf(\"Licence\"); and so on\n quot \n ");
310 
312 
     /* Expected text after the (not visible) transformation step. */
313  char* te22Buffer = g_strdup_printf(" quot the big (C) and long quot test"
314  " &copy string con- \377\377 tains losts of \377 test nbsp "
315  " body compli- cated / COMMENT s and funny / Words as it "
316  "mimi-cs printf Licence and so on quot ");
317  CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
318  g_free(textBuffer);
319  g_free(te22Buffer);
320 
321 }
322 
323 
/*
 * NOTE(review): the function signature line is absent from this listing;
 * presumably this is the body of test_6_dehyphen() -- confirm.
 *
 * Applies dehyphen() to a short string and asserts the expected result: the
 * two "- " sequences become \377\377 while the hyphen in "the-test" is kept.
 */
338 {
339  initializeCurScan(&cur);
340  char* buf, *fer;
     /* fer is the input modified by dehyphen(); buf is the expected result. */
341  fer= g_strdup_printf("This- is the-test str- ing");
342  buf= g_strdup_printf("This\377\377is the-test str\377\377ing");
343  dehyphen(fer);
344  CU_ASSERT_STRING_EQUAL(buf, fer);
345  g_free(buf);
346  g_free(fer);
347 }
348 
/*
 * NOTE(review): the function signature line is absent from this listing;
 * presumably this is the body of test_6a_dehyphen() -- confirm.
 *
 * Applies dehyphen() to the longer sample text and asserts that the modified
 * buffer equals the expected string te22Buffer.
 */
356 {
357  initializeCurScan(&cur);
358  char* textBuffer= g_strdup_printf(" quot the big (C) and long quot , "
359  "test &copy string con- \377\377 tains losts of \377 test"
360  " nbsp body compli- cated / COMMENT s and funny / Words as it "
361  "mimi-cs printf Licence and so on quot ");
362  dehyphen(textBuffer);
363 
     /* Expected contents of textBuffer after the call. */
364  char* te22Buffer = g_strdup_printf(" quot the big (C) and long quot , test &copy "
365  "string con- \377\377 tains losts of \377 test nbsp body "
366  "compli\377\377\377cated / COMMENT s and funny / Words as it "
367  "mimi-cs printf Licence and so on quot ");
368  CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
369  g_free(textBuffer);
370  g_free(te22Buffer);
371 
372 }
373 
/*
 * NOTE(review): the function signature line is absent from this listing;
 * presumably this is the body of test_7_removePunctuation() -- confirm.
 *
 * Applies removePunctuation() to the sample text and asserts that the
 * modified buffer equals the expected string te22Buffer.
 */
381 {
382  initializeCurScan(&cur);
383  char* textBuffer= g_strdup_printf(" quot the big (C) and long quot , "
384  "test &copy string con- \377\377 tains losts of \377 test "
385  "nbsp body compli\377\377\377cated / COMMENT s and funny / Words as it "
386  "mimi-cs printf Licence and so on quot ");
388  removePunctuation(textBuffer);
389 
     /* Expected contents of textBuffer after the call. */
390  char* te22Buffer = g_strdup_printf(" quot the big (C) and long quot , test "
391  "&copy string con \377\377 tains losts of \377 test nbsp body "
392  "compli\377\377\377cated COMMENT s and funny Words as it "
393  "mimi-cs printf Licence and so on quot ");
394  CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
395  g_free(textBuffer);
396  g_free(te22Buffer);
397 }
398 
/*
 * NOTE(review): the function signature line is absent from this listing;
 * presumably this is the body of test_8_ignoreFunctionCalls() -- confirm.
 *
 * Applies ignoreFunctionCalls() to the sample text and asserts the expected
 * result, in which the word "printf" no longer appears.
 */
410 {
411  initializeCurScan(&cur);
412  char* textBuffer= g_strdup_printf(" quot the big (C) and long quot , test &copy "
413  "string con \377\377 tains losts of \377 test nbsp body "
414  "compli\377\377\377cated COMMENT s and funny Words as it "
415  "mimi-cs printf Licence and so on quot ");
416  ignoreFunctionCalls(textBuffer);
417 
     /* Expected contents of textBuffer after the call. */
418  char* te22Buffer = g_strdup_printf(" quot the big (C) and long quot , test &copy "
419  "string con \377\377 tains losts of \377 test nbsp body "
420  "compli\377\377\377cated COMMENT s and funny Words as it "
421  "mimi-cs Licence and so on quot ");
422  CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
423  g_free(textBuffer);
424  g_free(te22Buffer);
425 }
426 
/*
 * NOTE(review): the function signature line is absent from this listing;
 * presumably this is the body of test_9_convertSpaceToInvisible() -- confirm.
 *
 * Applies convertSpaceToInvisible() to the sample text and asserts that the
 * modified buffer equals the expected string, which contains runs of \377
 * bytes.
 */
435 {
436  initializeCurScan(&cur);
437  char* textBuffer= g_strdup_printf(" quot the big (C) and long quot , test &copy "
438  "string con \377\377 tains losts of \377 test nbsp body "
439  "compli\377\377\377cated COMMENT s and funny Words as it "
440  "mimi-cs Licence and so on quot ");
441  convertSpaceToInvisible(textBuffer);
442 
     /* \377 is the octal escape for byte 0xFF; presumably the INVISIBLE placeholder -- confirm. */
443  char* te22Buffer = g_strdup_printf(" quot the big (C) and long \377\377\377quot \377\377\377\377\377\377, "
444  "test &copy \377string \377\377con \377\377\377\377tains losts \377of \377\377\377\377\377\377\377\377\377\377"
445  "test \377\377nbsp \377\377\377body \377\377\377compli\377\377\377cated \377\377\377COMMENT s "
446  "and funny \377\377\377Words as it \377\377mimi-cs \377\377\377\377\377\377\377\377Licence \377\377\377"
447  "and so on \377\377quot \377\377");
448 // printf("%s\n", te22Buffer);
449  CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
450  g_free(textBuffer);
451  g_free(te22Buffer);
452 }
453 
/*
 * NOTE(review): the function signature line is absent from this listing;
 * presumably this is the body of test_10_compressDoctoredBuffer() -- confirm.
 *
 * Applies compressDoctoredBuffer() to a text containing \377 bytes and
 * asserts the expected result, which contains none of them.
 */
460 {
461  initializeCurScan(&cur);
462  char* textBuffer= g_strdup_printf(" quot the big (C) and long \377\377\377quot \377\377\377\377\377\377, "
463  "test &copy \377string \377\377con \377\377\377\377tains losts \377of \377\377\377\377\377\377\377\377\377\377"
464  "test \377\377nbsp \377\377\377body \377\377\377compli\377\377\377cated \377\377\377COMMENT s "
465  "and funny \377\377\377Words as it \377\377mimi-cs \377\377\377\377\377\377\377\377Licence \377\377\377"
466  "and so on \377\377quot \377\377");
467 
     /* The int return value of compressDoctoredBuffer() is not checked here. */
468  compressDoctoredBuffer(textBuffer);
469 
470  char* te22Buffer = g_strdup_printf(" quot the big (C) and long quot , test &copy string "
471  "con tains losts of test nbsp body complicated COMMENT s and funny Words as "
472  "it mimi-cs Licence and so on quot ");
473 
474  CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
475  g_free(textBuffer);
476  g_free(te22Buffer);
477 }
478 
/*
 * CUnit test table for this file: each entry pairs a display label with the
 * test function to run; the list is terminated by CU_TEST_INFO_NULL.
 *
 * NOTE(review): the labels "doctorBufer_uncollapse" and "removeHtmlComents"
 * look misspelled; they are runtime strings and are left untouched here.
 */
479 CU_TestInfo doctorBuffer_testcases[] =
480 {
481 { "Testing doctorBuffer:", test_doctorBuffer },
482 { "Testing doctorBufer_uncollapse", test_doctorBuffer_uncollapse },
483 { "Testing doctorBuffer_fromFile", test_doctorBuffer_fromFile },
484 { "Testing removeHtmlComents:", test_1_removeHtmlComments },
485 { "Testing removeLineComments:", test_2_removeLineComments },
486 { "Testing cleanUpPostscript:", test_3_cleanUpPostscript },
487 { "Testing removeBackslashes:", test_4_removeBackslashesAndGTroffIndicators },
488 { "Testing convertWhitespace:", test_5_convertWhitespaceToSpaceAndRemoveSpecialChars },
489 { "Testing dehyphen:", test_6_dehyphen },
490 { "Testing dehyphen2:", test_6a_dehyphen },
491 { "Testing removePunctuation:", test_7_removePunctuation },
492 { "Testing ignoreFunctionCalls:", test_8_ignoreFunctionCalls },
493 { "Testing convertSpaceToInvisible:", test_9_convertSpaceToInvisible },
494 { "Testing compressDoctoredBuffer:", test_10_compressDoctoredBuffer },
495 CU_TEST_INFO_NULL };
496 
void convertSpaceToInvisible(char *buf)
void removeBackslashesAndGTroffIndicators(char *buf)
Remove groff/troff font-size indicators, the literal string backslash-n and all backslashes.
void test_doctorBuffer()
Test for doctorBuffer()
void test_8_ignoreFunctionCalls()
Test for ignoreFunctionCalls()
void doctorBuffer(char *buf, int isML, int isPS, int isCR)
Convert a buffer of multiple stuff to text-only, separated by spaces.
void test_7_removePunctuation()
Test for removePunctuation()
int idxGrep_recordPosition(int index, char *data, int flags)
compile a regex, perform the search and record findings
Definition: nomos_regex.c:231
int compressDoctoredBuffer(char *textBuffer)
garbage collect: eliminate all INVISIBLE characters in the buffer
GArray * matchPositions
Match positions.
Definition: nomos.h:392
int idxGrep_recordPositionDoctored(int index, char *data, int flags)
compile a regex, perform the search and record findings
Definition: nomos_regex.c:252
int start
Start position of match.
Definition: nomos.h:383
FUNCTION MatchPositionAndType * getMatchfromHighlightInfo(GArray *in, int index)
Get the MatchPositionAndType for a given index in highlight array.
Definition: nomos_utils.c:909
GArray * theMatches
Definition: nomos.h:430
void ignoreFunctionCalls(char *buf)
Ignore function calls to print routines.
void dehyphen(char *buf)
void convertWhitespaceToSpaceAndRemoveSpecialChars(char *buf, int isCR)
Convert white-space to real spaces, and remove unnecessary punctuation.
FUNCTION void addLicence(GArray *theMatches, char *licenceName)
Add a license to the matches array.
Definition: nomos_utils.c:999
void test_5_convertWhitespaceToSpaceAndRemoveSpecialChars()
Test for convertWhitespaceToSpaceAndRemoveSpecialChars()
void test_9_convertSpaceToInvisible()
Test for convertSpaceToInvisible()
void removePunctuation(char *buf)
Clean up miscellaneous punctuation.
FUNCTION LicenceAndMatchPositions * getLicenceAndMatchPositions(GArray *in, int index)
Get the LicenceAndMatchPositions for a given index in match array.
Definition: nomos_utils.c:921
void licenseInit()
license initialization
Definition: licenses.c:82
void removeLineComments(char *buf)
Remove comments that start at the beginning of a line.
void test_doctorBuffer_fromFile()
Test for idxGrep_recordPosition()
void test_4_removeBackslashesAndGTroffIndicators()
Test for removeBackslashesAndGTroffIndicators()
FUNCTION void freeAndClearScan(struct curScan *thisScan)
Clean-up all the per scan data structures, freeing any old data.
Definition: nomos_utils.c:949
void test_1_removeHtmlComments()
Test for removeHtmlComments()
void cleanUpPostscript(char *buf)
Remove newlines from buffer.
static void report_Match(char *buf)
Helper function to match licenses and highlight info.
void test_doctorBuffer_uncollapse()
Test for uncollapsePosition()
int end
End position of match.
Definition: nomos.h:384
void removeHtmlComments(char *buf)
Remove HTML comments from buffer without removing comment text.
void test_3_cleanUpPostscript()
Test for cleanUpPostscript()
GArray * indexList
Definition: nomos.h:429
Nomos header file.
#define NO
Definition: nomos.h:184
void test_10_compressDoctoredBuffer()
Test for compressDoctoredBuffer()
void test_2_removeLineComments()
Test for removeLineComments()
void test_6_dehyphen()
Test for dehyphen()
void test_6a_dehyphen()
Test for dehyphen()
FUNCTION void initializeCurScan(struct curScan *cur)
Initialize the scanner.
Definition: nomos_utils.c:934