26 #include "doctorBuffer_utils.h" 29 #include <CUnit/CUnit.h> 32 #include "nomos_utils.h" 39 #include "nomos_regex.h" 40 #include "_autodefs.h" 54 fer = g_strdup_printf(
"//Th- is is a li-\n// cence of the test string");
55 buf = g_strdup_printf(
"This is a li cence of the test string");
62 CU_ASSERT_STRING_EQUAL(buf, fer);
80 fer = g_strdup_printf(
"//This is the test string");
85 printf(
"Before %d, after %d", (
int) strlen(cfer), (
int) strlen(fer));
87 for (
int i = 0; i < strlen(fer); i++)
89 CU_ASSERT_EQUAL(*(fer + i), *(cfer + uncollapsePosition(i, cur.docBufferPositionsAndOffsets)));
101 printf(
"I have %i matches \n", cur.
theMatches->len);
108 printf(
"Match from %d to %d: ", PaT->
start, PaT->
end);
110 for (
int j = PaT->
start; j < PaT->end; ++j)
112 printf(
"%c", *(buf + j));
133 buf = (
char*) malloc(3000);
135 int f = open(
"../testdata/NomosTestfiles/WXwindows/WXwindows.txt", O_RDONLY);
136 int whatIread = read(f, buf, 3000);
139 CU_ASSERT_EQUAL(whatIread, 2496);
140 undoc = g_strdup(buf);
142 printf(
"\n%s\n", undoc);
144 int licence_index = _PHR_WXWINDOWS;
145 int licence_index2 = _LT_LGPLref1;
148 cur.currentLicenceIndex=0;
149 g_array_append_val(cur.
indexList, licence_index);
150 g_array_append_val(cur.
indexList, licence_index2);
160 cur.currentLicenceIndex=0;
161 g_array_append_val(cur.
indexList, licence_index);
162 g_array_append_val(cur.
indexList, licence_index2);
166 printf(
"\n%s\n", buf);
173 cur.currentLicenceIndex=0;
174 g_array_append_val(cur.
indexList, licence_index);
175 g_array_append_val(cur.
indexList, licence_index2);
206 char* textBuffer = g_strdup_printf(
"" the big\t(C) and long\\n "\\s-1234," 207 " test © string \n con-\n// tains losts; of . <string test> " 208 " <body> \" compli-\n cated /* COMMENT s and funny */ Words as it \n " 209 "mimi-cs printf(\"Licence\"); and so on\n " \n ");
211 char* te22Buffer = g_strdup_printf(
" quot the big\t(C) and long\\n quot\\s-1234," 212 " test © string \n con-\n// tains losts; of . <string test nbsp " 213 " body \" compli-\n cated /* COMMENT s and funny */ Words as it \n " 214 "mimi-cs printf(\"Licence\"); and so on\n quot \n ");
215 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
229 char* textBuffer = g_strdup_printf(
" quot the big\t(C) and long\\n quot\\s-1234," 230 " test © string \n con-\n// tains losts; of . <string test nbsp " 231 " body \" compli-\n cated /* COMMENT s and funny */ Words as it \n " 232 "mimi-cs printf(\"Licence\"); and so on\n quot \n ");
236 char* te22Buffer = g_strdup_printf(
" quot the big\t(C) and long\\n quot\\s-1234," 237 " test © string \n con-\n\377\377 tains losts; of . <string test nbsp" 238 " body \" compli-\n cated /* COMMENT s and funny */ Words as it \n " 239 "mimi-cs printf(\"Licence\"); and so on\n quot \n ");
240 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
253 char* textBuffer=g_strdup_printf(
"(8) (89) -9.- A %%!PS-Adobe-3.0 (12) EPSF-3.0 --8. -9.- A");
257 char* te22Buffer = g_strdup_printf(
" %%!PS-Adobe-3.0 (12) EPSF-3.0 ");
258 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
274 char* textBuffer= g_strdup_printf(
" quot the big\t(C) and long\\n quot\\s-1234, test" 275 " © string \n con-\n\377\377 tains losts; of . <string test nbsp " 276 " body \" compli-\n cated /* COMMENT s and funny */ Words as it \n " 277 "mimi-cs printf(\"Licence\"); and so on\n quot \n ");
281 char* te22Buffer = g_strdup_printf(
" quot the big\t(C) and long quot , test" 282 " © string \n con-\n\377\377 tains losts; of . <string test nbsp " 283 " body \" compli-\n cated /* COMMENT s and funny */ Words as it \n " 284 "mimi-cs printf(\"Licence\"); and so on\n quot \n ");
285 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
306 textBuffer = g_strdup_printf(
" quot the big\t(C) and long quot , test" 307 " © string \n con-\n\377\377 tains losts; of . <string test nbsp " 308 " body \" compli-\n cated /* COMMENT s and funny */ Words as it \n " 309 "mimi-cs printf(\"Licence\"); and so on\n quot \n ");
313 char* te22Buffer = g_strdup_printf(
" quot the big (C) and long quot test" 314 " © string con- \377\377 tains losts of \377 test nbsp " 315 " body compli- cated / COMMENT s and funny / Words as it " 316 "mimi-cs printf Licence and so on quot ");
317 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
341 fer= g_strdup_printf(
"This- is the-test str- ing");
342 buf= g_strdup_printf(
"This\377\377is the-test str\377\377ing");
344 CU_ASSERT_STRING_EQUAL(buf, fer);
358 char* textBuffer= g_strdup_printf(
" quot the big (C) and long quot , " 359 "test © string con- \377\377 tains losts of \377 test" 360 " nbsp body compli- cated / COMMENT s and funny / Words as it " 361 "mimi-cs printf Licence and so on quot ");
364 char* te22Buffer = g_strdup_printf(
" quot the big (C) and long quot , test © " 365 "string con- \377\377 tains losts of \377 test nbsp body " 366 "compli\377\377\377cated / COMMENT s and funny / Words as it " 367 "mimi-cs printf Licence and so on quot ");
368 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
383 char* textBuffer= g_strdup_printf(
" quot the big (C) and long quot , " 384 "test © string con- \377\377 tains losts of \377 test " 385 "nbsp body compli\377\377\377cated / COMMENT s and funny / Words as it " 386 "mimi-cs printf Licence and so on quot ");
390 char* te22Buffer = g_strdup_printf(
" quot the big (C) and long quot , test " 391 "© string con \377\377 tains losts of \377 test nbsp body " 392 "compli\377\377\377cated COMMENT s and funny Words as it " 393 "mimi-cs printf Licence and so on quot ");
394 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
412 char* textBuffer= g_strdup_printf(
" quot the big (C) and long quot , test © " 413 "string con \377\377 tains losts of \377 test nbsp body " 414 "compli\377\377\377cated COMMENT s and funny Words as it " 415 "mimi-cs printf Licence and so on quot ");
418 char* te22Buffer = g_strdup_printf(
" quot the big (C) and long quot , test © " 419 "string con \377\377 tains losts of \377 test nbsp body " 420 "compli\377\377\377cated COMMENT s and funny Words as it " 421 "mimi-cs Licence and so on quot ");
422 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
437 char* textBuffer= g_strdup_printf(
" quot the big (C) and long quot , test © " 438 "string con \377\377 tains losts of \377 test nbsp body " 439 "compli\377\377\377cated COMMENT s and funny Words as it " 440 "mimi-cs Licence and so on quot ");
443 char* te22Buffer = g_strdup_printf(
" quot the big (C) and long \377\377\377quot \377\377\377\377\377\377, " 444 "test © \377string \377\377con \377\377\377\377tains losts \377of \377\377\377\377\377\377\377\377\377\377" 445 "test \377\377nbsp \377\377\377body \377\377\377compli\377\377\377cated \377\377\377COMMENT s " 446 "and funny \377\377\377Words as it \377\377mimi-cs \377\377\377\377\377\377\377\377Licence \377\377\377" 447 "and so on \377\377quot \377\377");
449 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
462 char* textBuffer= g_strdup_printf(
" quot the big (C) and long \377\377\377quot \377\377\377\377\377\377, " 463 "test © \377string \377\377con \377\377\377\377tains losts \377of \377\377\377\377\377\377\377\377\377\377" 464 "test \377\377nbsp \377\377\377body \377\377\377compli\377\377\377cated \377\377\377COMMENT s " 465 "and funny \377\377\377Words as it \377\377mimi-cs \377\377\377\377\377\377\377\377Licence \377\377\377" 466 "and so on \377\377quot \377\377");
470 char* te22Buffer = g_strdup_printf(
" quot the big (C) and long quot , test © string " 471 "con tains losts of test nbsp body complicated COMMENT s and funny Words as " 472 "it mimi-cs Licence and so on quot ");
474 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
479 CU_TestInfo doctorBuffer_testcases[] =
void convertSpaceToInvisible(char *buf)
void removeBackslashesAndGTroffIndicators(char *buf)
Remove groff/troff font-size indicators, the literal string backslash-n, and all backslashes.
void test_doctorBuffer()
Test for doctorBuffer()
void test_8_ignoreFunctionCalls()
Test for ignoreFunctionCalls()
void doctorBuffer(char *buf, int isML, int isPS, int isCR)
Convert a buffer of multiple stuff to text-only, separated by spaces.
void test_7_removePunctuation()
Test for removePunctuation()
int idxGrep_recordPosition(int index, char *data, int flags)
compile a regex, perform the search and record findings
int compressDoctoredBuffer(char *textBuffer)
garbage collect: eliminate all INVISIBLE characters in the buffer
GArray * matchPositions
Match positions.
int idxGrep_recordPositionDoctored(int index, char *data, int flags)
compile a regex, perform the search and record findings
int start
Start position of match.
FUNCTION MatchPositionAndType * getMatchfromHighlightInfo(GArray *in, int index)
Get the MatchPositionAndType for a given index in highlight array.
void ignoreFunctionCalls(char *buf)
Ignore function calls to print routines.
void convertWhitespaceToSpaceAndRemoveSpecialChars(char *buf, int isCR)
Convert white-space to real spaces, and remove unnecessary punctuation.
FUNCTION void addLicence(GArray *theMatches, char *licenceName)
Add a license to the matches array.
void test_5_convertWhitespaceToSpaceAndRemoveSpecialChars()
Test for convertWhitespaceToSpaceAndRemoveSpecialChars()
void test_9_convertSpaceToInvisible()
Test for convertSpaceToInvisible()
void removePunctuation(char *buf)
Clean up miscellaneous punctuation.
FUNCTION LicenceAndMatchPositions * getLicenceAndMatchPositions(GArray *in, int index)
Get the LicenceAndMatchPositions for a given index in match array.
void licenseInit()
license initialization
void removeLineComments(char *buf)
Remove comments that start at the beginning of a line.
void test_doctorBuffer_fromFile()
Test for idxGrep_recordPosition()
void test_4_removeBackslashesAndGTroffIndicators()
Test for removeBackslashesAndGTroffIndicators()
FUNCTION void freeAndClearScan(struct curScan *thisScan)
Clean-up all the per scan data structures, freeing any old data.
void test_1_removeHtmlComments()
Test for removeHtmlComments()
void cleanUpPostscript(char *buf)
Remove newlines from buffer.
static void report_Match(char *buf)
Helper function to match licenses and highlight info.
void test_doctorBuffer_uncollapse()
Test for uncollapsePosition()
int end
End position of match.
void removeHtmlComments(char *buf)
Remove HTML comments from buffer without removing comment text.
void test_3_cleanUpPostscript()
Test for cleanUpPostscript()
void test_10_compressDoctoredBuffer()
Test for compressDoctoredBuffer()
void test_2_removeLineComments()
Test for removeLineComments()
void test_6_dehyphen()
Test for dehyphen()
void test_6a_dehyphen()
Test for dehyphen()
FUNCTION void initializeCurScan(struct curScan *cur)
Initialize the scanner.