40 #include "nomos_utils.h" 43 #include "nomos_regex.h" 45 #include "_autodefs.h" 47 #define HASHES "#####################" 48 #define DEBCPYRIGHT "debian/copyright" 53 static void licenseStringChecks();
54 static void findLines(
char *,
char *,
int,
int,
list_t *);
66 extern void memStats();
75 #define MAX(a, b) ((a) > (b) ? a : b) 76 #define MIN(a, b) ((a) < (b) ? a : b) 94 traceFunc(
"== licenseInit()\n");
98 strcpy(some,
"=SOME=");
100 strcpy(year,
"=YEAR=");
110 for (i = 0; i < NFOOTPRINTS; i++) {
113 if (licSpec[i].text.csData ==
NULL_STR) {
116 if ((licSpec[i].text.csLen == 1) && (*(licSpec[i].
text.
csData) ==
'.')) {
120 else if ((licSpec[i].seed.csLen == licSpec[i].
text.
csLen) && !memcmp(
121 licSpec[i].seed.csData, licSpec[i].
text.
csData, len)) {
131 fixSearchString(buf,
sizeof(buf), i,
YES);
142 LOG_FATAL(
"Cannot enqueue search-cache item \"%s\"",
licText[i].tseed)
152 if (strcmp(
licText[i].tseed,
"=NULL=") == 0) {
174 memcpy(buf, licSpec[i].text.csData, (
size_t)(len + 1));
186 fixSearchString(buf,
sizeof(buf), i,
NO);
190 if (p->ssComp < (ssAbove * 100) + ssBelow) {
191 p->ssComp = (ssAbove * 100) + ssBelow;
203 for (i = 0; i < NFOOTPRINTS; i++) {
206 LOG_NOTICE(
"License[%d] configured with NULL seed", i)
212 LOG_NOTICE(
"License[%d] seed == regex", i)
216 licText[i].nAbove = p->ssComp / 100;
217 licText[i].nBelow = p->ssComp % 100;
226 for (i = 0; i < NFOOTPRINTS; i++) {
250 if (i >= _CR_first && i <= _CR_last) {
257 #define LINE_BYTES 50 285 static int searchStrategy(int index, char *regex, int aboveCalc) { 298 traceFunc(
"== searchStrategy(%d(%s), \"%s\", %d)\n", index,
299 _SEED(index), regex, aboveCalc);
305 LOG_NOTICE(
"Lic[%d] has NULL seed", index)
309 if (regex ==
NULL_STR || strlen(regex) == 0) {
311 Assert(
NO,
"searchStrategy(%d) called with NULL data", index);
315 if (strcmp(
s, regex) == 0) {
318 bytes = words = lines = 0;
319 (void) strcpy(seed,
s);
320 while (seed[strlen(seed) - 1] ==
' ') {
325 if (
strGrep(seed, regex, REG_ICASE) == 0) {
327 printf(
"DEBUG: seed(%d) no hit in regex!\n", index);
333 for (minLines = 0; cp != NULL;
start = cp + 1) {
334 matchWild = matchSeed = 0;
338 matchWild = (strcmp(
start, any) == 0 || strcmp(
start, some) == 0
339 || strcmp(
start, few));
340 matchSeed = strcmp(
start, seed) == 0;
343 words += (matchWild ?
WC_WORDS : 1);
356 printf(
"ABOVE: .... bytes=%d, words=%d; max(%d,%d)+%d == %d\n",
360 return (words == 0 ? 0 : lines);
364 matchWild = matchSeed = 0;
368 matchWild = (strcmp(
start, any) == 0 || strcmp(
start, some) == 0
369 || strcmp(
start, few));
370 matchSeed = strcmp(
start, seed) == 0;
377 words += (matchWild ?
WC_WORDS : 1);
385 printf(
"BELOW: .... bytes=%d, words=%d; max(%d,%d)+%d == %d\n",
392 static void fixSearchString(
char *
s,
int size,
int i,
int wildcardBad)
399 traceFunc(
"== fixSearchString(\"%s\", %d, %d, %d)\n", s, size, i,
416 while (isspace(*cp)) {
419 if (strncmp(cp, any,
sizeof(any)-1) == 0 ||
420 strncmp(cp, some,
sizeof(some)-1) == 0 ||
421 strncmp(cp, few,
sizeof(few)-1) == 0) {
422 printf(
"string %d == \"%s\"\n", i, cp);
423 LOG_FATAL(
"Text-spec %d begins with a wild-card", i)
430 (
void) sprintf(wildCard, " %s", any);
431 len = strlen(wildCard);
432 for (cp = s;
strGrep(wildCard, cp, 0); ) {
434 LOG_FATAL(
"OOPS, regex %d, wild-card not allowed here", i)
438 LOG_FATAL(
"String %d ends in a wild-card", i)
441 else if (*(cp+cur.regm.rm_eo) == ' ') {
443 printf(
"BEFORE(any): %s\n", s);
445 cp += cur.regm.rm_so;
448 memmove(cp, cp+len-1, strlen(cp+len)+2);
450 printf(
"_AFTER(any): %s\n", s);
454 LOG_NOTICE(
"Wild-card \"%s\" sub-string, phrase %d", wildCard, i)
455 cp += cur.regm.rm_eo;
461 (
void) sprintf(wildCard, " %s", some);
462 len = strlen(wildCard);
463 for (cp = s;
strGrep(wildCard, cp, 0); ) {
465 LOG_FATAL(
"OOPS, regex %d, wild-card not allowed here", i)
468 if (*(cp+cur.regm.rm_eo) == NULL_CHAR) {
469 LOG_FATAL(
"String %d ends in a wild-card", i)
472 else if (*(cp+cur.regm.rm_eo) == ' ') {
474 printf(
"BEFORE(some): %s\n", s);
476 cp += cur.regm.rm_so;
484 memmove(cp, cp+len-6, strlen(cp+len)+7);
486 printf(
"_AFTER(some): %s\n", s);
490 LOG_NOTICE(
"Wild-card \"%s\" sub-string, phrase %d", wildCard, i)
491 cp += cur.regm.rm_eo;
497 (
void) sprintf(wildCard, " %s", few);
498 len = strlen(wildCard);
499 for (cp = s;
strGrep(wildCard, cp, 0); ) {
501 LOG_FATAL(
"OOPS, regex %d, wild-card not allowed here", i)
504 if (*(cp+cur.regm.rm_eo) == NULL_CHAR) {
505 LOG_FATAL(
"String %d ends in a wild-card", i)
508 else if (*(cp+cur.regm.rm_eo) == ' ') {
510 printf(
"BEFORE(few): %s\n", s);
512 cp += cur.regm.rm_so;
520 memmove(cp, cp+len-6, strlen(cp+len)+7);
522 printf(
"_AFTER(few): %s\n", s);
526 LOG_NOTICE(
"Wild-card \"%s\" sub-string, phrase %d", wildCard, i)
527 cp += cur.regm.rm_eo;
537 if (strlen(s)+25 >= size) {
538 LOG_FATAL(
"buffer overflow, text-spec %d", i)
541 cp = (
char *)(s+cur.regm.rm_so);
543 printf(
"BEFORE: %s\n", s);
545 memmove(cp+25, cp+6, strlen(cp+len)+1);
546 memset(cp+6,
'_', 19);
548 printf(
"_MOVED: %s\n", s);
550 *cp = *(cp+4) = *(cp+9) = *(cp+14) = *(cp+19) =
'[';
553 *(cp+5) = *(cp+10) = *(cp+15) =
'0';
554 *(cp+6) = *(cp+11) = *(cp+16) =
'-';
555 *(cp+7) = *(cp+12) = *(cp+17) =
'9';
556 *(cp+3) = *(cp+8) = *(cp+13) = *(cp+18) = *(cp+23) =
']';
562 printf(
"_AFTER: %s\n", s);
572 if (*(p->
str) ==
'/')
574 strcpy(scp->fullpath, p->
str);
575 scp->nameOffset = (size_t) (cur.targetLen + 1);
580 strncpy(scp->fullpath, cur.
cwd,
sizeof(scp->fullpath)-1);
581 strncat(scp->fullpath,
"/",
sizeof(scp->fullpath)-1);
582 strncat(scp->fullpath, p->
str,
sizeof(scp->fullpath)-1);
583 scp->nameOffset = (size_t) (cur.cwdLen + 1);
625 cp = createRelativePath(p, scp);
628 printf(
"licenseScan: scan %s\n",
629 (
char *)(scp->fullpath+scp->nameOffset));
641 scp->size = cur.stbuf.st_size;
658 assert(NKEYWORDS >=
sizeof(scp->kwbm));
660 for (scp->kwbm = c = 0; c < NKEYWORDS; c++)
664 scp->kwbm |= (1 << c);
667 printf(
"Keyword %d (\"%s\"): YES\n", c,
_REGEX(c+_KW_first));
673 printf(
"%s = %d\n", (
char *)(scp->fullpath+scp->nameOffset),
696 if (scores->
score == 0)
720 for (scp = scores, i = nCand = 0; i < nFiles; i++, scp++)
722 scp->relpath = (
char *) (scp->fullpath + scp->nameOffset);
726 if (
idxGrep(_FN_DEBCPYRT, scp->relpath, REG_ICASE)) {
730 else if (scp->
score >= lowWater) {
739 printf(
"%s [score: %d], %07o\n", scp->fullpath,
740 scp->
score, scp->kwbm);
766 int counts[NKEYWORDS + 1];
774 traceFunc(
"== licenseScan(%p, %d)\n", l);
778 printf(
"... allocating %d bytes for scanres_t[] array\n",
779 sizeof(*scp)*licenseList->
used);
782 scores = (
scanres_t *) memAlloc(
sizeof(*scp) * licenseList->
used, MTAG_SCANRES);
783 memset((
void *) counts, 0, (
size_t) ((NKEYWORDS + 1) *
sizeof(
int)));
789 traceFunc(
"=> invoking qsort(): callback == scoreCompare()\n");
792 nFilesInList = licenseList->
used;
793 qsort(scores, (
size_t) nFilesInList,
sizeof(*scp),
scoreCompare);
806 if (scores->licenses) free(scores->licenses);
807 memFree((
char *) scores,
"scores table");
838 return (-strcmp(sc1->fullpath, sc2->fullpath));
848 traceFunc(
"== noLicenseFound\n");
851 (void) strcpy(cur.
compLic, LS_NOSUM);
869 printf(
" Highlighting Info at");
871 for (currentKeyw=0; currentKeyw < keyWords->len; ++currentKeyw ) {
873 printf(
" Keyword at %i, length %i, index = 0,", ourMatchv->
start, ourMatchv->
end - ourMatchv->
start );
876 for (currentLicence = 0; currentLicence < theMatches->len; ++currentLicence)
884 printf(
" License #%s# at %i, length %i, index = %i,", theLicence->
licenceName , ourMatchv->
start, ourMatchv->
end - ourMatchv->
start, ourMatchv->
index );
900 char miscbuf[myBUFSIZ];
905 (void) strcpy(miscbuf,
"Matches: ");
907 for (base = c = 0; c < NKEYWORDS; c++)
909 if (scores[idx].kwbm & (1 << c))
913 miscbuf[offset++] =
',';
914 miscbuf[offset++] =
' ';
916 offset += sprintf(miscbuf + offset,
"%s",
_REGEX(c + _KW_first));
920 printf(
"%s\n", miscbuf);
953 g_array_free(cur.docBufferPositionsAndOffsets, TRUE);
954 cur.docBufferPositionsAndOffsets = g_array_new(FALSE, FALSE,
sizeof(
pairPosOff));
957 for (cur.currentLicenceIndex = 0; cur.currentLicenceIndex < cur.
theMatches->len; ++cur.currentLicenceIndex)
966 for (myIndex = 0; myIndex < currentLicence->
indexList->len; ++myIndex)
968 int currentIndex = g_array_index(currentLicence->
indexList,
int, myIndex);
969 if (currentIndex == lastindex)
continue;
971 lastindex = currentIndex;
1000 int highScore = scores->
score;
1001 int isFileMarkupLanguage = 0;
1008 char realPathOfTarget[PATH_MAX];
1011 traceFunc(
"== saveLicenseData(%p, %d, %d, %d, %d)\n", scores, nCand,
1020 printf(
"saveLicenseData: %d candidates\n", nCand);
1033 for (idx = 0; i <= nCand; idx++) {
1037 if (scores[idx].flag == 0) {
1040 (void) sprintf(scores[idx].linkname,
"Link%03d.txt", i++);
1042 printf(
"name: %s\n[%s]\n", scores[idx].relpath, scores[idx].fullpath);
1050 fileName = scores[idx].fullpath;
1052 printf(
"File name: %s\n", fileName);
1061 size = scores[idx].size;
1062 if (scores[idx].dataOffset) {
1063 textp += scores[idx].dataOffset;
1078 printf(
"File score: %d (0x%06x)\n",
1079 (scores[idx].kwbm ? scores[idx].score : scores[idx].kwbm),
1081 if (scores[idx].kwbm) {
1100 #if defined(DEBUG) || defined(DOCTOR_DEBUG) || defined(LTSR_DEBUG) \ 1101 || defined(BATCH_DEBUG) || defined(PARSE_STOPWATCH) || defined(MEMSTATS) \ 1102 || defined(MEM_DEBUG) || defined(UNKNOWN_CHECK_DEBUG) 1103 printf(
"*** PROCESS File: %s\n", scores[idx].relpath);
1104 printf(
"... %d bytes, score %d\n", scores[idx].size, scores[idx].score);
1107 isFileMarkupLanguage =
idxGrep(_UTIL_MARKUP, textp, REG_ICASE | REG_EXTENDED);
1110 printf(
"idxGrep(ML) returns %d\n", isFileMarkupLanguage);
1111 if (isFileMarkupLanguage)
1114 printf(
"isMarkUp@%d: [", cur.regm.rm_so);
1115 for (n = cur.regm.rm_so; n <= cur.regm.rm_eo; n++) {
1116 printf(
"%c", *(textp+n));
1128 printf(
"idxGrep(PS) returns %d\n", isPS);
1131 printf(
"isPostScript@%d: [", cur.regm.rm_so);
1139 fileName =
parseLicenses(textp, size, &scores[idx], isFileMarkupLanguage, isPS);
1140 scores[idx].licenses =
copyString(fileName, MTAG_FILELIC);
1143 Assert(
NO,
"Expected non-null parseLicenses return!");
1145 if (scores[idx].licenses ==
NULL_STR) {
1146 Assert(
NO,
"Expected non-null license summary!");
1152 #ifdef FLAG_NO_COPYRIGHT 1153 if (gl.
flags & FL_NOCOPYRIGHT) {
1154 p =
listGetItem(&cur.nocpyrtList, scores[idx].relpath);
1156 p->num = scores[idx].
score;
1160 memFree(cur.licPara, MTAG_TEXTPARA);
1176 p =
listGetItem(&cur.lList, scores[idx].licenses);
1193 listSort(&cur.lList, SORT_BY_COUNT_DSC);
1196 if (cur.lList.
used == 0) {
1197 Assert(
NO,
"No entries in license-list");
1206 if (cur.parseList.
used == 0) {
1235 printf(
"File %s contains license(s) %s", realPathOfTarget, cur.
compLic);
1239 printf(
"File %s contains license(s) %s", basename(cur.
targetFile), cur.
compLic);
1284 traceFunc(
"== makeLicenseSummary(%p, %d, %p, %d)\n", l, highScore,
1289 (void) strcpy(target, LS_NOSUM);
1308 if (goodStuff && (p->iLevel <= IL_LOW)) {
1312 target[len++] =
',';
1315 new = sprintf(target + len,
"%s", p->
str);
1316 if ((len +=
new) > size) {
1317 LOG_FATAL(
"Buffer-overwrite, marginal license components")
1325 #ifdef LICENSE_DEBUG 1331 traceFunc(
"== dumpLicenses()\n");
1334 for (i = 0; i < NFOOTPRINTS; i++) {
1335 printf(
"License[%d]: seedlen=%d, regexlen=%d\n", i,
1336 licSpec[i].seed.csLen, licSpec[i].
text.
csLen);
1338 printf(
"[NFOOTPRINTS = %d\n", NFOOTPRINTS);
void writeJson()
Write the scan output as JSON.
void licenseScan(list_t *licenseList)
Scan the list for license(s).
char targetFile[myBUFSIZ]
void doctorBuffer(char *buf, int isML, int isPS, int isCR)
Convert a buffer of multiple stuff to text-only, separated by spaces.
static void printHighlightInfo(GArray *keyWords, GArray *theMatches)
Print highlight info about matches.
int optionIsSet(int val)
Check if a CLI option is set.
static void noLicenseFound()
Mark current scan as LS_NOSUM (No_license_found)
static void printKeyWordMatches(scanres_t *scores, int idx)
Prints keywords match to STDOUT.
tricky data structure used for a list of 'items'
if(!$Test &&$OptionQ) if($stdin_flag) if($Verbose) else
searchString_t text
License text.
static gint compare_integer(gconstpointer a, gconstpointer b)
Compare two integers.
int idxGrep_recordPosition(int index, char *data, int flags)
compile a regex, perform the search and record findings
char * tseed
unencrypted license text
int score
License match score.
int s
The socket that the CLI will use to communicate.
char * wordCount(char *textp)
VERY simple line count, does NOT have to be perfect!
GArray * matchPositions
Match positions.
void Bail(int exitval)
Close connections and exit.
if(!preg_match("/\s$projectGroup\s/", $groups)&&(posix_getgid()!=$gInfo['gid']))
get monk license list of one specified uploadtree_id
int idxGrep_recordPositionDoctored(int index, char *data, int flags)
compile a regex, perform the search and record findings
int start
Start position of match.
static void rescanOriginalTextForFoundLicences(char *textp, int isFileMarkupLanguage, int isPS)
Rescan original content for the licenses already found.
void listInit(list_t *l, int size, char *label)
Initialize a list; if the list is not empty, empty it (initialize it to zeros).
FUNCTION MatchPositionAndType * getMatchfromHighlightInfo(GArray *in, int index)
Get the MatchPositionAndType for a given index in highlight array.
item_t * listGetItem(list_t *l, char *s)
get an item from the itemlist. If the item is not in the itemlist, then add it to the itemlist...
char * csData
String data.
#define NULL_CHAR
NULL character.
static int scoreCompare(const void *, const void *)
Compare two scores.
static void makeLicenseSummary(list_t *, int, char *, int)
Construct a 'computed license'. Wherever possible, leave off the entries for None and LikelyNot; thos...
static void saveLicenseData(scanres_t *, int, int, int)
Saves/creates all the license data in a specific directory (a temp directory?).
void Assert(int fatalFlag, const char *fmt,...)
Raise an assert.
searchString_t seed
License seed.
start($application)
start the application Assumes application is restartable via /etc/init.d/<script>. The application passed in should match the script name in /etc/init.d
int idxGrep(int index, char *data, int flags)
compile a regex, and perform the search (on data?)
char * mmapFile(char *pathname)
Blarg. Files that are EXACTLY a multiple of the system pagesize do not get a NULL on the end of the b...
char * parseLicenses(char *filetext, int size, scanres_t *scp, int isML, int isPS)
Parse a file to check all the possible licenses and add them to matches.
#define MIN(a, b)
Min of two.
#define MAX(a, b)
Max of two.
char * copyString(char *s, char *label)
Create a copy of a string.
FUNCTION LicenceAndMatchPositions * getLicenceAndMatchPositions(GArray *in, int index)
Get the LicenceAndMatchPositions for a given index in match array.
void licenseInit()
license initialization
void relaxScoreCriterionForSingleFile(scanres_t *scores)
Reset scores to 1 if it is 0.
char * pathBasename(char *path)
Get the basename from a file path.
char * licenceName
License names.
int index
Enums from index (Entrynumber) in STRINGS.in.
int fiterResultsOfKeywordScan(int lowWater, scanres_t *scores, int nFiles)
Run through the list once more.
int end
End position of match.
void listSort(list_t *l, int sortType)
Sort the list as per the sortType passed.
#define NULL_STR
NULL string.
static int searchStrategy(int, char *, int)
list_t type structure used to keep various lists. (e.g. there are multiple lists).
int strGrep(char *regex, char *data, int flags)
General-purpose grep function, used for one-time-only searches.
GArray * indexList
License indexes.
void listDump(list_t *l, int verbose)
print the passed in list
void munmapFile(void *ptr)
void listClear(list_t *l, int deallocFlag)
Destroy list_t.
#define NULL_ITEM
NULL item.
char * regex
License regex.
void scanForKeywordsAndSetScore(scanres_t *scores, list_t *licenseList)
GArray * keywordPositions
item_t * listIterate(list_t *l)
return a pointer to listitem, returns a NULL_ITEM when no more items to return.