FOSSology  3.2.0rc1
Open Source License Compliance by Open Source Software
nomos_regex.c
Go to the documentation of this file.
1 /***************************************************************
2  Copyright (C) 2006-2011 Hewlett-Packard Development Company, L.P.
3  Copyright (C) 2014, Siemens AG
4 
5  This program is free software; you can redistribute it and/or
6  modify it under the terms of the GNU General Public License
7  version 2 as published by the Free Software Foundation.
8 
9  This program is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  GNU General Public License for more details.
13 
14  You should have received a copy of the GNU General Public License along
15  with this program; if not, write to the Free Software Foundation, Inc.,
16  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 
18  ***************************************************************/
/*
 * Debug-output switch for this file.
 * Uncomment the DEBUG_TNG define below to make CALL_IF_DEBUG_MODE(x)
 * expand to x; when DEBUG_TNG is not defined the macro expands to nothing,
 * so the wrapped statement is compiled out.
 */
19 //#define DEBUG_TNG
20 #ifndef DEBUG_TNG
21 #define CALL_IF_DEBUG_MODE(x)
22 #else
23 #define CALL_IF_DEBUG_MODE(x) x
24 #endif
25 
26 #include "nomos_regex.h"
27 #include "nomos_gap.h"
28 #include "nomos_utils.h"
/* Scratch buffer that regexError() fills through regerror() before logging. */
37 static char regexErrbuf[myBUFSIZ];
38 
/* One regex_t slot per footprint index; idxGrep_base() compiles into idx_regc[index]. */
39 regex_t idx_regc[NFOOTPRINTS];
40 
49 void regexError(int ret, regex_t *regc, char *regex)
50 {
51 #ifdef PROC_TRACE
52  traceFunc("== regexError(%d, %p, %s)\n", ret, regc, regex);
53 #endif /* PROC_TRACE */
54 
55  (void) regerror(ret, regc, regexErrbuf, sizeof(regexErrbuf));
56  Msg("regex = \"%s\"\n", regex);
57  LOG_FATAL("regcomp failure: %s", regexErrbuf)
58  Bail(-__LINE__);
59 }
60 
/**
 * \brief Check if a string ends with a given suffix (case-insensitive).
 *
 * Compares the trailing characters of \p s with \p suffix using
 * strncasecmp(); this is cheaper than going through regcomp()/regexec().
 *
 * \param s      String to inspect
 * \param suffix Suffix to look for
 * \return 1 if \p s ends with \p suffix, 0 otherwise. A NULL argument or a
 *         suffix longer than \p s yields 0; an empty suffix yields 1.
 */
int endsIn(char *s, char *suffix)
{
  size_t slen;
  size_t sufflen;

  if (s == NULL || suffix == NULL)
  {
    return (0);
  }

#ifdef PROC_TRACE
  traceFunc("== endsIn(%s, %s)\n", s, suffix);
#endif /* PROC_TRACE */

  slen = strlen(s);
  sufflen = strlen(suffix);

  /*
   * A suffix longer than the string can never match; without this guard
   * the pointer arithmetic below would step in front of the buffer.
   */
  if (sufflen > slen)
  {
    return (0);
  }
  if (strncasecmp(s + (slen - sufflen), suffix, sufflen) == 0)
  {
    return (1);
  }
  return (0);
}
85 
92 int lineInFile(char *pathname, char *regex)
93 {
94  char buf[myBUFSIZ];
95 
96 #ifdef PROC_TRACE
97  traceFunc("== lineInFile(%s, \"%s\")\n", pathname, regex);
98 #endif /* PROC_TRACE */
99 
100  (void) sprintf(buf, "^%s$", regex);
101  return (textInFile(pathname, buf, REG_NEWLINE));
102 }
103 
111 int textInFile(char *pathname, char *regex, int flags)
112 {
113  char *textp;
114  int ret;
115 
116 #ifdef PROC_TRACE
117  traceFunc("== textInFile(%s, \"%s\", 0x%x)\n", pathname, regex, flags);
118 #endif /* PROC_TRACE */
119 
120  if ((pathname == NULL_STR ) || (regex == NULL_STR ))
121  {
122 #ifdef QA_CHECKS
123  if (pathname == NULL_STR)
124  {
125  Assert(NO, "textInFile: NULL pathname");
126  }
127  if (regex == NULL_STR)
128  {
129  Assert(NO, "textInFile: NULL regex");
130  }
131 #endif /* QA_CHECKS */
132  return (0);
133  }
134  if ((textp = mmapFile(pathname)) == NULL_STR)
135  {
136  return (0);
137  }
138  ret = strGrep(regex, textp, flags);
139  munmapFile(textp);
140  return (ret);
141 }
142 
150 int strGrep(char *regex, char *data, int flags)
151 {
152  regex_t regc;
153  int ret;
154 
155 #ifdef PHRASE_DEBUG
156  int i;
157 #endif /* PHRASE_DEBUG */
158 
159 #if defined(PROC_TRACE) || defined(PHRASE_DEBUG)
160  traceFunc("== strGrep(\"%s\", %p, 0x%x)\n", regex, data, flags);
161 #endif /* PROC_TRACE || PHRASE_DEBUG */
162 
163  if (data == NULL_STR || regex == NULL_STR)
164  {
165  return (0);
166  }
167  /* DO NOT, repeat DO NOT add REG_EXTENDED as a default flag! */
168  if ((ret = regcomp(&regc, regex, flags)) != 0)
169  {
170  regexError(ret, &regc, regex);
171  regfree(&regc);
172  return (-1); /* <0 indicates compile failure */
173  }
174  /*
175  * regexec() returns 1 on failure and 0 on success - make sure we call
176  * regfree after the regexec call, else after a million or so regex
177  * searches we'll have lost a LOT of memory. :)
178  */
179  ret = regexec(&regc, data, 1, &cur.regm, 0);
180  regfree(&regc);
181  if (ret)
182  {
183  return (0); /* >0 indicates search failure */
184  }
185 #ifdef QA_CHECKS
186  if (cur.regm.rm_so == cur.regm.rm_eo)
187  {
188  Assert(NO, "start/end offsets are identical in strGrep()");
189  }
190 #endif /* QA_CHECKS */
191 #ifdef PHRASE_DEBUG
192  printf("strGrep MATCH(%s) @ %d! = {", regex, cur.regm.rm_so);
193  for (i = cur.regm.rm_so; i < cur.regm.rm_eo; i++)
194  {
195  printf("%c", data[i]);
196  }
197  printf("}\n");
198 #endif /* PHRASE_DEBUG */
199  if (gl.flags & FL_SAVEBASE)
200  {
201  cur.matchBase = data;
202  }
203  return (1);
204 }
205 
/**
 * \brief Search \p data for the footprint at \p index without recording
 *        anything.
 *
 * Thin wrapper around idxGrep_base() with mode 0.
 *
 * \param index Footprint index
 * \param data  Text to search
 * \param flags Flags handed on to idxGrep_base()
 * \return Whatever idxGrep_base() returns
 */
int idxGrep(int index, char *data, int flags)
{
  const int mode = 0; /* plain search, no bookkeeping */

  return idxGrep_base(index, data, flags, mode);
}
220 
231 int idxGrep_recordPosition(int index, char *data, int flags)
232 {
233  if( optionIsSet(OPTS_NO_HIGHLIGHTINFO) ) {
234  return idxGrep_base(index, data, flags, 0);
235  }
236  else {
237  return idxGrep_base(index, data, flags, 1);
238  }
239 }
240 
252 int idxGrep_recordPositionDoctored(int index, char *data, int flags)
253 {
254 
255  if( optionIsSet(OPTS_NO_HIGHLIGHTINFO) ) {
256  return idxGrep_base(index, data, flags, 0);
257  }
258  else {
259  return idxGrep_base(index, data, flags, 2);
260  }
261 }
262 
274 int idxGrep_recordIndex(int index, char *data, int flags)
275 {
276  if( optionIsSet(OPTS_NO_HIGHLIGHTINFO) ) {
277  return idxGrep_base(index, data, flags, 0);
278  }
279  else {
280  return idxGrep_base(index, data, flags, 3);
281  }
282 }
283 
293 int matchOnce(int isPlain, char *data, char* regex, regex_t *rp,
294  regmatch_t* regmatch)
295 {
296  if(isPlain) {
297  return !strNbuf_noGlobals(data, regex, regmatch , 0 , cur.matchBase );
298  }
299 
300  return regexec(rp, data, 1, regmatch, 0);
301 }
302 
312 int storeOneMatch(regmatch_t currentRegMatch, int lastmatch, GArray* allmatches,
313  char** tmpData, char* data)
314 {
315  regmatch_t storeRegMatch;
316  storeRegMatch.rm_so = currentRegMatch.rm_so + lastmatch;
317  storeRegMatch.rm_eo = currentRegMatch.rm_eo + lastmatch;
318  g_array_append_val(allmatches, storeRegMatch);
319  lastmatch += currentRegMatch.rm_eo;
320  *tmpData = data + lastmatch;
321  return lastmatch;
322 }
323 
335 int idxGrep_base(int index, char *data, int flags, int mode)
336 {
337  int i;
338  int ret;
339 
340  int show = flags & FL_SHOWMATCH;
341  licText_t *ltp = licText + index;
346  regex_t *rp = idx_regc + index;
347 
348  CALL_IF_DEBUG_MODE(printf(" %i %i \"", index, ltp->plain);)
349 
350 #if defined(PROC_TRACE) || defined(PHRASE_DEBUG)
351  traceFunc("== idxGrep(%d, %p, 0x%x)\n... regex \"%s\"\n", index, data,
352  flags, _REGEX(index));
353 #endif /* PROC_TRACE || PHRASE_DEBUG */
354 
355  if (index > NFOOTPRINTS)
356  {
357  LOG_FATAL("idxGrep: index %d out of range", index)
358  Bail(-__LINE__);
359  }
360  if (data == NULL_STR)
361  {
362 #ifdef PHRASE_DEBUG
363  printf("idxGrep: NULL pointer to file data!\n");
364 #endif /* PHRASE_DEBUG */
365  return (0);
366  }
367 
368  if (ltp->plain )
369  {
370  ret = strNbuf(data, ltp->regex);
371  if(ret == 0) return (ret);
372  }
373  else {
374  if ((ret = regcomp(rp, ltp->regex, flags)))
375  {
376  fprintf(stderr, "Compile failed, regex #%d\n", index);
377  regexError(ret, rp, ltp->regex);
378  regfree(rp);
379  printf("Compile error \n");
380  return (-1); /* <0 indicates compile failure */
381  }
382 
383  if (regexec(rp, data, 1, &cur.regm, 0))
384  {
385  regfree(rp);
386  return (0);
387  }
388  else ret =1;
389 
390  #ifdef QA_CHECKS
391  if (cur.regm.rm_so == cur.regm.rm_eo)
392  {
393  regfree(rp);
394  Assert(NO, "start/end offsets are identical in idxGrep(%d)",
395  index);
396  }
397  #endif /* QA_CHECKS */
398  /* Set up a global match-length variable? */
399  if (show)
400  {
401  #ifdef DEBUG
402  printf("REGEX(%d) \"%s\"\n", index, ltp->regex);
403  #endif /* DEBUG */
404  printf("MATCH @ %d! = {", cur.regm.rm_so);
405  for (i = cur.regm.rm_so; i < cur.regm.rm_eo; i++)
406  {
407  printf("%c", data[i]);
408  }
409  printf("}\n");
410  }
411  if (gl.flags & FL_SAVEBASE)
412  {
413  cur.matchBase = data;
414  }
415  }
416 
418 
419  if (mode == 3 ) {
420  recordIndex(cur.indexList, index);
421  }
422  else if (mode==1 || mode == 2)
423  {
424  CALL_IF_DEBUG_MODE(printf("MATCH!\n");)
426 
427  CALL_IF_DEBUG_MODE(printf("%s", data);)
428 
429 
430  GArray* allmatches = g_array_new(FALSE, FALSE, sizeof(regmatch_t));
431  regmatch_t currentRegMatch;
432  int lastmatch = 0;
433 
434  char* tmpData = data;
435 
436  lastmatch = storeOneMatch(cur.regm, lastmatch, allmatches, &tmpData, data);
437 
438  while (!matchOnce(ltp->plain,tmpData, ltp->regex, rp, &currentRegMatch ) )
439  {
440  lastmatch = storeOneMatch(currentRegMatch, lastmatch, allmatches, &tmpData, data);
441  }
442 
443 
444  if(index >= _KW_first && index <= _KW_last ) {
445  rememberWhatWeFound(cur.keywordPositions, allmatches, index, mode);
446  }
447  else if (cur.currentLicenceIndex > -1 ) {
448  rememberWhatWeFound( getLicenceAndMatchPositions(cur.theMatches, cur.currentLicenceIndex )->matchPositions , allmatches, index, mode);
449  }
450  g_array_free(allmatches, 1);
451  CALL_IF_DEBUG_MODE(printf("Bye!\n");)
452  }
453 
454  if (!ltp->plain ) regfree(rp);
455 return (1);
456 }
457 
463 void recordIndex(GArray* indexList, int index){
464  g_array_append_val(indexList, index);
465 }
466 
473 static int getOffset(int posInDoctoredBuffer)
474 {
475  return uncollapsePosition(posInDoctoredBuffer, cur.docBufferPositionsAndOffsets);
476 }
477 
484 regmatch_t* getRegmatch_t(GArray* in, int index)
485 {
486  return & g_array_index(in, regmatch_t, index);
487 }
488 
496 void rememberWhatWeFound(GArray* highlight, GArray* regmatch_tArray, int index,
497  int mode)
498 {
499 
500  if (mode != 1 && mode != 2)
501  {
502  FOSSY_EXIT("This mode is not supported\n", 8);
503  return;
504  }
505 
506  int i = 0;
507  int nmatches = regmatch_tArray->len;
508  int alreadyFound = highlight->len;
509  g_array_set_size(highlight, alreadyFound + nmatches);
510 
511  for (i = 0; i < nmatches; ++i)
512  {
513  regmatch_t* theRegmatch = getRegmatch_t(regmatch_tArray, i);
514  if (theRegmatch->rm_eo == -1 || theRegmatch->rm_so == -1)
515  {
516  FOSSY_EXIT("Found match at negative position... this should not happen\n", 9);
517  return;
518  }
519 
520  MatchPositionAndType* ourMatchv = getMatchfromHighlightInfo(highlight, i + alreadyFound);
521  ourMatchv->start = (mode == 1) ? theRegmatch->rm_so : getOffset(theRegmatch->rm_so);
522  ourMatchv->end = (mode == 1) ? theRegmatch->rm_eo : getOffset(theRegmatch->rm_eo);
523  ourMatchv->index = index;
524 
525  CALL_IF_DEBUG_MODE(printf("here: %i - %i \n", ourMatchv->start, ourMatchv->end);)
526  }
527  CALL_IF_DEBUG_MODE(printf(" We go and now we know %d ", highlight->len);)
528 }
529 
/*
 * Shorthand for a char-typed lookup in an array named xascii; that array
 * must be in scope wherever the macro is expanded.
 */
530 #define _XC(q) ((char) xascii[q])
531 
539 int strNbuf(char *data, char *str){
540 
541  return strNbuf_noGlobals(data, str, &(cur.regm), gl.flags & FL_SAVEBASE , cur.matchBase );
542 }
543 
/**
 * \brief Return the other-case variant of an ASCII letter.
 *
 * \param c Character to convert
 * \return The lower-case letter for 'A'..'Z', the upper-case letter for
 *         'a'..'z', and '\0' for every other value (including bytes
 *         outside the ASCII range).
 */
static char strNbuf_altCase(char c)
{
  if ((c >= 'A') && (c <= 'Z'))
  {
    return (char) (c + ('a' - 'A'));
  }
  if ((c >= 'a') && (c <= 'z'))
  {
    return (char) (c - ('a' - 'A'));
  }
  return '\0';
}

/**
 * \brief This is our own internal, case-insensitive version of strstr().
 *
 * Only ASCII letters are compared case-insensitively; every other byte has
 * to match exactly. No global state is read or written.
 *
 * \param data          NUL-terminated buffer to search
 * \param str           NUL-terminated string to look for
 * \param[out] matchPos On success receives the start (rm_so) and end (rm_eo)
 *                      offset of the first occurrence within \p data
 * \param doSave        Kept for interface compatibility, not used
 * \param saveData      Kept for interface compatibility, not used; it is
 *                      passed by value, so assigning to it could never reach
 *                      the caller. Callers that need the base pointer have
 *                      to record \p data themselves.
 * \return 1 if \p str occurs in \p data, 0 otherwise (also for NULL
 *         arguments and for an empty \p str)
 */
int strNbuf_noGlobals(char *data, char *str, regmatch_t* matchPos, int doSave,
                      char* saveData)
{
  int save = 0;
  char *bufp;
  char *pattp;
  char *mark;
  char x;
  char firstx;

  (void) doSave;
  (void) saveData;

#if defined(PROC_TRACE) || defined(PHRASE_DEBUG)
  traceFunc("== strNbuf(%p, %p)\n", data, str);
#endif /* PROC_TRACE || PHRASE_DEBUG */

  if (data == NULL || str == NULL)
  {
    return (0);
  }
#ifdef STRSTR_DEBUG
  printf("DATA \"%s\"\nPATT \"%s\"\n", data, str);
#endif /* STRSTR_DEBUG */

  /* Non-zero only when the pattern starts with an ASCII letter. */
  firstx = strNbuf_altCase(*str);
#ifdef STRSTR_DEBUG
  if (firstx)
  {
    printf("NOTE: first char (%c) is Alphabetic - alternate is (%c)\n",
        *str, firstx);
  }
#endif /* STRSTR_DEBUG */

  for (bufp = data; *bufp; bufp = mark)
  {
#ifdef STRSTR_DEBUG
    printf("\nDEBUG: start, buffer = \"%s\"\n", bufp);
#endif /* STRSTR_DEBUG */
    pattp = str;
    /*
     * Locate the first character of our target-pattern in the buffer...
     */
    while (*bufp)
    {
#ifdef STRSTR_DEBUG
      printf("... findfirst, *bufp is '%c' == [%c%c]?\n",
          *bufp, *str, firstx ? firstx : *str);
#endif /* STRSTR_DEBUG */
      if (*bufp == *pattp)
      {
        break;
      }
      if (firstx && (*bufp == firstx))
      {
        break;
      }
      bufp++;
    }
    if (*bufp == '\0')
    {
      return (0);
    }
    save = (int) (bufp - data);
    mark = ++bufp; /* a failed attempt resumes one char after this start */
#ifdef STRSTR_DEBUG
    printf("GOT IT, at offset %d (*mark now is '%c')\n", save, *mark);
#endif /* STRSTR_DEBUG */
    for (++pattp; *bufp && *pattp; bufp++, pattp++)
    {
#ifdef STRSTR_DEBUG
      printf("STRING-COMPARE: %c == %c ??\n", *bufp, *pattp);
#endif /* STRSTR_DEBUG */
      if (*bufp == *pattp)
      {
        continue;
      }
      x = strNbuf_altCase(*pattp);
#ifdef STRSTR_DEBUG
      printf("... or perhaps: %c == %c ??\n", *bufp, x);
#endif /* STRSTR_DEBUG */
      if (x && (*bufp == x))
      {
        continue;
      }
      break;
    }
    if (*pattp == '\0')
    {
      if (matchPos != NULL)
      {
        matchPos->rm_so = save;
        matchPos->rm_eo = save + (int) strlen(str);
      }
      return (1); /* end of pattern == success */
    }
    if (*bufp == '\0')
    {
      return (0); /* end of buffer before end of pattern == failure */
    }
  }
  return (0);
}
regmatch_t * getRegmatch_t(GArray *in, int index)
From a given array, get regex match from a given index.
Definition: nomos_regex.c:484
int optionIsSet(int val)
Check if a CLI option is set.
Definition: nomos_utils.c:568
int idxGrep_base(int index, char *data, int flags, int mode)
compile a regex, and perform the search (on data?)
Definition: nomos_regex.c:335
int idxGrep_recordPosition(int index, char *data, int flags)
compile a regex, perform the search and record findings
Definition: nomos_regex.c:231
static char regexErrbuf[myBUFSIZ]
Definition: nomos_regex.c:37
int s
The socket that the CLI will use to communicate.
Definition: fo_cli.c:48
static int getOffset(int posInDoctoredBuffer)
Get offset from doctored buffer.
Definition: nomos_regex.c:473
GArray * matchPositions
Match positions.
Definition: nomos.h:392
void Bail(int exitval)
Close connections and exit.
Definition: nomos_utils.c:541
int endsIn(char *s, char *suffix)
Check if a string ends with given suffix.
Definition: nomos_regex.c:67
int textInFile(char *pathname, char *regex, int flags)
Check if a regex matches somewhere in a file.
Definition: nomos_regex.c:111
int idxGrep_recordPositionDoctored(int index, char *data, int flags)
compile a regex, perform the search and record findings
Definition: nomos_regex.c:252
int start
Start position of match.
Definition: nomos.h:383
FUNCTION MatchPositionAndType * getMatchfromHighlightInfo(GArray *in, int index)
Get the MatchPositionAndType for a given index in highlight array.
Definition: nomos_utils.c:909
GArray * theMatches
Definition: nomos.h:430
int strNbuf(char *data, char *str)
Check if a string exists in buffer (case insensitive)
Definition: nomos_regex.c:539
int lineInFile(char *pathname, char *regex)
Check if a line exists in a file.
Definition: nomos_regex.c:92
void Msg(const char *fmt,...)
DO NOT automatically add to a string passed to Msg(); in parseDistro, we sometimes want to dump a p...
Definition: util.c:1394
int flags
Flags.
Definition: nomos.h:361
#define NULL_CHAR
NULL character.
Definition: nomos.h:247
void Assert(int fatalFlag, const char *fmt,...)
Raise an assert.
Definition: util.c:1407
#define _REGEX(x)
Definition: nomos.h:460
int idxGrep(int index, char *data, int flags)
compile a regex, and perform the search (on data?)
Definition: nomos_regex.c:216
char * mmapFile(char *pathname)
Blarg. Files that are EXACTLY a multiple of the system pagesize do not get a NULL on the end of the b...
Definition: util.c:1094
void recordIndex(GArray *indexList, int index)
Add a given index to index list.
Definition: nomos_regex.c:463
int storeOneMatch(regmatch_t currentRegMatch, int lastmatch, GArray *allmatches, char **tmpData, char *data)
Store a single regex match to array.
Definition: nomos_regex.c:312
FUNCTION LicenceAndMatchPositions * getLicenceAndMatchPositions(GArray *in, int index)
Get the LicenceAndMatchPositions for a given index in match array.
Definition: nomos_utils.c:921
int strNbuf_noGlobals(char *data, char *str, regmatch_t *matchPos, int doSave, char *saveData)
This is our own internal, case-insensitive version of strstr().
Definition: nomos_regex.c:549
int index
Enums from index (Entrynumber) in STRINGS.in.
Definition: nomos.h:385
licText_t licText[]
int end
End position of match.
Definition: nomos.h:384
#define NULL_STR
NULL string.
Definition: nomos.h:248
void rememberWhatWeFound(GArray *highlight, GArray *regmatch_tArray, int index, int mode)
Store regex matches in highlight array.
Definition: nomos_regex.c:496
int strGrep(char *regex, char *data, int flags)
General-purpose grep function, used for one-time-only searches.
Definition: nomos_regex.c:150
GArray * indexList
Definition: nomos.h:429
#define FL_SAVEBASE
Definition: nomos.h:168
int matchOnce(int isPlain, char *data, char *regex, regex_t *rp, regmatch_t *regmatch)
Perform a regex match on a given data and return only first match.
Definition: nomos_regex.c:293
void munmapFile(void *ptr)
Definition: util.c:1209
int idxGrep_recordIndex(int index, char *data, int flags)
compile a regex, perform the search and record index
Definition: nomos_regex.c:274
#define NO
Definition: nomos.h:184
char * regex
License regex.
Definition: nomos.h:448
GArray * keywordPositions
Definition: nomos.h:431
void regexError(int ret, regex_t *regc, char *regex)
Log an error caused by regex.
Definition: nomos_regex.c:49