FOSSology  3.2.0rc1
Open Source License Compliance by Open Source Software
nomos.c
Go to the documentation of this file.
1 /***************************************************************
2  Copyright (C) 2006-2015 Hewlett-Packard Development Company, L.P.
3  Copyright (C) 2014, 2018 Siemens AG
4 
5  This program is free software; you can redistribute it and/or
6  modify it under the terms of the GNU General Public License
7  version 2 as published by the Free Software Foundation.
8 
9  This program is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  GNU General Public License for more details.
13 
14  You should have received a copy of the GNU General Public License along
15  with this program; if not, write to the Free Software Foundation, Inc.,
16  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 
18  ***************************************************************/
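28 /* _GNU_SOURCE enables the GNU/POSIX extensions this file relies on (for example getline() and the strerror_r() variant that returns a char *); it must be defined before any system header is included. */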
29 #ifndef _GNU_SOURCE
30 #define _GNU_SOURCE
31 #endif /* not defined _GNU_SOURCE */
32 
33 #include "nomos.h"
34 #include "nomos_utils.h"
35 
36 extern licText_t licText[]; /* Defined in _autodata.c */
37 struct globals gl;
38 struct curScan cur;
39 
40 int schedulerMode = 0;
41 int Verbose = 0;
43 #define FUNCTION
44 
45 #ifdef COMMIT_HASH_S
46 char BuildVersion[]="nomos build version: " VERSION_S " r(" COMMIT_HASH_S ").\n";
47 #else
48 char BuildVersion[] = "nomos build version: NULL.\n";
49 #endif
50 
51 /* We're being run from the scheduler */
52 /* nomos agent starting up in scheduler mode... */
53 /* \ref http://www.fossology.org/projects/fossology/wiki/Nomos_Test_Cases*/
54 
67 void arsNomos(cacheroot_t* cacheroot, bool ignoreFilesWithMimeType) {
68  int i;
69  int upload_pk = 0;
70  int numrows;
71  int ars_pk = 0;
72  int user_pk = 0;
73  char *AgentARSName = "nomos_ars";
74  PGresult *result;
75 
76  char *repFile;
77 
78  schedulerMode = 1;
79  /* get user_pk for user who queued the agent */
80  user_pk = fo_scheduler_userID();
81  /* read upload_pk from scheduler */
82  while (fo_scheduler_next())
83  {
84  upload_pk = atoi(fo_scheduler_current());
85  if (upload_pk == 0)
86  continue;
87  /* Check Permissions */
88  if (GetUploadPerm(gl.pgConn, upload_pk, user_pk) < PERM_WRITE)
89  {
90  LOG_ERROR("You have no update permissions on upload %d", upload_pk);
91  continue;
92  }
93  result = checkDuplicateReq(gl.pgConn, upload_pk, gl.agentPk);
94  if (fo_checkPQresult(gl.pgConn, result, NULL, __FILE__, __LINE__))
95  Bail(-__LINE__);
96  if (PQntuples(result) != 0)
97  {
98  LOG_NOTICE("Ignoring requested nomos analysis of upload %d - Results are already in database.", upload_pk);
99  PQclear(result);
100  continue;
101  }
102  PQclear(result);
103 
104  /* Record analysis start in nomos_ars, the nomos audit trail. */
105  ars_pk = fo_WriteARS(gl.pgConn, ars_pk, upload_pk, gl.agentPk, AgentARSName, 0, 0);
106 
107  result = getSelectedPFiles(gl.pgConn, upload_pk, gl.agentPk, ignoreFilesWithMimeType);
108  if (fo_checkPQresult(gl.pgConn, result, NULL, __FILE__, __LINE__))
109  Bail(-__LINE__);
110  numrows = PQntuples(result);
111  /* process all files in this upload */
112  for (i = 0; i < numrows; i++)
113  {
114  initializeCurScan(&cur);
115  strcpy(cur.pFile, PQgetvalue(result, i, 1));
116  cur.pFileFk = atoi(PQgetvalue(result, i, 0));
117  repFile = fo_RepMkPath("files", cur.pFile);
118  if (!repFile)
119  {
120  LOG_FATAL("Nomos unable to open pfile_pk: %ld, file: %s", cur.pFileFk, cur.pFile);
121  Bail(-__LINE__);
122  }
123  /* make sure this is a regular file, ignore if not */
124  if (!isFILE(repFile))
125  continue;
126  processFile(repFile);
128  if (recordScanToDB(cacheroot, &cur))
129  {
130  LOG_FATAL("nomos terminating upload %d scan due to previous errors.", upload_pk);
131  Bail(-__LINE__);
132  }
133  freeAndClearScan(&cur);
134  }
135  PQclear(result);
136  /* Record analysis success in nomos_ars. */
137  fo_WriteARS(gl.pgConn, ars_pk, upload_pk, gl.agentPk, AgentARSName, 0, 1);
138  }
139 }
140 
149 void list_dir (const char * dir_name, int process_count, int *distribute_count, FILE **pFile)
150 {
151  struct dirent *dirent_handler;
152  DIR *dir_handler;
153 
154  if ((dir_handler = opendir(dir_name)) == NULL)
155  {
156  fprintf(stderr, "Can't open: %s, error: %s\n", dir_name, strerror(errno));
157  return;
158  }
159 
160  char filename_buf[PATH_MAX] = {}; // store one file path
161  struct stat stat_buf ;
162  int file_number = 0;
163  while ((dirent_handler = readdir(dir_handler)) != NULL)
164  {
165  /* get the file path, form the file path /dir_name/file_name,
166  e.g. dir_name is '/tmp' file_name is 'test_file_1.txt', form one path '/tmp/test_file_1.txt' */
167  sprintf( filename_buf , "%s/%s",dir_name, dirent_handler->d_name);
168 
169  if (stat(filename_buf, &stat_buf) == -1) // if can access the current file, return
170  {
171  LOG_FATAL("Unable to stat file: %s, error message: %s\n", filename_buf, strerror(errno)) ;
172  closedir(dir_handler);
173  return;
174  }
175 
176  /* 1) do not travel '..', '.' directory
177  2) when the file type is directory, travel it
178  3) when the file type is reguler file, write it into temp files on average (value from -n) */
179  if (strcmp (dirent_handler->d_name, "..") != 0 && strcmp (dirent_handler->d_name, ".") != 0)
180  {
181  /* the file type is a directory (exclude '..' and '.') */
182  if ((stat_buf.st_mode & S_IFMT) == S_IFDIR)
183  {
184  list_dir(filename_buf, process_count, distribute_count, pFile); // deep into this directory and travel it
185  }
186  else {
187  sprintf(filename_buf, "%s\n", filename_buf); // add one new line character by the end of one file path, one line is one file path
188  /* write on average process_count */
189  file_number = *distribute_count%process_count;
190  fwrite (filename_buf, sizeof(char), strlen(filename_buf), pFile[file_number]);
191  (*distribute_count)++; // increase the file count
192 
193  if (process_count == *distribute_count) *distribute_count = 0; // reset list_file_count each cycle
194  continue;
195  }
196  }
197  }
198  closedir(dir_handler);
199 }
200 
207 void read_file_grab_license(int file_number, FILE **pFile)
208 {
209  char *line = NULL;
210  size_t len = 0;
211  int lenth_tmp = 0;
212  ssize_t read = 0;
213 
214  /*read line by line, then start to scan licenses */
215  while ((read = getline(&line, &len, pFile[file_number])) != -1) {
216  if (line && line[0]) // line is not empty
217  {
218  lenth_tmp = strlen(line);
219  /* trim the line */
220  while(isspace(line[lenth_tmp - 1])) line[--lenth_tmp] = 0; // right trim
221  while(isspace(*line)) ++line; // left trim
222  //printf("line is:%s, getpid() is:%d\n", line, getpid());
223  }
224  initializeCurScan(&cur);
225  processFile(line); // start to scan licenses
226  } // while
227 
228  if (line) free(line);
229 }
230 
/**
 * \brief Create the worker processes and scan the path lists.
 *
 * One child is forked for each of the lists proc_num, proc_num - 1, ..., 1;
 * every child scans its own list with read_file_grab_license() and then
 * returns to the caller. Once the last child exists, the calling process
 * scans list 0 itself.
 *
 * If fork() fails, no further children are created and the calling process
 * scans all lists that have not been handed to a child yet (from the current
 * one down to list 0), so that no file is left unscanned.
 *
 * The function returns in the parent and in every child; callers have to
 * compare getpid() with the parent's pid to tell them apart.
 *
 * \param proc_num Index of the highest path list a child is created for
 * \param pFile    Array of path lists opened for reading
 */
void myFork(int proc_num, FILE **pFile) {
  pid_t pid;

  for (;;)
  {
    pid = fork();

    if (pid < 0)
    {
      LOG_FATAL("fork failed\n");
      /* fall back to scanning the unassigned lists in this process */
      for (; proc_num >= 0; proc_num--)
      {
        read_file_grab_license(proc_num, pFile);
      }
      return;
    }

    if (pid == 0)
    {
      /* Child process, every single process runs on one temp path file */
      read_file_grab_license(proc_num, pFile);
      return;
    }

    /* we're in the parent: keep creating children until list 1 is assigned */
    if (proc_num <= 1)
    {
      break;
    }
    proc_num--;
  }

  /* main(parent) process runs on the first temp path file */
  read_file_grab_license(0, pFile);
}
261 
265 int main(int argc, char **argv)
266 {
267  int i;
268  int c;
269  int file_count = 0;
270  char *cp;
271  char sErrorBuf[1024];
272  char *agent_desc = "License Scanner";
273  char **files_to_be_scanned;
274  char *COMMIT_HASH = NULL;
275  char *VERSION = NULL;
276  char agent_rev[myBUFSIZ];
278  char *scanning_directory= NULL;
279  int process_count = 0;
280  bool ignoreFilesWithMimeType = false;
281 
282  /* connect to the scheduler */
283  fo_scheduler_connect(&argc, argv, &(gl.pgConn));
285 
286 #ifdef PROC_TRACE
287  traceFunc("== main(%d, %p)\n", argc, argv);
288 #endif /* PROC_TRACE */
289 
290 #ifdef MEMORY_TRACING
291  mcheck(0);
292 #endif /* MEMORY_TRACING */
293 #ifdef GLOBAL_DEBUG
294  gl.DEEBUG = gl.MEM_DEEBUG = 0;
295 #endif /* GLOBAL_DEBUG */
296 
297  files_to_be_scanned = calloc(argc, sizeof(char *));
298 
299  COMMIT_HASH = fo_sysconfig("nomos", "COMMIT_HASH");
300  VERSION = fo_sysconfig("nomos", "VERSION");
301  sprintf(agent_rev, "%s.%s", VERSION, COMMIT_HASH);
302 
303  gl.agentPk = fo_GetAgentKey(gl.pgConn, basename(argv[0]), 0, agent_rev, agent_desc);
304 
305  /* Record the progname name */
306  if ((cp = strrchr(*argv, '/')) == NULL_STR)
307  {
308  strncpy(gl.progName, *argv, sizeof(gl.progName));
309  }
310  else
311  {
312  while (*cp == '.' || *cp == '/')
313  cp++;
314  strncpy(gl.progName, cp, sizeof(gl.progName));
315  }
316 
317  if (putenv("LANG=C") < 0)
318  {
319  char * estr = strerror_r(errno, sErrorBuf, sizeof(sErrorBuf));
320  LOG_FATAL("Cannot set LANG=C in environment. Error: %s", estr)
321  Bail(-__LINE__);
322  }
323 
324  /* Save the current directory */
325  if (getcwd(gl.initwd, sizeof(gl.initwd)) == NULL_STR)
326  {
327  char *estr = strerror_r(errno, sErrorBuf, sizeof(sErrorBuf));
328  LOG_FATAL("Cannot obtain starting directory. Error: %s", estr)
329  Bail(-__LINE__);
330  }
331 
332  /* default paragraph size (# of lines to scan above and below the pattern) */
333  gl.uPsize = 6;
334 
335  /* Build the license ref cache to hold 2**11 (2048) licenses.
336  This MUST be a power of 2.
337  */
338  cacheroot.maxnodes = 2 << 11;
339  cacheroot.nodes = calloc(cacheroot.maxnodes, sizeof(cachenode_t));
340  if (!initLicRefCache(&cacheroot))
341  {
342  LOG_FATAL("Nomos could not allocate %d cacheroot nodes.", cacheroot.maxnodes)
343  Bail(-__LINE__);
344  }
345 
346  /* Process command line options */
347  while ((c = getopt(argc, argv, "VJSNvhiIlc:d:n:")) != -1)
348  {
349  switch (c) {
350  case 'c': break; /* handled by fo_scheduler_connect() */
351  case 'i':
352  /* "Initialize" */
353  Bail(0); /* DB was opened above, now close it and exit */
354  case 'l':
355  /* set long command line output */
356  gl.progOpts |= OPTS_LONG_CMD_OUTPUT;
357  break;
358  case 'v':
359  Verbose++; break;
360  case 'J':
361  gl.progOpts |= OPTS_JSON_OUTPUT;
362  break;
363  case 'S':
364  gl.progOpts |= OPTS_HIGHLIGHT_STDOUT;
365  break;
366  case 'N':
367  gl.progOpts |= OPTS_NO_HIGHLIGHTINFO;
368  break;
369  case 'V':
370  printf("%s", BuildVersion);
371  Bail(0);
372  case 'd': /* diretory to scan */
373  gl.progOpts |= OPTS_SCANNING_DIRECTORY;
374  scanning_directory = optarg;
375  struct stat dir_sta;
376  int ret = stat(scanning_directory, &dir_sta);
377  if (-1 == ret || S_IFDIR != (dir_sta.st_mode & S_IFMT))
378  {
379  if (-1 == ret) printf("stat('%s') error message: %s.\n",scanning_directory, strerror(errno));
380  else printf("Warning: '%s' from -d is not a good directory(dir_sta.st_mode & S_IFMT = %o).\n", scanning_directory, dir_sta.st_mode & S_IFMT);
381  Usage(argv[0]);
382  Bail(-__LINE__);
383  }
384  break;
385  case 'n': /* spawn mutiple processes to scan */
386  process_count = atoi(optarg);
387  break;
388  case 'I':
389  ignoreFilesWithMimeType = true;
390  break;
391  case 'h':
392  default:
393  Usage(argv[0]);
394  Bail(-__LINE__);
395  }
396  }
397 
398 
399  /* Copy filename args (if any) into array */
400  for (i = optind; i < argc; i++)
401  {
402  files_to_be_scanned[file_count] = argv[i];
403  file_count++;
404  }
405 
406  licenseInit();
407  gl.flags = 0;
408 
409  if (file_count == 0 && !scanning_directory)
410  {
411  arsNomos(&cacheroot, ignoreFilesWithMimeType);
412  }
413  else
414  { /******** Files on the command line ********/
415  FILE **pFile = NULL; // store temp file descriptors
416  char (*pTempFileName)[50] = NULL; // store temp file names, they are looking like /tmp/foss-XXXXXX
417  pid_t mainPid = 0; // main process id
418  cur.cliMode = 1;
419 
420  /* when scanning_directory is real direcotry, scan license in parallel */
421  if (scanning_directory) {
422  if (process_count < 2) process_count = 2; // the least count is 2, at least has one child process
423  if (mutexJson == NULL && optionIsSet(OPTS_JSON_OUTPUT))
424  {
425  initializeJson();
426  printf("{\n\"results\":[\n");
427  fflush(0);
428  }
429  pFile = malloc(process_count*(sizeof(FILE*)));
430  pTempFileName = malloc(process_count*sizeof(char[50]));
431  int i = 0;
432  int file_descriptor = 0;
433  for(i = 0; i < process_count; i++)
434  {
435  /* create temp file */
436  char file_template[] = "/tmp/foss-XXXXXX"; // 'XXXXXX' will be replaced after mkstemp
437  file_descriptor = mkstemp(file_template);
438 
439  /* get the temp path file distriptors */
440  pFile[i] = fdopen(file_descriptor, "w"); // open the files to write later
441  if (!pFile[i])
442  {
443  LOG_FATAL("failed to open %s, %s\n", file_template, strerror(errno));
444  }
445  strcpy(pTempFileName[i], file_template); // store temp file names
446  }
447 
448  /* walk through the specified directory to get all the file(file path) and
449  store into mutiple files - /tmp/foss-XXXXXX */
450  int distribute_count = 0; // record how many files are found in one directory
451  list_dir(scanning_directory, process_count, &distribute_count, pFile); // list and store files into /tmp/foss-XXXXXX in one directory
452 
453  /* after the walking through and writing job is done, close all the temp path file distriptors.
454  then open the temp path files to read */
455  for(i = 0; i < process_count; i++)
456  {
457  if (pFile[i]) fclose(pFile[i]); // write all the paths
458  pFile[i] = fopen(pTempFileName[i], "r"); // open the temp files to read
459  }
460 
461  /* create process_count - 1 child processes(please do not forget we always have the main process) */
462  mainPid = getpid(); // get main process id
463  myFork(process_count - 1, pFile); // spawn process_count - 1 chile processes and grab licenses through process_count processes
464  int status = 0;
465  pid_t wpid = 0;
466  if (mainPid == getpid())
467  {
468  /* wait all processes done. */
469  while(1){
470  wpid = wait(&status);
471  if (-1 == wpid) break;
472  }
473 
474  /* close the opening files, then delete the temp path files */
475  for(i = 0; i < process_count; i++)
476  {
477  if (pFile[i])
478  {
479  fclose(pFile[i]);
480  unlink(pTempFileName[i]);
481  }
482  }
483 
484  if (optionIsSet(OPTS_JSON_OUTPUT))
485  {
486  printf("]\n}\n");
487  destroyJson();
488  }
489 
490  /* free memeory */
491  free(pFile);
492  free(pTempFileName);
493  }
494  }
495  else {
496  if (0 != process_count)
497  {
498  printf("Warning: -n {nprocs} ONLY works with -d {directory}.\n");
499  }
500  if (optionIsSet(OPTS_JSON_OUTPUT))
501  {
502  initializeJson();
503  printf("{\n\"results\":[\n");
504  fflush(0);
505  }
506  for (i = 0; i < file_count; i++) {
507  initializeCurScan(&cur);
508  processFile(files_to_be_scanned[i]);
509  recordScanToDB(&cacheroot, &cur);
510  freeAndClearScan(&cur);
511  }
512  if (optionIsSet(OPTS_JSON_OUTPUT))
513  {
514  printf("]\n}\n");
515  destroyJson();
516  }
517  }
518  }
519 
520  lrcache_free(&cacheroot); // for valgrind
521 
522  /* Normal Exit */
523  Bail(0);
524 
525  /* this will never execute but prevents a compiler warning about reaching
526  the end of a non-void function */
527  return (0);
528 }
int Verbose
Verbose level.
Definition: nomos.c:41
int isFILE(char *pathname)
Check if an inode is a file.
Definition: util.c:1352
FUNCTION int recordScanToDB(cacheroot_t *pcroot, struct curScan *scanRecord)
Write out the information about the scan to the FOSSology database.
Definition: nomos_utils.c:858
int fo_checkPQresult(PGconn *pgConn, PGresult *result, char *sql, char *FileID, int LineNumb)
Check the result status of a postgres SELECT.
Definition: libfossdb.c:181
int maxnodes
No. of nodes in the list.
Definition: liccache.h:53
cachenode_t * nodes
Array of nodes.
Definition: liccache.h:54
char BuildVersion[]
Definition: buckets.c:79
int cliMode
Definition: nomos.h:425
int optionIsSet(int val)
Check if a CLI option is set.
Definition: nomos_utils.c:568
void destroyJson()
Definition: json_writer.c:120
int schedulerMode
Definition: nomos.c:40
Structure holding data truly global in that it remains consistent for each file scanned.
Definition: nomos.h:357
char * fo_scheduler_current()
Get the last read string from the scheduler.
void Bail(int exitval)
Close connections and exit.
Definition: nomos_utils.c:541
void fo_scheduler_connect(int *argc, char **argv, PGconn **db_conn)
Establish a connection between an agent and the scheduler.
void arsNomos(cacheroot_t *cacheroot, bool ignoreFilesWithMimeType)
Make entry in ars table for audit.
Definition: nomos.c:67
char pFile[myBUFSIZ]
Definition: nomos.h:410
char initwd[myBUFSIZ]
CDB, would like to workaround/eliminate.
Definition: nomos.h:358
FUNCTION int processFile(PGconn *pgConn, pbucketdef_t bucketDefArray, puploadtree_t puploadtree, int agent_pk, int hasPrules)
Process a file.
Definition: walk.c:178
int flags
Flags.
Definition: nomos.h:361
fo_dbManager * dbManager
FOSSology DB manager.
Definition: nomos.h:376
FUNCTION void lrcache_free(cacheroot_t *pcroot)
Free the hash table.
Definition: liccache.c:83
int uPsize
Size.
Definition: nomos.h:362
int progOpts
CLI options.
Definition: nomos.h:360
void myFork(int proc_num, FILE **pFile)
Recursively create the child processes; each process grabs licenses from its own path list.
Definition: nomos.c:237
fo_dbManager * fo_dbManager_new(PGconn *dbConnection)
Create and initialize new fo_dbManager object.
Definition: standalone.c:28
void licenseInit()
license initialization
Definition: licenses.c:82
int fo_scheduler_userID()
Gets the id of the user that created the job that the agent is running.
Usage()
Print Usage statement.
Definition: fo_dbcheck.php:75
FUNCTION void freeAndClearScan(struct curScan *thisScan)
Clean-up all the per scan data structures, freeing any old data.
Definition: nomos_utils.c:949
licText_t licText[]
char * fo_RepMkPath(const char *Type, char *Filename)
Given a filename, construct the full path to the file.
Definition: libfossrepo.c:364
void read_file_grab_license(int file_number, FILE **pFile)
read line by line, then call processFile to grab license line by line
Definition: nomos.c:207
int main(int argc, char **argv)
Definition: nomos.c:265
#define NULL_STR
NULL string.
Definition: nomos.h:248
FUNCTION int fo_GetAgentKey(PGconn *pgConn, const char *agent_name, long Upload_pk, const char *rev, const char *agent_desc)
Get the latest enabled agent key (agent_pk) from the database.
Definition: libfossagent.c:172
char * fo_scheduler_next()
Get the next data to process from the scheduler.
FUNCTION int fo_WriteARS(PGconn *pgConn, int ars_pk, int upload_pk, int agent_pk, const char *tableName, const char *ars_status, int ars_success)
Write ars record.
Definition: libfossagent.c:228
void fo_scheduler_heart(int i)
This function must be called by agents to let the scheduler know they are alive and how many items th...
void list_dir(const char *dir_name, int process_count, int *distribute_count, FILE **pFile)
list all files and store file paths from the specified directory
Definition: nomos.c:149
Nomos header file.
FUNCTION int GetUploadPerm(PGconn *pgConn, long UploadPk, int user_pk)
Get users permission to this upload.
Definition: libfossagent.c:385
void initializeJson()
Definition: json_writer.c:111
Struct that tracks state related to current file being scanned.
Definition: nomos.h:404
PGresult * getSelectedPFiles(PGconn *pgConn, int uploadPk, int agentPk, bool ignoreFilesWithMimeType)
Get the upload_pk, agent_pk and ignoreFilesWithMimeType to get all the file Ids for nomos...
Definition: libfossagent.c:476
const char * upload_pk
Definition: sqlstatements.h:93
int agentPk
Agent id.
Definition: nomos.h:372
long pFileFk
Definition: nomos.h:409
char * fo_sysconfig(const char *sectionname, const char *variablename)
gets a system configuration variable from the configuration data.
FUNCTION int initLicRefCache(cacheroot_t *pcroot)
Build a cache of the license ref db table.
Definition: nomos_utils.c:268
PGresult * checkDuplicateReq(PGconn *pgConn, int uploadPk, int agentPk)
Get the upload_pk and agent_pk to find out the agent has already scanned the package.
Definition: libfossagent.c:449
FUNCTION void initializeCurScan(struct curScan *cur)
Initialize the scanner.
Definition: nomos_utils.c:934
char progName[64]
Program name.
Definition: nomos.h:359
#define PERM_WRITE
Read-Write permission.
Definition: libfossology.h:45
PGconn * pgConn
DB Connection.
Definition: nomos.h:375