FOSSology  3.2.0rc1
Open Source License Compliance by Open Source Software
scheduler.c
1 /*
2 Author: Daniele Fognini
3 Copyright (C) 2013-2014, Siemens AG
4 
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License
7 version 2 as published by the Free Software Foundation.
8 
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13 
14 You should have received a copy of the GNU General Public License along
15 with this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18 
19 #include "scheduler.h"
20 
21 #include "common.h"
22 #include "database.h"
23 
24 MatchCallbacks schedulerCallbacks =
25  { .onNo = sched_onNoMatch,
26  .onFull = sched_onFullMatch,
27  .onDiff = sched_onDiffMatch,
28  .onBeginOutput = sched_noop,
29  .onBetweenIndividualOutputs = sched_noop,
30  .onEndOutput = sched_noop,
31  .ignore = sched_ignore
32  };
33 
34 int processUploadId(MonkState* state, int uploadId, const Licenses* licenses) {
35  PGresult* fileIdResult = queryFileIdsForUpload(state->dbManager, uploadId, state->ignoreFilesWithMimeType);
36 
37  if (!fileIdResult)
38  return 0;
39 
40  if (PQntuples(fileIdResult) == 0) {
41  PQclear(fileIdResult);
43  return 1;
44  }
45 
46  int threadError = 0;
47 #ifdef MONK_MULTI_THREAD
48  #pragma omp parallel
49 #endif
50  {
51  MonkState threadLocalStateStore = *state;
52  MonkState* threadLocalState = &threadLocalStateStore;
53 
54  threadLocalState->dbManager = fo_dbManager_fork(state->dbManager);
55  if (threadLocalState->dbManager) {
56  int count = PQntuples(fileIdResult);
57 #ifdef MONK_MULTI_THREAD
58  #pragma omp for schedule(dynamic)
59 #endif
60  for (int i = 0; i < count; i++) {
61  if (threadError)
62  continue;
63 
64  long pFileId = atol(PQgetvalue(fileIdResult, i, 0));
65 
66  if ((pFileId <= 0) || hasAlreadyResultsFor(threadLocalState->dbManager, threadLocalState->agentId, pFileId))
67  {
69  continue;
70  }
71 
72  if (matchPFileWithLicenses(threadLocalState, pFileId, licenses, &schedulerCallbacks)) {
74  } else {
76  threadError = 1;
77  }
78  }
79  fo_dbManager_finish(threadLocalState->dbManager);
80  } else {
81  threadError = 1;
82  }
83  }
84  PQclear(fileIdResult);
85 
86  return !threadError;
87 }
88 
89 int handleSchedulerMode(MonkState* state, const Licenses* licenses) {
90  /* scheduler mode */
91  state->scanMode = MODE_SCHEDULER;
92  queryAgentId(state, AGENT_NAME, AGENT_DESC);
93 
94  while (fo_scheduler_next() != NULL) {
95  int uploadId = atoi(fo_scheduler_current());
96 
97  if (uploadId == 0) continue;
98 
99  int arsId = fo_WriteARS(fo_dbManager_getWrappedConnection(state->dbManager),
100  0, uploadId, state->agentId, AGENT_ARS, NULL, 0);
101 
102  if (arsId<=0)
103  bail(state, 1);
104 
105  if (!processUploadId(state, uploadId, licenses))
106  bail(state, 2);
107 
108  fo_WriteARS(fo_dbManager_getWrappedConnection(state->dbManager),
109  arsId, uploadId, state->agentId, AGENT_ARS, NULL, 1);
110  }
112 
113  return 1;
114 }
115 
116 int sched_onNoMatch(MonkState* state, const File* file) {
117  return saveNoResultToDb(state->dbManager, state->agentId, file->id);
118 }
119 
120 int sched_onFullMatch(MonkState* state, const File* file, const License* license, const DiffMatchInfo* matchInfo) {
121  fo_dbManager* dbManager = state->dbManager;
122  const int agentId = state->agentId;
123  const long fileId = file->id;
124 
125 #ifdef DEBUG
126  printf("found full match between (pFile=%ld) and \"%s\" (rf_pk=%ld)\n", file->id, license->shortname, license->refId);
127 #endif //DEBUG
128 
129  fo_dbManager_begin(dbManager);
130 
131  int success = 0;
132  long licenseFileId = saveToDb(dbManager, agentId, license->refId, fileId, 100);
133  if (licenseFileId > 0) {
134  success = saveDiffHighlightToDb(dbManager, matchInfo, licenseFileId);
135  }
136 
137  if (success) {
138  fo_dbManager_commit(dbManager);
139  } else {
140  fo_dbManager_rollback(dbManager);
141  }
142  return success;
143 }
144 
145 int sched_onDiffMatch(MonkState* state, const File* file, const License* license, const DiffResult* diffResult) {
146  fo_dbManager* dbManager = state->dbManager;
147  const int agentId = state->agentId;
148  const long fileId = file->id;
149 
150  unsigned short matchPercent = diffResult->percentual;
151 
152 #ifdef DEBUG
153  printf("found diff match between (pFile=%ld) and \"%s\" (rf_pk=%ld); ", file->id, license->shortname, license->refId);
154  printf("%u%%; ", diffResult->percentual);
155 
156  char * formattedMatchArray = formatMatchArray(diffResult->matchedInfo);
157  printf("diffs: {%s}\n", formattedMatchArray);
158  free(formattedMatchArray);
159 #endif //DEBUG
160 
161  fo_dbManager_begin(dbManager);
162 
163  int success = 0;
164  long licenseFileId = saveToDb(dbManager, agentId, license->refId, fileId, matchPercent);
165  if (licenseFileId > 0) {
166  success = saveDiffHighlightsToDb(dbManager, diffResult->matchedInfo, licenseFileId);
167  }
168 
169  if (success) {
170  fo_dbManager_commit(dbManager);
171  } else {
172  fo_dbManager_rollback(dbManager);
173  }
174 
175  return success;
176 }
177 
178 /* check if we have other results for this file.
179  * We do it now to minimize races with a concurrent scan of this file:
180  * the same file could be inside more than upload
181  */
182 int sched_ignore(MonkState* state, const File* file)
183 {
184  return hasAlreadyResultsFor(state->dbManager, state->agentId, file->id);
185 }
186 
187 #pragma GCC diagnostic push
188 #pragma GCC diagnostic ignored "-Wunused-parameter"
189 int sched_noop(MonkState* state) {
190  return 1;
191 }
192 #pragma GCC diagnostic pop
Definition: monk.h:78
void bail(int exitval)
Disconnect with scheduler returning an error code and exit.
Definition: monk.h:55
Definition: monk.h:72
Definition: nomos.h:439
char * fo_scheduler_current()
Get the last read string from the scheduler.
Definition: monk.h:66
bool processUploadId(const CopyrightState &state, int agentId, int uploadId, CopyrightDatabaseHandler &databaseHandler, bool ignoreFilesWithMimeType)
Process a given upload id, scan from statements and add to database.
fo_dbManager * dbManager
fo_dbManager object
Definition: process.c:28
int queryAgentId(PGconn *dbConn)
Get agent id, exit if agent id is incorrect.
char * fo_scheduler_next()
Get the next data to process from the scheduler.
FUNCTION int fo_WriteARS(PGconn *pgConn, int ars_pk, int upload_pk, int agent_pk, const char *tableName, const char *ars_status, int ars_success)
Write ars record.
Definition: libfossagent.c:228
void fo_scheduler_heart(int i)
This function must be called by agents to let the scheduler know they are alive and how many items th...
PGresult * queryFileIdsForUpload(fo_dbManager *dbManager, int uploadId, bool ignoreFilesWithMimeType)
Get all file IDs (pfile_fk) for a given upload.
Definition: libfossagent.c:73
void matchPFileWithLicenses(CopyrightState const &state, int agentId, unsigned long pFileId, CopyrightDatabaseHandler &databaseHandler)
Get the file contents, scan for statements and save findings to database.