23 #include "copyrightUtils.hpp" 24 #include <boost/program_options.hpp> 38 char* COMMIT_HASH =
fo_sysconfig(AGENT_NAME,
"COMMIT_HASH");
41 if (!asprintf(&agentRevision,
"%s.%s", VERSION, COMMIT_HASH))
47 AGENT_NAME, 0, agentRevision, AGENT_DESC);
90 std::vector<std::string>& fileNames, std::string& directoryToScan)
94 boost::program_options::options_description desc(IDENTITY
": recognized options");
96 (
"help,h",
"shows help")
99 boost::program_options::value<unsigned>(&type)
100 ->default_value(ALL_TYPES),
101 "type of regex to try" 104 "verbose,v",
"increase verbosity" 108 boost::program_options::value<vector<string> >(),
109 "user defined Regex to search: [{name=cli}@@][{matchingGroup=0}@@]{regex} e.g. 'linux@@1@@(linus) torvalds'" 113 boost::program_options::value< vector<string> >(),
117 "json,J",
"output JSON" 120 "ignoreFilesWithMimeType,I",
"ignoreFilesWithMimeType" 123 "config,c", boost::program_options::value<string>(),
"path to the sysconfigdir" 126 "scheduler_start",
"specifies, that the command was called by the scheduler" 129 "userID", boost::program_options::value<int>(),
"the id of the user that created the job (only in combination with --scheduler_start)" 132 "groupID", boost::program_options::value<int>(),
"the id of the group of the user that created the job (only in combination with --scheduler_start)" 135 "jobId", boost::program_options::value<int>(),
"the id of the job (only in combination with --scheduler_start)" 138 "directory,d", boost::program_options::value<string>(),
"directory to scan (recursive)" 142 boost::program_options::positional_options_description p;
145 boost::program_options::variables_map vm;
149 boost::program_options::store(
150 boost::program_options::command_line_parser(argc, argv).options(desc).positional(p).run(), vm);
152 type = vm[
"type"].as<
unsigned>();
154 if ((vm.count(
"help") > 0) || (type > ALL_TYPES))
156 cout << desc << endl;
160 if (vm.count(
"files"))
162 fileNames = vm[
"files"].as<std::vector<string> >();
165 unsigned long verbosity = vm.count(
"verbose");
166 bool json = vm.count(
"json") > 0 ?
true :
false;
167 bool ignoreFilesWithMimeType = vm.count(
"ignoreFilesWithMimeType") > 0 ?
true :
false;
169 dest =
CliOptions(verbosity, type, json, ignoreFilesWithMimeType);
171 if (vm.count(
"regex"))
173 const std::vector<std::string>& userRegexesFmts = vm[
"regex"].as<vector<std::string> >();
174 for (
auto it = userRegexesFmts.begin(); it != userRegexesFmts.end(); ++it) {
178 cout <<
"cannot parse regex format : " << *it << endl;
188 if (vm.count(
"directory"))
190 if (vm.count(
"files"))
192 cout <<
"cannot pass files and directory at the same time" << endl;
193 cout << desc << endl;
197 directoryToScan = vm[
"directory"].as<std::string>();
202 catch (boost::bad_any_cast&) {
203 cout <<
"wrong parameter type" << endl;
204 cout << desc << endl;
207 catch (boost::program_options::error&)
209 cout <<
"wrong command line arguments" << endl;
210 cout << desc << endl;
222 #ifdef IDENTITY_COPYRIGHT 254 #define RGX_FMT_SEPARATOR "@@" 255 auto fmtRegex = rx::regex(
256 "(?:([[:alpha:]]+)" RGX_FMT_SEPARATOR
")?(?:([[:digit:]]+)" RGX_FMT_SEPARATOR
")?(.*)",
257 rx::regex_constants::icase
260 rx::match_results<std::string::const_iterator>
match;
261 if (rx::regex_match(regexDesc.begin(), regexDesc.end(), match, fmtRegex))
263 std::string type(match.length(1) > 0 ? match.str(1) : defaultType.c_str());
265 int regId = match.length(2) > 0 ? std::stoi(std::string(match.str(2))) : 0;
267 if (match.length(3) == 0)
270 std::istringstream stream;
271 stream.str(type +
"=" + match.str(3));
303 if (!copyrightDatabaseHandler.
begin())
309 for (
auto m = matches.begin(); m != matches.end(); ++m)
318 entry.
type = m->type;
320 if (entry.
content.length() != 0)
325 copyrightDatabaseHandler.
rollback();
331 return copyrightDatabaseHandler.
commit();
345 const list<unptr::shared_ptr<scanner>>& scanners = state.
getScanners();
346 for (
auto sc = scanners.begin(); sc != scanners.end(); ++sc)
348 (*sc)->ScanString(sContent, l);
372 cout <<
"File not found " << pFileId << endl;
376 char* fileName = NULL;
378 #pragma omp critical (repo_mk_path) 393 cout <<
"PFile not found in repo " << pFileId << endl;
412 vector<unsigned long> fileIds = databaseHandler.
queryFileIdsForUpload(agentId, uploadId, ignoreFilesWithMimeType);
418 size_t pFileCount = fileIds.size();
420 for (
size_t it = 0; it < pFileCount; ++it)
422 unsigned long pFileId = fileIds[it];
445 const string fileName)
447 const list<unptr::shared_ptr<scanner>>& scanners = state.
getScanners();
448 list<match> matchList;
459 for (
auto sc = scanners.begin(); sc != scanners.end(); ++sc)
461 (*sc)->ScanString(s, matchList);
464 return make_pair(s, matchList);
475 const std::pair<
string, list<match>> resultPair,
bool &printComma)
478 #if JSONCPP_VERSION_HEXA < ((1 << 24) | (4 << 16)) 480 Json::FastWriter jsonWriter;
484 Json::StreamWriterBuilder jsonWriter;
485 jsonWriter[
"commentStyle"] =
"None";
486 jsonWriter[
"indentation"] =
"";
489 if (resultPair.first.empty())
491 result[
"file"] = fileName;
492 result[
"results"] =
"Unable to read file";
496 list<match> resultList = resultPair.second;
498 for (
auto m : resultList)
501 j[
"start"] = m.start;
504 j[
"content"] =
cleanMatch(resultPair.first, m);
507 result[
"file"] = fileName;
508 result[
"results"] = results;
511 #pragma omp critical (jsonPrinter) 522 #if JSONCPP_VERSION_HEXA < ((1 << 24) | (4 << 16)) 525 jsonString = jsonWriter.write(result);
526 jsonString.replace(jsonString.find(
"\n"), string(
"\n").length(),
"");
529 jsonString = Json::writeString(jsonWriter, result);
531 cout <<
" " << jsonString << flush;
541 const std::pair<
string, list<match>> resultPair)
543 if (resultPair.first.empty())
545 cout << fileName <<
" :: Unable to read file" << endl;
549 ss << fileName <<
" ::" << endl;
551 list<match> resultList = resultPair.second;
552 for (
auto m = resultList.begin(); m != resultList.end(); ++m)
554 ss <<
"\t[" << m->start <<
':' << m->end <<
':' << m->type <<
"] '" bool ReadFileToString(const string &fileName, string &out)
Utility: read file to string from scanners.h.
void bail(int exitval)
Disconnect with scheduler returning an error code and exit.
void printResultToStdout(const std::string fileName, const std::pair< string, list< match >> resultPair)
const std::list< unptr::shared_ptr< scanner > > & getScanners() const
Get available scanners.
Store the results of a regex match.
std::vector< unsigned long > queryFileIdsForUpload(int agentId, int uploadId, bool ignoreFilesWithMimeType)
Get the list of pfile ids on which the given agent has no findings for a given upload.
bool commit() const
COMMIT a transaction block in DB.
int s
The socket that the CLI will use to communicate.
CopyrightState getState(CliOptions &&cliOptions)
Create a new state for the current agent based on CliOptions.
bool rollback() const
ROLLBACK a transaction block in DB.
void appendToJson(const std::string fileName, const std::pair< string, list< match >> resultPair, bool &printComma)
void fo_scheduler_disconnect(int retcode)
Disconnect the scheduler connection.
bool insertInDatabase(DatabaseEntry &entry) const
Insert a finding in database.
char * getPFileNameForFileId(unsigned long pfileId) const
Get the file name of a given pfile id.
Manages database related requests for agent.
PGconn * getConnection() const
void matchFileWithLicenses(const string &sContent, unsigned long pFileId, CopyrightState const &state, int agentId, CopyrightDatabaseHandler &databaseHandler)
Scan a given file with all available scanners and save findings to database.
scanner * makeRegexScanner(const std::string &regexDesc, const std::string &defaultType)
Make a boost regex scanner object based on regex desc and type.
std::string type
Type of statement found.
Provides a regex scanner using predefined regexs.
Abstract class to provide interface to scanners.
void addScanner(scanner *regexDesc)
Add scanner to CliOptions.
static void addDefaultScanners(CopyrightState &state)
Add default scanners to the agent state.
Holds information about state of one agent.
bool processUploadId(const CopyrightState &state, int agentId, int uploadId, CopyrightDatabaseHandler &databaseHandler, bool ignoreFilesWithMimeType)
Process a given upload id, scan for statements and add to database.
bool parseCliOptions(int argc, char **argv, CliOptions &dest, std::vector< std::string > &fileNames, std::string &directoryToScan)
Parse the options sent by CLI to CliOptions object.
void addScanner(scanner *scanner)
Add scanner to state.
const CliOptions & getCliOptions() const
Get the CliOptions set by user.
bool begin() const
BEGIN a transaction block in DB.
char * fo_RepMkPath(const char *Type, char *Filename)
Given a filename, construct the full path to the file.
fo_dbManager * dbManager
fo_dbManager object
int queryAgentId(PGconn *dbConn)
Get agent id, exit if agent id is incorrect.
FUNCTION int fo_GetAgentKey(PGconn *pgConn, const char *agent_name, long Upload_pk, const char *rev, const char *agent_desc)
Get the latest enabled agent key (agent_pk) from the database.
FUNCTION int fo_WriteARS(PGconn *pgConn, int ars_pk, int upload_pk, int agent_pk, const char *tableName, const char *ars_status, int ars_success)
Write ars record.
void fo_scheduler_heart(int i)
This function must be called by agents to let the scheduler know they are alive and how many items th...
bool saveToDatabase(const string &s, const list< match > &matches, unsigned long pFileId, int agentId, const CopyrightDatabaseHandler &copyrightDatabaseHandler)
Save findings to the database if agent was called by scheduler.
Store the options sent through the CLI.
int writeARS(int agentId, int arsId, int uploadId, int success, const fo::DbManager &dbManager)
Call C function fo_WriteARS() and translate the arguments.
Implementation of scanner class for copyright.
void matchPFileWithLicenses(CopyrightState const &state, int agentId, unsigned long pFileId, CopyrightDatabaseHandler &databaseHandler)
Get the file contents, scan for statements and save findings to database.
Maps agent data to database schema.
pair< string, list< match > > processSingleFile(const CopyrightState &state, const string fileName)
unsigned int getOptType() const
Get the opt type set by CliOptions.
string cleanMatch(const string &sText, const match &m)
Clean the text based on type.
char * fo_sysconfig(const char *sectionname, const char *variablename)
gets a system configuration variable from the configuration data.
CopyrightDatabaseHandler spawn() const
Spawn/fork a new database handler and return it.