FOSSology  3.2.0rc1
Open Source License Compliance by Open Source Software
fo_antelink.php
1 #!/usr/bin/php
2 <?php
3 /***********************************************************
4  Copyright (C) 2012 Hewlett-Packard Development Company, L.P.
5 
6  This program is free software; you can redistribute it and/or
7  modify it under the terms of the GNU General Public License
8  version 2 as published by the Free Software Foundation.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License along
16  with this program; if not, write to the Free Software Foundation, Inc.,
17  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18  ***********************************************************/
19 
// $DATAROOTDIR and $PROJECT come from Makefile
//require_once "$DATAROOTDIR/$PROJECT/lib/php/bootstrap.php";
require_once "/usr/local/share/fossology/lib/php/bootstrap.php";

// NOTE THIS IS A PRIVATE KEY - read from file acme.key (a relative path, so it
// is resolved against the current working directory).
$acmekey = file_get_contents("acme.key");
if ($acmekey === false) {
  echo "Fatal: unable to read antelink key file acme.key\n";
  exit(1);
}
// The key is pasted into every request URL below; surrounding whitespace
// (typically the trailing newline of the key file) would corrupt those URLs.
$acmekey = trim($acmekey);
if ($acmekey === '') {
  echo "Fatal: antelink key file acme.key is empty\n";
  exit(1);
}

// Antepedia Computing Machinery Engine (acme) url
$acmebaseurl = 'https://api.antepedia.com/acme/v3';
//$acmequeryurl = $acmebaseurl . "/fquery/$acmekey"; // Full query, slow. Antelink support recommends only sending one sha1 at a time.
$acmeBinaryqueryurl = $acmebaseurl . "/bquery/$acmekey";
$acmequeryurl = $acmebaseurl . "/squery/$acmekey";
$acmekeycheckurl = $acmebaseurl . "/checkey/$acmekey";

$SysConf = array(); // fo system configuration variables
$PG_CONN = 0; // Database connection (presumably established by ConfigInit() - confirm)

/* Set SYSCONFDIR and set global (for backward compatibility) */
$SysConf = bootstrap();

/* Initialize global system configuration variables $SysConfig[] */
ConfigInit($SYSCONFDIR, $SysConf);

/* Check Antelink acme key */
$ch = curl_init($acmekeycheckurl);
SetCurlArgs($ch);
$contents = curl_exec($ch);
if ($contents === false) {
  // Transport failure: report it instead of misreporting an invalid key.
  echo "Error " . curl_errno($ch) . ": " . curl_error($ch) . "\n";
  curl_close($ch);
  exit(1);
}
curl_close($ch);

// json_decode() yields null for a non-JSON body; treat that, and a missing or
// false "authorized" member, as a rejected key.
$response = json_decode($contents);
if (!is_object($response) || empty($response->authorized)) {
  echo "Invalid antelink acme key.\n";
  exit(1);
}
59 
// Maximum number of sha1's to send to antelink in a single batch
$MaxBinarySend = 500; // batch size for the bquery (binary) precheck
$MaxSend = 10;        // batch size for the squery (project detail) request

/* -v -p -u {upload_pk} -t {tag_pk}
 * -u and -t are mandatory
 */
$Options = getopt("vpt:u:");
if (!is_array($Options)
  || !array_key_exists('t', $Options)
  || !array_key_exists('u', $Options)) {
  echo "Fatal: Missing parameter\n";
  Usage($argc, $argv);
  // "exit -1" parses as "(exit) - 1" and terminates with status 0, so the
  // status has to be passed in parentheses to signal failure to the caller.
  exit(1);
}

$tag_pk = $Options['t'];
$upload_pk = $Options['u'];

/* Both values end up in SQL statements, so accept plain decimal keys only.
 * getopt() returns an array when an option is repeated; reject that as well. */
foreach (array('-t' => $tag_pk, '-u' => $upload_pk) as $OptName => $OptValue) {
  if (!is_string($OptValue) || !preg_match('/\A[0-9]+\z/', $OptValue)) {
    echo "Fatal: $OptName requires a single numeric key\n";
    Usage($argc, $argv);
    exit(1);
  }
}

$PrintOnly = array_key_exists('p', $Options);
$Verbose = array_key_exists('v', $Options);
81 
/* Select the distinct pfiles of the upload that have no acme_pfile record yet.
 * The ufile_mode bit test presumably filters out artifact entries - confirm
 * against the uploadtree schema.
 * The upload key comes from the command line, so it is bound as a parameter
 * rather than pasted into the statement. */
$sql = 'SELECT pfile_pk, pfile_sha1, ufile_name, acme_pfile_pk FROM (SELECT distinct(pfile_fk) AS PF, ufile_name FROM uploadtree
WHERE upload_fk=$1 and (ufile_mode&x\'10000000\'::int)=0) as SS
inner join pfile on (PF=pfile_pk)
left join acme_pfile on (PF=acme_pfile.pfile_fk) where acme_pfile_pk is null';
$result = pg_query_params($PG_CONN, $sql, array($upload_pk));
DBCheckResult($result, $sql, __FILE__, __LINE__);
if (pg_num_rows($result) == 0) {
  // No rows means either an empty upload or that every file already has an
  // acme_pfile record; the message does not distinguish the two cases.
  echo "Empty upload_pk $upload_pk\n";
  exit;
}
93 
94 
/* loop through each row identifying each as foss or not
 * Put the FOSS SHA1 into an array to send to the squery server.
 * This two step process is needed because bquery can handle requests of 500 hashes
 * but squery can only handle requests of 10 hashes. */
$MasterFOSSarray = array();   // rows the bquery server recognized
$ToAntelink = array();        // rows collected for the current batch
$FoundFOSSfiles = 0;          // running sum of QueryBinaryServer() return values
$PrecheckFileCount = 0;       // rows read from the query result so far
while ($row = pg_fetch_assoc($result)) {
  $PrecheckFileCount++;
  $ToAntelink[] = $row;
  if (count($ToAntelink) >= $MaxBinarySend) {
    $FoundFOSSfiles += QueryBinaryServer($ToAntelink, $MasterFOSSarray);
    $ToAntelink = array();
    // Report after the batch has been queried so the found count covers the
    // files just counted (same order as the final flush below).
    if ($Verbose) {
      echo "Precheck $PrecheckFileCount, found $FoundFOSSfiles\n";
    }
  }
}
pg_free_result($result);

/* flush the last, partially filled batch */
if (count($ToAntelink)) {
  $FoundFOSSfiles += QueryBinaryServer($ToAntelink, $MasterFOSSarray);
  if ($Verbose) {
    echo "Precheck $PrecheckFileCount, found $FoundFOSSfiles\n";
  }
}
121 
/* Walk the rows kept by the precheck and hand them to QueryTag() in batches
 * of at most $MaxSend files (sha1's). */
$TotalFileCount = 0;
$TaggedFileCount = 0;
$ToAntelink = array();
foreach ($MasterFOSSarray as $row) {
  $ToAntelink[] = $row;
  $TotalFileCount += 1;

  /* keep collecting until the batch is full */
  if (count($ToAntelink) < $MaxSend) {
    continue;
  }

  $TaggedFileCount += QueryTag($ToAntelink, $tag_pk, $PrintOnly, $Verbose);
  $ToAntelink = array();
}

/* send whatever is left over from the last, incomplete batch */
if (count($ToAntelink)) {
  $TaggedFileCount += QueryTag($ToAntelink, $tag_pk, $PrintOnly, $Verbose);
}

echo "$TaggedFileCount files tagged out of $TotalFileCount files.\n";

return (0);
142 
143 
/**
 * Send the sha1's of a batch of files to the antelink bquery url and collect
 * the rows the server reports back.
 *
 * @param array $ToAntelink       batch of rows; each row must carry a
 *                                'pfile_sha1' element
 * @param array &$MasterFOSSarray rows of $ToAntelink whose sha1 appears in the
 *                                server's "results" list are appended here
 * @return int number of rows appended to $MasterFOSSarray (0 on curl errors
 *             or an undecodable response)
 */
function QueryBinaryServer($ToAntelink, &$MasterFOSSarray)
{
  global $acmeBinaryqueryurl;

  $NumFound = 0;

  /* construct array of just sha1's */
  $sha1array = array();
  foreach ($ToAntelink as $row) {
    $sha1array[] = $row['pfile_sha1'];
  }
  $PostData = json_encode($sha1array);

  $curlch = curl_init($acmeBinaryqueryurl);
  SetCurlArgs($curlch);

  curl_setopt($curlch, CURLOPT_POST, true);
  curl_setopt($curlch, CURLOPT_POSTFIELDS, $PostData);
  curl_setopt($curlch, CURLOPT_RETURNTRANSFER, true);

  //getting response from server
  $curlresponse = curl_exec($curlch);

  if (curl_errno($curlch)) {
    // display curl errors, then release the handle before giving up on this batch
    echo "Error " . curl_errno($curlch) . ": " . curl_error($curlch) . "\n";
    curl_close($curlch);
    return $NumFound;
  }

  //closing the curl
  curl_close($curlch);

  // json_decode() returns null for a body that is not valid JSON
  $response = json_decode($curlresponse);
  if (!is_object($response)) {
    return $NumFound;
  }

  // print any errors
  if (!empty($response->error)) {
    echo $response->error . "\n";
  }

  /* keep the original row of every sha1 the server returned */
  if (isset($response->results) && is_array($response->results)) {
    foreach ($response->results as $result) {
      if (!isset($result->sha1)) {
        continue;
      }
      $row = GetRawRow($result->sha1, $ToAntelink);
      if (empty($row)) {
        // GetRawRow() returns '' for a sha1 that is not part of this batch;
        // such a placeholder must not reach the tagging stage.
        continue;
      }
      $MasterFOSSarray[] = $row;
      $NumFound++;
    }
  }

  return $NumFound;
}
200 
201 
/**
 * Send a batch of files (sha1 and name) to the antelink squery url and either
 * print the answer or record it in the database.
 *
 * For every project the server returns for a file, the matching acme_project
 * record is looked up (and created through writeacme_project() when missing),
 * an acme_pfile record is written and the file is tagged with $tag_pk unless
 * that tag_file record already exists.
 *
 * @param array  $ToAntelink batch of rows carrying 'pfile_pk', 'pfile_sha1'
 *                           and 'ufile_name'; non-array entries are ignored
 * @param string $tag_pk     key of the tag to attach
 * @param bool   $PrintOnly  true: print the raw results, write nothing
 * @param bool   $Verbose    passed through to writeacme_project()
 * @return int number of tag_file records inserted
 */
function QueryTag($ToAntelink, $tag_pk, $PrintOnly, $Verbose)
{
  global $PG_CONN;
  global $acmequeryurl;

  $numTagged = 0;

  /* construct array of arrays of name and sha1's */
  $rows = array();   // usable rows only; also what results are matched against
  $files = array();
  foreach ($ToAntelink as $row) {
    if (!is_array($row)) {
      // placeholder entries (e.g. '') cannot be indexed by column name
      continue;
    }
    $rows[] = $row;
    $files[] = array('hash' => $row['pfile_sha1'], 'name' => $row['ufile_name']);
  }
  $PostData = json_encode(array('files' => $files));

  $curlch = curl_init($acmequeryurl);
  SetCurlArgs($curlch);

  curl_setopt($curlch, CURLOPT_POST, true);
  curl_setopt($curlch, CURLOPT_POSTFIELDS, $PostData);
  curl_setopt($curlch, CURLOPT_RETURNTRANSFER, true);

  //getting response from server
  $rawresponse = curl_exec($curlch);

  if (curl_errno($curlch)) {
    // display curl errors, then release the handle before giving up on this batch
    echo "Error " . curl_errno($curlch) . ": " . curl_error($curlch) . "\n";
    curl_close($curlch);
    return 0;
  }

  //closing the curl
  curl_close($curlch);

  // json_decode() returns null for a body that is not valid JSON
  $response = json_decode($rawresponse);
  if (!is_object($response)) {
    return $numTagged;
  }

  // print any errors
  if (!empty($response->error)) {
    echo $response->error . "\n";
  }

  if (!isset($response->results) || !is_array($response->results)) {
    return $numTagged;
  }

  /* Add tag or print */
  foreach ($response->results as $result) {
    $sha1 = isset($result->sha1) ? $result->sha1 : '';
    $row = GetRawRow($sha1, $rows);

    if ($PrintOnly) {
      if (! empty($row)) {
        print_r($row);
      }
      print_r($result);
      continue;
    }

    // Without a matching row there is no pfile_pk to record or tag, and
    // without projects there is nothing to record.
    if (empty($row) || empty($result->projects)) {
      continue;
    }

    foreach ($result->projects as $project) {
      /* check if acme_project already exists (check if the url is unique) */
      $url = isset($project->url) ? (string)$project->url : '';
      $name = isset($project->name) ? (string)$project->name : '';
      $acme_project_pk = '';
      $sql = 'SELECT acme_project_pk from acme_project where url=$1 and project_name=$2';
      $sqlresult = pg_query_params($PG_CONN, $sql, array($url, $name));
      DBCheckResult($sqlresult, $sql, __FILE__, __LINE__);
      if (pg_num_rows($sqlresult) > 0) {
        $projrow = pg_fetch_assoc($sqlresult);
        $acme_project_pk = $projrow['acme_project_pk'];
      }
      pg_free_result($sqlresult);

      if (empty($acme_project_pk)) {
        /* this is a new acme_project, so write the acme_project record */
        $acme_project_pk = writeacme_project($project, $Verbose);
      }

      /* write the acme_pfile record */
      writeacme_pfile($acme_project_pk, $row['pfile_pk']);

      /* Tag the pfile (update tag_file table) */
      /* There is no constraint preventing duplicate tags so do a precheck */
      $sql = 'SELECT * from tag_file where pfile_fk=$1 and tag_fk=$2';
      $sqlresult = pg_query_params($PG_CONN, $sql, array($row['pfile_pk'], $tag_pk));
      DBCheckResult($sqlresult, $sql, __FILE__, __LINE__);
      if (pg_num_rows($sqlresult) == 0) {
        $sql = 'insert into tag_file (tag_fk, pfile_fk, tag_file_date, tag_file_text) values ($1, $2, now(), NULL)';
        $insresult = pg_query_params($PG_CONN, $sql, array($tag_pk, $row['pfile_pk']));
        DBCheckResult($insresult, $sql, __FILE__, __LINE__);
        pg_free_result($insresult);
        $numTagged++;
      }
      pg_free_result($sqlresult);
    }
  }

  return $numTagged;
}
312 
/**
 * Look up the row of a batch that belongs to a given sha1.
 *
 * The comparison ignores letter case; when several rows share the sha1 the
 * first one wins.
 *
 * @param string $sha1       sha1 to look for
 * @param array  $ToAntelink rows, each with a 'pfile_sha1' element
 * @return array|string the matching row, or '' when no row matches
 */
function GetRawRow($sha1, $ToAntelink)
{
  $match = '';
  foreach ($ToAntelink as $candidate) {
    if (strcasecmp($candidate['pfile_sha1'], $sha1) != 0) {
      continue;
    }
    $match = $candidate;
    break;
  }
  return $match;
}
328 
329 
/**
 * Apply the curl options shared by every antelink request: user agent,
 * response capture, TLS verification, JSON headers and, when configured,
 * the HTTP proxy from $SysConf['FOSSOLOGY']['http_proxy'].
 *
 * @param resource $ch curl handle to configure (modified in place)
 * @return void
 */
function SetCurlArgs($ch)
{
  global $SysConf;
  curl_setopt($ch, CURLOPT_USERAGENT, 'Curl-php');
  curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
  // The request URLs contain the private acme key, so the server certificate
  // must be verified; skipping verification would expose the key to anyone
  // able to intercept the connection.
  curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
  curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
  // Each header is a single line; line breaks inside a header value produce
  // folded/malformed request headers.
  curl_setopt($ch, CURLOPT_HTTPHEADER, array(
    "Content-Type: application/json; charset=utf-8",
    "Accept: application/json, text/javascript, */*; q=0.01"
  ));

  /* parse http_proxy server and port */
  $http_proxy = '';
  if (isset($SysConf['FOSSOLOGY']['http_proxy'])) {
    $http_proxy = trim($SysConf['FOSSOLOGY']['http_proxy']);
  }
  if ($http_proxy === '') {
    return;
  }

  // Only a purely numeric suffix after the last colon is a port. Anything
  // else ("proxyhost", "http://proxyhost") is handed to curl unchanged.
  $ProxyServer = $http_proxy;
  $ProxyPort = '';
  if (preg_match('/\A(.+):([0-9]+)\z/', $http_proxy, $parts)) {
    $ProxyServer = $parts[1];
    $ProxyPort = (int)$parts[2];
  }

  curl_setopt($ch, CURLOPT_HTTPPROXYTUNNEL, true);
  curl_setopt($ch, CURLOPT_PROXY, $ProxyServer);
  if (! empty($ProxyPort)) {
    curl_setopt($ch, CURLOPT_PROXYPORT, $ProxyPort);
  }
  curl_setopt($ch, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
}
359 
/**
 * Print the command line synopsis of this script to standard output.
 *
 * @param int   $argc number of command line arguments (not used)
 * @param array $argv command line arguments; element 0 is shown as the
 *                    program name
 * @return void
 */
function Usage($argc, $argv)
{
  $lines = array(
    "$argv[0] -v -p -u {upload_pk} -t {tag_pk}\n",
    " -p prints out raw antepedia info, but do not update the db.\n",
    " -v prints project found after inserting into db.\n",
  );
  foreach ($lines as $line) {
    echo $line;
  }
}
371 
/**
 * Insert a new acme_project record built from an antelink project object.
 *
 * Release date and version are taken from the first element of
 * $project->artefacts when present, otherwise from the first element of
 * $project->contents (whose "revision" is stored as the version).
 *
 * @param object $project project object decoded from the antelink response;
 *                        name, url, description, licenses, artefacts and
 *                        contents are read, missing ones count as empty
 * @param bool   $Verbose true: print the name of the project being inserted
 * @return string acme_project_pk of the inserted record ('' if the insert
 *                returned no row)
 */
function writeacme_project($project, $Verbose)
{
  global $PG_CONN;

  $project_name = isset($project->name) ? (string)$project->name : '';
  $url = isset($project->url) ? (string)$project->url : '';
  $description = isset($project->description) ? (string)$project->description : '';

  /* convert licenses array to pipe delimited list */
  $licenses = '';
  if (isset($project->licenses) && is_array($project->licenses)) {
    $licenses = implode('|', $project->licenses);
  }

  /* figure out if we have artefact or content data and pull release date and version out of their respective structs */
  $projectDate = 0;
  $version = '';
  if (! empty($project->artefacts)) {
    $artefact = $project->artefacts[0];
    $projectDate = isset($artefact->releaseDate) ? $artefact->releaseDate : 0;
    $version = isset($artefact->version) ? (string)$artefact->version : '';
  } else if (! empty($project->contents)) {
    $content = $project->contents[0];
    $projectDate = isset($content->releaseDate) ? $content->releaseDate : 0;
    $version = isset($content->revision) ? (string)$content->revision : '';
  }

  /* convert unix time to date yyyymmdd
   * Watch out for time stamps in milliseconds
   */
  if ($projectDate > 20000000000) {
    $projectDate = $projectDate / 1000; // convert to seconds if necessary
  }
  // date() expects an integer timestamp; the division above yields a float
  $releasedate = date("Ymd", (int)$projectDate);

  if ($Verbose) {
    echo "Found project: $project_name\n";
  }

  /* insert the data and let the database hand back the new primary key;
   * re-selecting on all column values could match an older, identical row */
  $sql = 'insert into acme_project (project_name, url, description, licenses, releasedate, version)
  values ($1, $2, $3, $4, $5, $6) returning acme_project_pk';
  $InsResult = pg_query_params($PG_CONN, $sql,
    array($project_name, $url, $description, $licenses, $releasedate, $version));
  DBCheckResult($InsResult, $sql, __FILE__, __LINE__);
  $row = pg_fetch_assoc($InsResult);
  pg_free_result($InsResult);

  return $row ? $row['acme_project_pk'] : '';
}
433 
/**
 * Record that a pfile belongs to an acme project (insert into acme_pfile).
 *
 * The insert is best effort: a failure - almost certainly a duplicate
 * insertion - is deliberately ignored.
 *
 * @param string $acme_project_pk key of the acme_project record
 * @param string $pfile_pk        key of the pfile record
 * @return void
 */
function writeacme_pfile($acme_project_pk, $pfile_pk)
{
  global $PG_CONN;

  // Without both keys there is nothing valid to insert.
  if ($acme_project_pk === null || $acme_project_pk === ''
    || $pfile_pk === null || $pfile_pk === '') {
    return;
  }

  /* insert the data */
  $sql = 'insert into acme_pfile (pfile_fk, acme_project_fk) values ($1, $2)';
  // ignore errors (this is a prototype). Errors are almost certainly from a duplicate insertion
  $InsResult = @pg_query_params($PG_CONN, $sql, array($pfile_pk, $acme_project_pk));
  if ($InsResult !== false) {
    pg_free_result($InsResult);
  }
}
448 
Usage()
Print Usage statement.
Definition: fo_dbcheck.php:75
foreach($Options as $Option=> $OptVal) if(0==$reference_flag &&0==$nomos_flag) $PG_CONN
DBCheckResult($result, $sql, $filenm, $lineno)
Check the postgres result for unexpected errors. If found, treat them as fatal.
Definition: common-db.php:198
bootstrap($sysconfdir="")
Bootstrap the fossology php library.
Definition: migratetest.php:93
ConfigInit($sysconfdir, &$SysConf)
Initialize the fossology system after bootstrap().