FOSSology  3.2.0rc1
Open Source License Compliance by Open Source Software
wget_agent.c
Go to the documentation of this file.
1 /***************************************************************
2  wget_agent: Retrieve a file and put it in the database.
3 
4  Copyright (C) 2007-2014 Hewlett-Packard Development Company, L.P.
5 
6  This program is free software; you can redistribute it and/or
7  modify it under the terms of the GNU General Public License
8  version 2 as published by the Free Software Foundation.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License along
16  with this program; if not, write to the Free Software Foundation, Inc.,
17  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 
19  ***************************************************************/
20 
25 #define _GNU_SOURCE // for asprintf
26 
27 #define ASPRINTF_MEM_ERROR 88
28 #define ASPRINTF_MEM_ERROR_LOG LOG_FATAL("Not enough memory for asprintf before line %d", __LINE__)
29 
30 #include "wget_agent.h"
31 
32 char SQL[STRMAX];
33 
34 PGconn *pgConn = NULL;
35 long GlobalUploadKey=-1;
36 char GlobalTempFile[STRMAX];
37 char GlobalURL[URLMAX];
38 char GlobalType[STRMAX];
39 char GlobalParam[STRMAX];
40 char *GlobalProxy[6];
41 char GlobalHttpProxy[STRMAX];
43 gid_t ForceGroup=-1;
44 
50 int IsFile(char *Fname, int Link)
51 {
52  stat_t Stat;
53  int rc;
54  if (!Fname || (Fname[0]=='\0')) return(0); /* not a directory */
55  if (Link) rc = stat64(Fname,&Stat);
56  else rc = lstat64(Fname,&Stat);
57  if (rc != 0) return(0); /* bad name */
58  return(S_ISREG(Stat.st_mode));
59 } /* IsFile() */
60 
65 void SafeExit(int rc)
66 {
67  if (pgConn) PQfinish(pgConn);
69  exit(rc);
70 } /* SafeExit() */
71 
/**
 * \brief Get the position (ending + 1) of http|https|ftp:// of one url.
 *
 * The scheme must be the prefix of the URL. Searching for it anywhere in the
 * string reports the wrong length when another URL is embedded in the path
 * or query (e.g. "https://host/x?u=http://y" must yield 8, not 7).
 *
 * \param URL URL to inspect; NULL is treated as "no scheme".
 * \return Length of the leading "http://", "https://" or "ftp://",
 *         or 0 if the URL starts with none of them.
 */
int GetPosition(char *URL)
{
  static const char *const schemes[] = { "http://", "https://", "ftp://" };
  size_t i;

  if (URL == NULL) return 0;

  for (i = 0; i < sizeof(schemes) / sizeof(schemes[0]); i++)
  {
    size_t len = strlen(schemes[i]);
    if (strncmp(URL, schemes[i], len) == 0) return (int)len;
  }
  return 0;
}
85 
93 void DBLoadGold()
94 {
95  Cksum *Sum;
96  char *Unique=NULL;
97  char *SHA1, *MD5, *Len;
98  char SQL[STRMAX];
99  long PfileKey;
100  char *Path;
101  char SHA256[65];
102  FILE *Fin;
103  int rc = -1;
104  PGresult *result;
105  memset(SHA256, '\0', sizeof(SHA256));
106 
107  LOG_VERBOSE0("Processing %s",GlobalTempFile);
108  Fin = fopen(GlobalTempFile,"rb");
109  if (!Fin)
110  {
111  LOG_FATAL("upload %ld Unable to open temp file %s from %s",
113  SafeExit(1);
114  }
115 
116  Sum = SumComputeFile(Fin);
117  fclose(Fin);
118 
119  // Calculate sha256 value
120  rc = calc_sha256sum(GlobalTempFile, SHA256);
121  if (rc != 0)
122  {
123  LOG_FATAL("Unable to calculate SHA256 of %s\n", GlobalTempFile);
124  SafeExit(56);
125  }
126 
127  if ((int)ForceGroup > 0)
128  {
129  rc = chown(GlobalTempFile,-1,ForceGroup);
130  if (rc) LOG_ERROR("chown failed on %s, error: %s", GlobalTempFile, strerror(errno));
131  }
132 
133  if (!Sum)
134  {
135  LOG_FATAL("upload %ld Unable to compute checksum for %s from %s",
137  SafeExit(2);
138  }
139 
140  if (Sum->DataLen <= 0)
141  {
142  LOG_FATAL("upload %ld No bytes downloaded from %s to %s.",
144  SafeExit(3);
145  }
146 
147  Unique = SumToString(Sum);
148  LOG_VERBOSE0("Unique %s",Unique);
149 
150  if (GlobalImportGold)
151  {
152  LOG_VERBOSE0("Import Gold %s",Unique);
153  rc = fo_RepImport(GlobalTempFile,"gold",Unique,1);
154  if (rc != 0)
155  {
156  LOG_FATAL("upload %ld Failed to import %s from %s into repository gold %s",
158  SafeExit(4);
159  }
160  /* Put the file in the "files" repository too */
161  Path = fo_RepMkPath("gold",Unique);
162  if ((int)ForceGroup >= 0)
163  {
164  rc = chown(Path,-1,ForceGroup);
165  if (rc) LOG_ERROR("chown failed on %s, error: %s", Path, strerror(errno));
166  }
167  } /* if GlobalImportGold */
168  else /* if !GlobalImportGold */
169  {
170  Path = GlobalTempFile;
171  } /* else if !GlobalImportGold */
172 
173  LOG_VERBOSE0("Path is %s",Path);
174 
175  if (!Path)
176  {
177  LOG_FATAL("upload %ld Failed to determine repository location for %s in gold",
178  GlobalUploadKey,Unique);
179  SafeExit(5);
180  }
181 
182  LOG_VERBOSE0("Import files %s",Path);
183 
184  if (fo_RepImport(Path,"files",Unique,1) != 0)
185  {
186  LOG_FATAL("upload %ld Failed to import %s from %s into files",
187  GlobalUploadKey,Unique,Path);
188  SafeExit(6);
189  }
190 
191  if ((int)ForceGroup >= 0)
192  {
193  rc = chown(Path,-1,ForceGroup);
194  if (rc) LOG_ERROR("chown failed on %s, error: %s", Path, strerror(errno));
195  }
196 
197  if (Path != GlobalTempFile)
198  {
199  if(Path)
200  {
201  free(Path);
202  Path = NULL;
203  }
204  }
205 
206  /* Now update the DB */
207  /* Break out the sha1, md5, len components **/
208  SHA1 = Unique;
209  MD5 = Unique+41; /* 40 for sha1 + 1 for '.' */
210  Len = Unique+41+33; /* 32 for md5 + 1 for '.' */
211  /* Set the pfile */
212  memset(SQL,'\0',STRMAX);
213  snprintf(SQL,STRMAX-1,"SELECT pfile_pk FROM pfile WHERE pfile_sha1 = '%.40s' AND pfile_md5 = '%.32s' AND pfile_size = %s;",
214  SHA1,MD5,Len);
215  result = PQexec(pgConn, SQL); /* SELECT */
216  if (fo_checkPQresult(pgConn, result, SQL, __FILE__, __LINE__)) SafeExit(7);
217 
218  /* See if pfile needs to be added */
219  if (PQntuples(result) <=0)
220  {
221  /* Insert it */
222  memset(SQL,'\0',STRMAX);
223  snprintf(SQL,STRMAX-1,"INSERT INTO pfile (pfile_sha1, pfile_md5, pfile_sha256, pfile_size) VALUES ('%.40s','%.32s','%.64s',%s)",
224  SHA1,MD5,SHA256,Len);
225  PQclear(result);
226  result = PQexec(pgConn, SQL);
227  if (fo_checkPQcommand(pgConn, result, SQL, __FILE__, __LINE__)) SafeExit(8);
228  PQclear(result);
229  result = PQexec(pgConn, "SELECT currval('pfile_pfile_pk_seq')");
230  if (fo_checkPQresult(pgConn, result, SQL, __FILE__, __LINE__)) SafeExit(182);
231  }
232 
233  PfileKey = atol(PQgetvalue(result,0,0));
234  LOG_VERBOSE0("pfile_pk = %ld",PfileKey);
235 
236  /* Update the DB so the pfile is linked to the upload record */
237  PQclear(result);
238  result = PQexec(pgConn, "BEGIN");
239  if (fo_checkPQcommand(pgConn, result, SQL, __FILE__, __LINE__)) SafeExit(-1);
240 
241  memset(SQL,0,STRMAX);
242  snprintf(SQL,STRMAX-1,"SELECT * FROM upload WHERE upload_pk=%ld FOR UPDATE;",GlobalUploadKey);
243  PQclear(result);
244  result = PQexec(pgConn, SQL);
245  if (fo_checkPQresult(pgConn, result, SQL, __FILE__, __LINE__)) SafeExit(-1);
246 
247  memset(SQL,0,STRMAX);
248  snprintf(SQL,STRMAX-1,"UPDATE upload SET pfile_fk=%ld WHERE upload_pk=%ld",
249  PfileKey,GlobalUploadKey);
250  LOG_VERBOSE0("SQL=%s\n",SQL);
251  PQclear(result);
252  result = PQexec(pgConn, SQL);
253  if (fo_checkPQcommand(pgConn, result, SQL, __FILE__, __LINE__)) SafeExit(9);
254  PQclear(result);
255  result = PQexec(pgConn, "COMMIT;");
256  if (fo_checkPQcommand(pgConn, result, SQL, __FILE__, __LINE__)) SafeExit(92);
257  PQclear(result);
258 
259  /* Clean up */
260  if (Sum)
261  {
262  free(Sum);
263  Sum = NULL;
264  }
265 
266  if (Unique)
267  {
268  free(Unique);
269  Unique = NULL;
270  }
271 } /* DBLoadGold() */
272 
273 
/**
 * \brief Copy a URL while percent-encoding characters that are unsafe inside
 *        the single-quoted shell commands built by this file.
 *
 * Single quotes, back quotes and whitespace are written as "%XX"; every other
 * character is copied unchanged. A '#' aborts the conversion.
 *
 * \param Sin      Source string.
 * \param Sout     Destination buffer; zero-filled before use.
 * \param SoutSize Size of Sout in bytes, terminator included.
 * \return 1 if the whole of Sin was converted; 0 if a '#' was met, if Sout
 *         is too small, or if an argument is unusable.
 */
int TaintURL(char *Sin, char *Sout, int SoutSize)
{
  int i;
  int si;

  if (!Sin || !Sout || SoutSize <= 0) return(0);

  memset(Sout,'\0',SoutSize);
  SoutSize--; /* always keep the EOL */
  for(i=0,si=0; (si<SoutSize) && (Sin[i] != '\0'); i++)
  {
    /* <ctype.h> functions are only defined for unsigned char values or EOF;
     * a plain char may be negative. */
    unsigned char c = (unsigned char)Sin[i];

    if (c == '#') return(0); /* end at the start of comment */
    if ((c != '\'') && (c != '`') && !isspace(c)) Sout[si++] = Sin[i];
    else
    {
      if (si+3 >= SoutSize) return(0); /* no room */
      snprintf(Sout+si,4,"%%%02X",c);
      si+=3;
    }
  }
  return(Sin[i]=='\0');
} /* TaintURL() */
300 
301 
/**
 * \brief Choose the directory wget should download into.
 *
 * \param TempFile          Target temp file; when non-empty it is unlinked
 *                          and TempFileDirectory is selected.
 * \param TempFileDir       Fallback directory used when TempFile is NULL or
 *                          empty.
 * \param TempFileDirectory Directory returned when TempFile is set.
 * \return TempFileDirectory, TempFileDir, or NULL when neither TempFile nor
 *         TempFileDir is a non-empty string.
 */
char *PrepareWgetDest(char *TempFile, char *TempFileDir, char *TempFileDirectory)
{
  int haveTempFile = (TempFile != NULL) && (TempFile[0] != '\0');
  int haveTempDir;

  if (haveTempFile)
  {
    /* Delete the temp file if it exists */
    unlink(TempFile);
    return TempFileDirectory;
  }

  haveTempDir = (TempFileDir != NULL) && (TempFileDir[0] != '\0');
  return haveTempDir ? TempFileDir : NULL;
}
327 
328 
340 int GetURL(char *TempFile, char *URL, char *TempFileDir)
341 {
342  char *cmd;
343  char TaintedURL[STRMAX];
344  char TempFileDirectory[STRMAX+128];
345  char *delete_tmpdir_cmd;
346  int rc;
347  int res;
348 
349  memset(TempFileDirectory,'\0',STRMAX+128);
350 
351  /* save each upload files in /srv/fossology/repository/localhost/wget/wget.xxx.dir/ */
352  sprintf(TempFileDirectory, "%s.dir", TempFile);
353  res = asprintf(&delete_tmpdir_cmd, "rm -rf %s", TempFileDirectory);
354  if (res == -1)
355  {
356  ASPRINTF_MEM_ERROR_LOG;
357  SafeExit(ASPRINTF_MEM_ERROR);
358  }
359 #if 1
360  char WgetArgs[]="--no-check-certificate --progress=dot -rc -np -e robots=off";
361 #else
362  /* wget < 1.10 does not support "--no-check-certificate" */
363  char WgetArgs[]="--progress=dot -rc -np -e robots=off";
364 #endif
365 
366  if (!TaintURL(URL,TaintedURL,STRMAX))
367  {
368  LOG_FATAL("Failed to taint the URL '%s'",URL);
369  SafeExit(10);
370  }
371 
372  /*
373  Wget options:
374  --progress=dot :: display a new line as it progresses.
375  --no-check-certificate :: download HTTPS files even if the cert cannot
376  be validated. (Neal has many issues with SSL and does not view it
377  as very secure.) Without this, some caching proxies and web sites
378  with old certs won't download. Granted, in theory a bad cert should
379  prevent downloads. In reality, 99.9% of bad certs are because the
380  admin did not notice that they expired and not because of a hijacking
381  attempt.
382  */
383 
384  struct stat sb;
385  int rc_system =0;
386  char no_proxy[STRMAX] = {0};
387  char proxy[STRMAX] = {0};
388  char proxy_temp[STRMAX] = {0};
389 
390  /* http_proxy is optional so don't error if it doesn't exist */
392  if (GlobalProxy[0] && GlobalProxy[0][0])
393  {
394  snprintf(proxy_temp, STRMAX-1, "export http_proxy='%s' ;", GlobalProxy[0]);
395  strcat(proxy, proxy_temp);
396  }
397  if (GlobalProxy[1] && GlobalProxy[1][0])
398  {
399  snprintf(proxy_temp, STRMAX-1, "export https_proxy='%s' ;", GlobalProxy[1]);
400  strcat(proxy, proxy_temp);
401  }
402  if (GlobalProxy[2] && GlobalProxy[2][0])
403  {
404  snprintf(proxy_temp, STRMAX-1, "export ftp_proxy='%s' ;", GlobalProxy[2]);
405  strcat(proxy, proxy_temp);
406  }
407  if (GlobalProxy[3] && GlobalProxy[3][0])
408  {
409  snprintf(no_proxy, STRMAX-1, "-e no_proxy='%s'", GlobalProxy[3]);
410  }
411 
412  char *dest;
413 
414  dest = PrepareWgetDest(TempFile, TempFileDir, TempFileDirectory);
415 
416  if (dest) {
417  res = asprintf(&cmd," %s /usr/bin/wget -q %s -P '%s' '%s' %s %s 2>&1",
418  proxy, WgetArgs, dest, TaintedURL, GlobalParam, no_proxy);
419  }
420  else
421  {
422  res = asprintf(&cmd," %s /usr/bin/wget -q %s '%s' %s %s 2>&1",
423  proxy, WgetArgs, TaintedURL, GlobalParam, no_proxy);
424  }
425 
426  if (res == -1)
427  {
428  ASPRINTF_MEM_ERROR_LOG;
429  free(delete_tmpdir_cmd);
430  SafeExit(ASPRINTF_MEM_ERROR);
431  }
432 
433  /* the command is like
434  ". /usr/local/etc/fossology/Proxy.conf;
435  /usr/bin/wget -q --no-check-certificate --progress=dot -rc -np -e robots=off -P
436  '/srv/fossology/repository/localhost/wget/wget.xxx.dir/'
437  'http://a.org/file' -l 1 -R index.html* 2>&1"
438  */
439  LOG_VERBOSE0("CMD: %s", cmd);
440  rc = system(cmd);
441 
442  if (WIFEXITED(rc) && (WEXITSTATUS(rc) != 0))
443  {
444  LOG_FATAL("upload %ld Download failed; Return code %d from: %s",GlobalUploadKey,WEXITSTATUS(rc),cmd);
445  unlink(GlobalTempFile);
446  rc_system = system(delete_tmpdir_cmd);
447  if (!WIFEXITED(rc_system)) systemError(__LINE__, rc_system, delete_tmpdir_cmd)
448  free(delete_tmpdir_cmd);
449  SafeExit(12);
450  }
451 
452  /* Run from scheduler! store /srv/fossology/repository/localhost/wget/wget.xxx.dir/<files|directories> to one temp file */
453  if (TempFile && TempFile[0])
454  {
455  char* tmpfile_path;
456  /* for one url http://a.org/test.deb, TempFilePath should be /srv/fossology/repository/localhost/wget/wget.xxx.dir/a.org/test.deb */
457  int Position = GetPosition(TaintedURL);
458  if (0 == Position)
459  {
460  LOG_FATAL("path %s is not http://, https://, or ftp://", TaintedURL);
461  unlink(GlobalTempFile);
462  rc_system = system(delete_tmpdir_cmd);
463  if (!WIFEXITED(rc_system)) systemError(__LINE__, rc_system, delete_tmpdir_cmd)
464  free(delete_tmpdir_cmd);
465  SafeExit(26);
466  }
467  res = asprintf(&tmpfile_path, "%s/%s", TempFileDirectory, TaintedURL + Position);
468  if (res == -1)
469  {
470  ASPRINTF_MEM_ERROR_LOG;
471  free(delete_tmpdir_cmd);
472  SafeExit(ASPRINTF_MEM_ERROR);
473  }
474 
475  if (!stat(tmpfile_path, &sb))
476  {
477  if (S_ISDIR(sb.st_mode))
478  {
479  res = asprintf(&cmd, "find '%s' -mindepth 1 -type d -empty -exec rmdir {} \\; > /dev/null 2>&1", tmpfile_path);
480  if (res == -1)
481  {
482  ASPRINTF_MEM_ERROR_LOG;
483  free(tmpfile_path);
484  free(delete_tmpdir_cmd);
485  SafeExit(ASPRINTF_MEM_ERROR);
486  }
487  rc_system = system(cmd); // delete all empty directories downloaded
488  if (!WIFEXITED(rc_system)) systemError(__LINE__, rc_system, cmd)
489  free(cmd);
490 
491  res = asprintf(&cmd, "tar -cf '%s' -C '%s' ./ 1>/dev/null", TempFile, tmpfile_path);
492  if (res == -1)
493  {
494  ASPRINTF_MEM_ERROR_LOG;
495  free(tmpfile_path);
496  free(delete_tmpdir_cmd);
497  SafeExit(ASPRINTF_MEM_ERROR);
498  }
499  }
500  else
501  {
502  res = asprintf(&cmd, "mv '%s' '%s' 2>&1", tmpfile_path, TempFile);
503  if (res == -1)
504  {
505  ASPRINTF_MEM_ERROR_LOG;
506  free(tmpfile_path);
507  free(delete_tmpdir_cmd);
508  SafeExit(ASPRINTF_MEM_ERROR);
509  }
510  }
511 
512  free(tmpfile_path);
513 
514  rc_system = system(cmd);
515  if (rc_system != 0)
516  {
517  systemError(__LINE__, rc_system, cmd)
518  free(cmd);
519  unlink(GlobalTempFile);
520  rc_system = system(delete_tmpdir_cmd);
521  if (!WIFEXITED(rc_system)) systemError(__LINE__, rc_system, delete_tmpdir_cmd)
522  free(delete_tmpdir_cmd);
523  SafeExit(24); // failed to store the temperary directory(one file) as one temperary file
524  }
525 
526  }
527  else
528  {
529  res = asprintf(&cmd, "find '%s' -type f -exec mv {} %s \\; > /dev/null 2>&1", TempFileDirectory, TempFile);
530  if (res == -1)
531  {
532  ASPRINTF_MEM_ERROR_LOG;
533  free(delete_tmpdir_cmd);
534  SafeExit(ASPRINTF_MEM_ERROR);
535  }
536  rc_system = system(cmd);
537  if (rc_system != 0)
538  {
539  systemError(__LINE__, rc_system, cmd)
540  free(cmd);
541  unlink(GlobalTempFile);
542  rc_system = system(delete_tmpdir_cmd);
543  if (!WIFEXITED(rc_system)) systemError(__LINE__, rc_system, delete_tmpdir_cmd)
544  free(delete_tmpdir_cmd);
545  SafeExit(24); // failed to store the temperary directory(one file) as one temperary file
546  }
547 
548  }
549  }
550 
551  if (TempFile && TempFile[0] && !IsFile(TempFile,1))
552  {
553  LOG_FATAL("upload %ld File %s not created from URL: %s, CMD: %s",GlobalUploadKey,TempFile,URL, cmd);
554  free(cmd);
555  unlink(GlobalTempFile);
556  rc_system = system(delete_tmpdir_cmd);
557  if (!WIFEXITED(rc_system)) systemError(__LINE__, rc_system, delete_tmpdir_cmd)
558  free(delete_tmpdir_cmd);
559  SafeExit(15);
560  }
561 
562  free(cmd);
563 
564  /* remove the temp dir /srv/fossology/repository/localhost/wget/wget.xxx.dir/ for this upload */
565  rc_system = system(delete_tmpdir_cmd);
566  if (!WIFEXITED(rc_system)) systemError(__LINE__, rc_system, delete_tmpdir_cmd)
567  LOG_VERBOSE0("upload %ld Downloaded %s to %s",GlobalUploadKey,URL,TempFile);
568 
569  free(delete_tmpdir_cmd);
570 
571  return(0);
572 } /* GetURL() */
573 
579 {
580  char *command = NULL;
581  char *tmp_file_directory;
582  char *delete_tmpdir_cmd;
583  char *tmp_home;
584 
585  int rc = 0;
586  int resethome = 0; // 0: default; 1: home is null before setting, should rollback
587  char *homeenv = NULL;
588  int res;
589 
590  homeenv = getenv("HOME");
591  if(NULL == homeenv) resethome = 1;
592 
593  /* We need HOME to point to where .gitconfig is installed
594  * path is the repository path and .gitconfig is installed in its parent directory
595  */
596  res = asprintf(&tmp_home, "%s/..", fo_config_get(sysconfig, "FOSSOLOGY", "path", NULL));
597  if (res == -1)
598  {
599  return ASPRINTF_MEM_ERROR;
600  }
601 
602  setenv("HOME", tmp_home, 1);
603  free(tmp_home);
604 
605  /* save each upload files in /srv/fossology/repository/localhost/wget/wget.xxx.dir/ */
606  res = asprintf(&tmp_file_directory, "%s.dir", GlobalTempFile);
607  if (res == -1)
608  {
609  ASPRINTF_MEM_ERROR_LOG;
610  return ASPRINTF_MEM_ERROR;
611  }
612 
613  res = asprintf(&delete_tmpdir_cmd, "rm -rf %s", tmp_file_directory);
614  if (res == -1)
615  {
616  ASPRINTF_MEM_ERROR_LOG;
617  free(tmp_file_directory);
618  return ASPRINTF_MEM_ERROR;
619  }
620 
621  command = GetVersionControlCommand(1);
622  if (!command)
623  {
624  free(tmp_file_directory);
625  return ASPRINTF_MEM_ERROR;
626  }
627  rc = system(command);
628  free(command);
629 
630  if (resethome) // rollback
631  unsetenv("HOME");
632  else
633  setenv("HOME", homeenv, 1);
634 
635  if (rc != 0)
636  {
637  command = GetVersionControlCommand(-1);
638  if (!command)
639  {
640  ASPRINTF_MEM_ERROR_LOG;
641  free(tmp_file_directory);
642  return ASPRINTF_MEM_ERROR;
643  }
644  systemError(__LINE__, rc, command)
649  LOG_FATAL("please make sure the URL of repo is correct, also add correct proxy for your version control system, command is:%s, GlobalTempFile is:%s, rc is:%d. \n", command, GlobalTempFile, rc);
650  /* remove the temp dir /srv/fossology/repository/localhost/wget/wget.xxx.dir/ for this upload */
651  rc = system(delete_tmpdir_cmd);
652  if (!WIFEXITED(rc)) systemError(__LINE__, rc, delete_tmpdir_cmd)
653  free(command);
654  free(tmp_file_directory);
655  free(delete_tmpdir_cmd);
656  return 1;
657  }
658 
659  res = asprintf(&command, "tar -cf '%s' -C '%s' ./ 1>/dev/null", GlobalTempFile, tmp_file_directory);
660  if (res == -1)
661  {
662  ASPRINTF_MEM_ERROR_LOG;
663  free(tmp_file_directory);
664  free(delete_tmpdir_cmd);
665  return ASPRINTF_MEM_ERROR;
666  }
667  free(tmp_file_directory);
668  rc = system(command);
669  if (rc != 0)
670  {
671  systemError(__LINE__, rc, command)
672  /* remove the temp dir /srv/fossology/repository/localhost/wget/wget.xxx.dir/ for this upload */
673  rc = system(delete_tmpdir_cmd);
674  if (!WIFEXITED(rc)) systemError(__LINE__, rc, delete_tmpdir_cmd)
675  LOG_FATAL("DeleteTempDirCmd is:%s\n", delete_tmpdir_cmd);
676  free(delete_tmpdir_cmd);
677  return 1;
678  }
679 
680  /* remove the temp dir /srv/fossology/repository/localhost/wget/wget.xxx.dir/ for this upload */
681  rc = system(delete_tmpdir_cmd);
682  if (!WIFEXITED(rc)) systemError(__LINE__, rc, delete_tmpdir_cmd)
683  free(delete_tmpdir_cmd);
684 
685  return 0; // succeed to retrieve source
686 }
687 
695 void SetEnv (char *S, char *TempFileDir)
696 {
697  int SLen,GLen; /* lengths for S and global string */
698 
699  GlobalUploadKey = -1;
700  memset(GlobalTempFile,'\0',STRMAX);
701  memset(GlobalURL,'\0',URLMAX);
702  if (!S) return;
703 
704  /* first value is the upload_pk */
705  GlobalUploadKey = atol(S);
706  while(S[0] && isdigit(S[0])) S++;
707  while(S[0] && isspace(S[0])) S++; /* skip spaces */
708 
709 #if 1
710  /* second value is the temp file location */
712  SLen=0;
713  GLen=0;
714  while((GLen < STRMAX-4) && S[SLen] && !isspace(S[SLen]))
715  {
716  if ((S[SLen] == '\'') || isspace(S[SLen]) || !isprint(S[SLen]))
717  {
718  sprintf(GlobalTempFile+GLen,"%%%02x",(unsigned char)(S[SLen]));
719  GLen += 3;
720  }
721  else GlobalTempFile[GLen++] = S[SLen];
722  SLen++;
723  }
724  S+=SLen;
725  while(S[0] && isspace(S[0])) S++; /* skip spaces */
726 #endif
727  if (TempFileDir)
728  {
729  memset(GlobalTempFile,'\0',STRMAX);
730  snprintf(GlobalTempFile,STRMAX-1,"%s/wget.%d",TempFileDir,getpid());
731  }
732 
733  /* third value is the URL location -- taint any single-quotes */
734  SLen=0;
735  GLen=0;
736  while((GLen < STRMAX-4) && S[SLen])
737  {
738  if ((S[SLen] == '\\') && isprint(S[SLen+1])) // in file path, if include '\ ', that mean this file name include spaces
739  {
740  LOG_FATAL("S[SLen] is:%c\n", S[SLen]);
741  GlobalURL[GLen++] = ' ';
742  SLen += 2;
743  continue;
744  }
745  else if ((S[SLen] != '\\') && isspace(S[SLen])) break;
746  else if ((S[SLen] == '\'') || isspace(S[SLen]) || !isprint(S[SLen]))
747  {
748  sprintf(GlobalURL+GLen,"%%%02x",(unsigned char)(S[SLen]));
749  GLen += 3;
750  }
751  else GlobalURL[GLen++] = S[SLen];
752  SLen++;
753  }
754  S+=SLen;
755 
756  while(S[0] && isspace(S[0])) S++; /* skip spaces */
757 
758  char Type[][4] = {"SVN", "Git", "CVS"};
759  int i = 0; // type index
760 
761  memset(GlobalType,'\0',STRMAX);
762  strncpy(GlobalType, S, 3);
763  if ((0 == strcmp(GlobalType, Type[i++])) || (0 == strcmp(GlobalType, Type[i++])) || (0 == strcmp(GlobalType, Type[i++])))
764  {
765  S += 3;
766  }
767  else
768  {
769  memset(GlobalType,'\0',STRMAX);
770  }
771 
772  strncpy(GlobalParam, S, sizeof(GlobalParam)); // get the parameters, kind of " -A rpm -R fosso -l 1* "
773  LOG_VERBOSE0(" upload %ld wget_agent globals loaded:\n upload_pk = %ld\n tmpfile=%s URL=%s GlobalParam=%s\n",GlobalUploadKey, GlobalUploadKey,GlobalTempFile,GlobalURL,GlobalParam);
774 } /* SetEnv() */
775 
776 
784 char *PathCheck(char *DirPath)
785 {
786  char *NewPath;
787  char *subs;
788  char TmpPath[2048];
789  char HostName[2048];
790 
791  NewPath = strdup(DirPath);
792 
793  if ((subs = strstr(NewPath,"%H")) )
794  {
795  /* hostname substitution */
796  gethostname(HostName, sizeof(HostName));
797 
798  *subs = 0;
799  snprintf(TmpPath, sizeof(TmpPath), "%s%s%s", NewPath, HostName, subs+2);
800  free(NewPath);
801  NewPath = strdup(TmpPath);
802  }
803 
804  if ((subs = strstr(NewPath, "%R")) )
805  {
806  /* repo location substitution */
807  *subs = 0;
808 
809  snprintf(TmpPath, sizeof(TmpPath), "%s%s%s", NewPath, fo_config_get(sysconfig, "FOSSOLOGY", "path", NULL), subs+2);
810  free(NewPath);
811  NewPath = strdup(TmpPath);
812  }
813 
814  return(NewPath);
815 }
816 
831 int Archivefs(char *Path, char *TempFile, char *TempFileDir, struct stat Status)
832 {
833  char *cmd;
834  int rc_system = 0;
835  int res;
836 
837  res = asprintf(&cmd , "mkdir -p '%s' >/dev/null 2>&1", TempFileDir);
838  if (res == -1)
839  {
840  ASPRINTF_MEM_ERROR_LOG;
841  return 0;
842  }
843 
844  rc_system = system(cmd);
845  if (!WIFEXITED(rc_system))
846  {
847  LOG_FATAL("[%s:%d] Could not create temporary directory", __FILE__, __LINE__);
848  systemError(__LINE__, rc_system, cmd)
849  free(cmd);
850  return 0;
851  }
852  free(cmd);
853 
854  if (S_ISDIR(Status.st_mode)) /* directory? */
855  {
856  res = asprintf(&cmd, "tar %s -cf '%s' -C '%s' ./ 1>/dev/null", GlobalParam, TempFile, Path);
857  if (res == -1)
858  {
859  ASPRINTF_MEM_ERROR_LOG;
860  return 0;
861  }
862  rc_system = system(cmd);
863  if (!WIFEXITED(rc_system))
864  {
865  systemError(__LINE__, rc_system, cmd)
866  free(cmd);
867  return 0;
868  }
869  free(cmd);
870  } else if (strstr(Path, "*")) // wildcards
871  {
872  /* for the wildcards upload, keep the path */
873  /* copy * files to TempFileDir/temp primarily */
874  res = asprintf(&cmd, "mkdir -p '%s/temp' > /dev/null 2>&1 && cp -r %s '%s/temp' > /dev/null 2>&1", TempFileDir, Path, TempFileDir);
875  if (res == -1)
876  {
877  ASPRINTF_MEM_ERROR_LOG;
878  return 0;
879  }
880  rc_system = system(cmd);
881  if (rc_system != 0)
882  {
883  systemError(__LINE__, rc_system, cmd)
884  free(cmd);
885  return 0;
886  }
887  free(cmd);
888  res = asprintf(&cmd, "tar -cf '%s' -C %s/temp ./ 1> /dev/null && rm -rf %s/temp > /dev/null 2>&1", TempFile, TempFileDir, TempFileDir);
889  if (res == -1)
890  {
891  ASPRINTF_MEM_ERROR_LOG;
892  return 0;
893  }
894  rc_system = system(cmd);
895  if (rc_system != 0)
896  {
897  systemError(__LINE__, rc_system, cmd)
898  free(cmd);
899  return 0;
900  }
901  free(cmd);
902  } else if(S_ISREG(Status.st_mode)) /* regular file? */
903  {
904  res = asprintf(&cmd, "cp '%s' '%s' >/dev/null 2>&1", Path, TempFile);
905  if (res == -1)
906  {
907  ASPRINTF_MEM_ERROR_LOG;
908  return 0;
909  }
910  rc_system = system(cmd);
911  if (rc_system != 0)
912  {
913  systemError(__LINE__, rc_system, cmd)
914  free(cmd);
915  return 0;
916  }
917  free(cmd);
918  } else return 0; /* neither a directory nor a regular file */
919 
920  return 1;
921 }
922 
928 void GetProxy()
929 {
930  int i = 0;
931  int count_temp = 0;
932  char *http_proxy_host = NULL;
933  char *http_proxy_port = NULL;
934  char *http_temp = NULL;
935 
936  for (i = 0; i < 6; i++)
937  {
938  GlobalProxy[i++] = NULL;
939  }
940  GError* error1 = NULL;
941  GError* error2 = NULL;
942  GError* error3 = NULL;
943  GError* error4 = NULL;
944 
945  i = 0;
946  GlobalProxy[i] = fo_config_get(sysconfig, "FOSSOLOGY", "http_proxy", &error1);
947  trim(GlobalProxy[i++]);
948  GlobalProxy[i] = fo_config_get(sysconfig, "FOSSOLOGY", "https_proxy", &error2);
949  trim(GlobalProxy[i++]);
950  GlobalProxy[i] = fo_config_get(sysconfig, "FOSSOLOGY", "ftp_proxy", &error3);
951  trim(GlobalProxy[i++]);
952  GlobalProxy[i] = fo_config_get(sysconfig, "FOSSOLOGY", "no_proxy", &error4);
953  trim(GlobalProxy[i++]);
954 
955 
956  if (GlobalProxy[0] && GlobalProxy[0][0])
957  {
958  http_proxy_port = strrchr(GlobalProxy[0], ':');
959  strncpy(GlobalHttpProxy, GlobalProxy[0], (http_proxy_port - GlobalProxy[0]));
960  http_proxy_port++;
961 
962  if (http_proxy_port && http_proxy_port[0])
963  {
964  /* exclude '/' in http_proxy_port and 'http://' in http_proxy_host */
965  http_temp = strchr(http_proxy_port, '/');
966  if (http_temp && http_temp[0])
967  {
968  count_temp = http_temp - http_proxy_port;
969  http_proxy_port[count_temp] = 0;
970  }
972  GlobalProxy[5] = http_proxy_port;
973 
974  http_proxy_host = strrchr(GlobalHttpProxy, '/');
975  if (http_proxy_host && http_proxy_host[0])
976  {
977  http_proxy_host++;
978  GlobalProxy[4] = http_proxy_host;
979  }
980  }
981  }
982 }
983 
/**
 * \brief Print the command line help to standard output.
 * \param Name Program name shown in the first line.
 */
void Usage(char *Name)
{
  /* Help text after the usage line, one entry per output line. */
  static const char *const HelpLines[] = {
    " -h :: help (print this message), then exit.\n",
    " -i :: Initialize the DB connection then exit (nothing downloaded)\n",
    " -g group :: Set the group on processed files (e.g., -g fossy).\n",
    " -G :: Do NOT copy the file to the gold repository.\n",
    " -d dir :: directory for downloaded file storage\n",
    " -k key :: upload key identifier (number)\n",
    " -A acclist :: Specify comma-separated lists of file name suffixes or patterns to accept.\n",
    " -R rejlist :: Specify comma-separated lists of file name suffixes or patterns to reject.\n",
    " -l depth :: Specify recursion maximum depth level depth. The default maximum depth is 5.\n",
    " -c configdir :: Specify the directory for the system configuration.\n",
    " -C :: run from command line.\n",
    " -v :: verbose (-vv = more verbose).\n",
    " -V :: print the version info, then exit.\n",
    " OBJ :: if a URL is listed, then it is retrieved.\n",
    " if a file is listed, then it used.\n",
    " if OBJ and Key are provided, then it is inserted into\n",
    " the DB and repository.\n",
    " no file :: process data from the scheduler.\n"
  };
  size_t idx;

  printf("Usage: %s [options] [OBJ]\n",Name);
  for (idx = 0; idx < sizeof(HelpLines) / sizeof(HelpLines[0]); idx++)
  {
    printf("%s", HelpLines[idx]);
  }
} /* Usage() */
1010 
1019 {
1020 #define PREFIXMAX 10
1021 
1022  const char needle[] = " ";
1023  const char needle2[] = "//";
1024  int index = 0;
1025  char *username = NULL;
1026  char *password = NULL;
1027  char http[PREFIXMAX] = "";
1028  char URI[FILEPATH] = "";
1029  char *token = NULL;
1030  char *temp = NULL;
1031  char *additionalParams = NULL;
1032 
1033  if (strstr(GlobalParam, "password") && strstr(GlobalParam, "username"))
1034  {
1035  temp = strstr(GlobalURL, needle2);
1036  if (!temp || (temp - GlobalURL) < 3)
1037  {
1038  return;
1039  }
1040  strcpy(URI, temp + 2);
1041  if (strlen(GlobalURL) - strlen(URI) > PREFIXMAX - 1)
1042  {
1043  return;
1044  }
1045 
1046  strncpy(http, GlobalURL, strlen(GlobalURL) - strlen(URI));
1047  /* get the first token */
1048  token = strtok(GlobalParam, needle);
1049  /* walk through other tokens */
1050  while( token != NULL )
1051  {
1052  if (1 == index) username = token;
1053  if (3 == index) {
1054  password = token;
1055  additionalParams = token + strlen(token) + 1;
1056  break;
1057  }
1058  token = strtok(NULL, needle);
1059  index++;
1060  }
1061  snprintf(GlobalURL, URLMAX, "%s%s:%s@%s", http, username, password, URI);
1062 
1063  if (strlen(additionalParams) > 0) {
1064  memmove(GlobalParam, additionalParams, strlen(additionalParams) +1);
1065  }
1066  else {
1067  memset(GlobalParam,'\0',STRMAX);
1068  }
1069  }
1070 }
1071 
1076 {
1077  const char needle[] = " ";
1078  int index = 0;
1079  int secondIndex = 0;
1080  char *username = NULL;
1081  char *token = NULL;
1082  char newParam[STRMAX];
1083  char *beg = NULL;
1084  char *end = NULL;
1085 
1086  memset(newParam, '\0', STRMAX);
1087  // SVN if parameters exists
1088  if (strstr(GlobalParam, "password") && strstr(GlobalParam, "username")) {
1089  /* get the first token */
1090  token = strtok(GlobalParam, needle);
1091  /* walk through other tokens */
1092  while( token != NULL )
1093  {
1094  if (1 == index) { //username is the first parameter
1095  username = token;
1096  break;
1097  }
1098  token = strtok(NULL, needle);
1099  index++;
1100  }
1101  // Create new parameters with masked password
1102  sprintf(newParam, " --username %s --password ****", username);
1103  memset(GlobalParam, '\0', STRMAX);
1104  strcpy(GlobalParam, newParam);
1105  }
1106  // GIT
1107  else {
1108  // First : from http://
1109  index = strcspn(GlobalURL, ":");
1110  // Second after username
1111  secondIndex = strcspn(GlobalURL + index + 1, ":");
1112  index = index + secondIndex + 1;
1113  if(index < strlen(GlobalURL)) { // Contains second :
1114  beg = (char *)malloc(index + 2);
1115  memset(beg, '\0', index + 2);
1116  strncpy(beg, GlobalURL, index + 1);
1117  // Place where password ends
1118  end = strchr(GlobalURL, '@');
1119  sprintf(newParam, "%s****%s", beg, end);
1120  strcpy(GlobalURL, newParam);
1121  }
1122  }
1123 }
1124 
1130 char* GetVersionControlCommand(int withPassword)
1131 {
1132  char Type[][4] = {"SVN", "Git", "CVS"};
1133  char *command;
1134  char *tmpfile_dir;
1135  int res;
1136 
1138  res = asprintf(&tmpfile_dir, "%s.dir", GlobalTempFile);
1139  if (res == -1)
1140  {
1141  return NULL;
1142  }
1143 
1144  if(withPassword < 0) MaskPassword();
1145  if (0 == strcmp(GlobalType, Type[0]))
1146  {
1147  if (GlobalProxy[0] && GlobalProxy[0][0])
1148  {
1149  res = asprintf(&command, "svn --config-option servers:global:http-proxy-host=%s --config-option servers:global:http-proxy-port=%s export %s %s %s --no-auth-cache >/dev/null 2>&1", GlobalProxy[4], GlobalProxy[5], GlobalURL, GlobalParam, tmpfile_dir);
1150  }
1151  else
1152  {
1153  res = asprintf(&command, "svn export %s %s %s --no-auth-cache >/dev/null 2>&1", GlobalURL, GlobalParam, tmpfile_dir);
1154  }
1155  }
1156  else if (0 == strcmp(GlobalType, Type[1]))
1157  {
1159  if (GlobalProxy[0] && GlobalProxy[0][0])
1160  {
1161  res = asprintf(&command, "git config --global http.proxy %s && git clone %s %s %s && rm -rf %s/.git", GlobalProxy[0], GlobalURL, GlobalParam, tmpfile_dir, tmpfile_dir);
1162  }
1163  else
1164  {
1165  res = asprintf(&command, "git clone %s %s %s >/dev/null 2>&1 && rm -rf %s/.git", GlobalURL, GlobalParam, tmpfile_dir, tmpfile_dir);
1166  }
1167  }
1168  if (res == -1)
1169  {
1170  free(tmpfile_dir);
1171  return NULL;
1172  }
1173 
1174  return command;
1175 }
int fo_checkPQresult(PGconn *pgConn, PGresult *result, char *sql, char *FileID, int LineNumb)
Check the result status of a postgres SELECT.
Definition: libfossdb.c:181
char * PathCheck(char *DirPath)
Check if path contains a "%H", "%R".
Definition: wget_agent.c:784
fo_conf * sysconfig
void MaskPassword()
Get the username from GlobalParam and create new parameters without password.
Definition: wget_agent.c:1075
char * GlobalProxy[6]
Proxy from fossology.conf.
Definition: wget_agent.c:40
gid_t ForceGroup
Set to group id to be used for download files.
Definition: wget_agent.c:43
int Archivefs(char *Path, char *TempFile, char *TempFileDir, struct stat Status)
Copy downloaded files to temporary directory.
Definition: wget_agent.c:831
int GetPosition(char *URL)
Get the position (ending + 1) of http|https|ftp:// of one url.
Definition: wget_agent.c:78
long GlobalUploadKey
Input for this system.
Definition: wget_agent.c:35
char * GetVersionControlCommand(int withPassword)
Get the command to run to get files from the version control system.
Definition: wget_agent.c:1130
char GlobalParam[STRMAX]
Additional parameters.
Definition: wget_agent.c:39
int GetURL(char *TempFile, char *URL, char *TempFileDir)
Do the wget.
Definition: wget_agent.c:340
void Usage(char *Name)
Here are some suggested options.
Definition: wget_agent.c:988
void fo_scheduler_disconnect(int retcode)
Disconnect the scheduler connection.
char * fo_config_get(fo_conf *conf, const char *group, const char *key, GError **error)
Gets an element based on its group name and key name. If the group or key is not found, the error object is set and NULL is returned.
Definition: fossconfig.c:341
Cksum * SumComputeFile(FILE *Fin)
Compute the checksum, allocate and return a string containing the sum value.
Definition: checksum.c:127
char GlobalType[STRMAX]
Type of download (FILE/version control)
Definition: wget_agent.c:38
void replace_url_with_auth()
Translate authentication of git clone.
Definition: wget_agent.c:1018
void GetProxy()
Get proxy from fossology.conf.
Definition: wget_agent.c:928
PGconn * pgConn
For the DB.
Definition: wget_agent.c:34
char GlobalURL[URLMAX]
URL to download.
Definition: wget_agent.c:37
void SafeExit(int rc)
Closes the connection to the server, free the database connection, and exit.
Definition: wget_agent.c:65
void SetEnv(char *S, char *TempFileDir)
Convert input pairs into globals.
Definition: wget_agent.c:695
char GlobalHttpProxy[STRMAX]
HTTP proxy command to use.
Definition: wget_agent.c:41
int fo_checkPQcommand(PGconn *pgConn, PGresult *result, char *sql, char *FileID, int LineNumb)
Check the result status of a postgres command (not select). If an error occurred, write the error to s...
Definition: libfossdb.c:215
void DBLoadGold()
Insert a file into the database and repository.
Definition: wget_agent.c:93
char GlobalTempFile[STRMAX]
Temp file to be used.
Definition: wget_agent.c:36
int GetVersionControl()
Get source code from version control system.
Definition: wget_agent.c:578
char * fo_RepMkPath(const char *Type, char *Filename)
Given a filename, construct the full path to the file.
Definition: libfossrepo.c:364
Store check sum of a file.
Definition: checksum.h:39
uint64_t DataLen
Size of the file.
Definition: checksum.h:43
char * SumToString(Cksum *Sum)
Return string representing a Cksum. NOTE: The calling function must free() the string! ...
Definition: checksum.c:249
int GlobalImportGold
Set to 0 to not store file in gold repository.
Definition: wget_agent.c:42
char SQL[STRMAX]
For DB.
Definition: wget_agent.c:32
int fo_RepImport(char *Source, char *Type, char *Filename, int Link)
Import a file into the repository.
Definition: libfossrepo.c:824
int IsFile(char *Fname, int Link)
Given a filename, is it a file?
Definition: wget_agent.c:50
int TaintURL(char *Sin, char *Sout, int SoutSize)
Given a URL string, taint-protect it.
Definition: wget_agent.c:281
char * trim(char *ptext)
Trimming whitespace.
Definition: fossconfig.c:695
char * PrepareWgetDest(char *TempFile, char *TempFileDir, char *TempFileDirectory)
Prepare directory for wget.
Definition: wget_agent.c:312