FOSSology  3.2.0rc1
Open Source License Compliance by Open Source Software
mktop1k.php
1 #!/usr/bin/php
2 
3 <?php
4 /***********************************************************
5  mktop1k.php
6  Copyright (C) 2007 Hewlett-Packard Development Company, L.P.
7 
8  This program is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License
10  version 2 as published by the Free Software Foundation.
11 
12  This program is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  GNU General Public License for more details.
16 
17  You should have received a copy of the GNU General Public License along
18  with this program; if not, write to the Free Software Foundation, Inc.,
19  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20  ***********************************************************/
21 
38 // FIXME: this should be a global from pathinclude? $LIBDIR = '/usr/local/lib';
39 require_once("FIXMETOBERELATIVE/pathinclude.php");
40 require_once("$LIBDIR/lib_projxml.h.php");
41 //require_once("./lib_projxml.h.php"); // dev copy
42 
43 
// Command-line help text; printed for -h and whenever the invocation is
// rejected.
$usage = <<< USAGE
Usage: mktop1k [-h] -i <in-file> -o <out-file> [-n nnn]
 Where: -h optional help, displays this message
 <in-file> path to an uncompressed Freshmeat rdf XML file
 <out-file> path to filename where the xml output will be generated.
 -n nnn optional parameter to indicate how many projects to
 extract.

 Default is 1000.

 The projects are always extracted in priority order.
 For example, -n 10 will get the top 10 Freshmeat packages.
 A range of numbers is not supported.

USAGE;

// -i <file> and -o <file> are both mandatory, so a usable command line has at
// least five entries in $argv (script name plus two flag/value pairs).
if ($argc <= 4) {
    echo $usage;
    // A short command line never reaches the -h case of the option loop, so
    // an explicit help request is honoured here: asking for help is not an
    // error and must not exit with a failure status.
    $help_requested = in_array('-h', array_slice($argv, 1), true);
    exit($help_requested ? 0 : 1);
}
64 
// Default number of projects to extract when -n is not given.
$HowMany_projects = 1000;

// Both paths are mandatory. They stay null until the matching flag has been
// parsed so that a missing -i or -o can be detected once the loop is done.
$in_file  = null;
$out_file = null;

/*
 * Walk the command line. Flags that take a value (-i, -n, -o) advance $i a
 * second time so the value is not re-examined as a flag on the next pass.
 * Errors are written to STDERR and exit with status 1 so that calling
 * scripts can detect the failure (die() with a string exits with status 0).
 */
for ($i = 1; $i < $argc; $i++) {
    switch ($argv[$i]) {
        case '-i':
            $i++;
            if (!isset($argv[$i])) {
                fwrite(STDERR, "ERROR: Must specify an uncompressed filename after -i\n");
                exit(1);
            }
            $in_file = $argv[$i];
            break;

        case '-h':
            echo $usage;
            exit(0);

        case '-n':
            $i++;
            if (!isset($argv[$i])) {
                fwrite(STDERR, "ERROR: Must specify a number between 1-1000 after -n\n");
                exit(1);
            }
            // An (int) cast silently turns non-numeric input such as "abc"
            // into 0; reject it instead of running with a meaningless limit.
            if (!preg_match('/^[0-9]+$/', $argv[$i]) || (int) $argv[$i] < 1) {
                fwrite(STDERR, "ERROR: -n expects a positive integer, got: {$argv[$i]}\n");
                exit(1);
            }
            $HowMany_projects = (int) $argv[$i];
            break;

        case '-o':
            $i++;
            if (!isset($argv[$i])) {
                fwrite(STDERR, "ERROR: Must specify an uncompressed filename after -o\n");
                exit(1);
            }
            $out_file = $argv[$i];
            break;

        default:
            fwrite(STDERR, "ERROR: Unknown argument: {$argv[$i]}\n$usage");
            exit(1);
    }
}

// Without this check a command line such as "-n 10 -i file" gets past the
// argument-count test and then fails later on an undefined variable.
if ($in_file === null || $out_file === null) {
    fwrite(STDERR, "ERROR: Both -i <in-file> and -o <out-file> are required\n$usage");
    exit(1);
}
106 
// Open the Freshmeat rdf input for reading. $php_errormsg is gone as of
// PHP 8 (and was only ever populated with track_errors enabled), so the
// reason for a failure is taken from error_get_last() instead.
$F1 = fopen($in_file, 'r');
if ($F1 === false) {
    $open_err = error_get_last();
    $reason = $open_err ? $open_err['message'] : "unable to open $in_file";
    fwrite(STDERR, "can't open file: $reason\n");
    exit(1);
}

// Mode 'w' truncates the target, so pointing -o at the input file would
// destroy the data before a single line has been read. realpath() returns
// false for a path that does not exist yet, which can never equal the
// resolved input path.
$in_real = realpath($in_file);
if ($in_real !== false && $in_real === realpath($out_file)) {
    fwrite(STDERR, "ERROR: <in-file> and <out-file> are the same file: $in_file\n");
    fclose($F1);
    exit(1);
}

/* look for the top 1000 projects, when found, write the project
   entry to a file.

   NOTE: I'm bothered by something here... while one gets the top
   1000, there could be drastic differences (not likely between any two
   days, but possible)....It doesn't really affect this code, but could
   affect users of the output files.
 */

$Output = fopen($out_file, 'w');
if ($Output === false) {
    $open_err = error_get_last();
    $reason = $open_err ? $open_err['message'] : "unable to open $out_file";
    fwrite(STDERR, "Can't open: $reason\n");
    fclose($F1);
    exit(1);
}

echo "Extracting the top $HowMany_projects projects from:\n$in_file\n";
echo "\nWriting the top $HowMany_projects projects to: $out_file\n";

// need a valid doc, write the header 1st, and open tag
write_hdr($Output);
125 
/*
 * Scan the input one line at a time. A <project> line records the current
 * file offset; a later <popularity_rank> line whose value is within the
 * requested limit hands that offset to write_entry() together with both
 * file handles.
 * NOTE(review): write_entry() is defined outside this file; it presumably
 * copies the project starting at the recorded offset -- confirm there.
 */
$proj_mark = null;

// Strict comparison: fgets() may legitimately return the string "0" (a final
// line without a newline), which a loose != false would mistake for EOF.
// NOTE(review): the 1024-byte limit is kept as-is; a longer line is returned
// in pieces, so a tag spanning a piece boundary would be missed.
while (false !== ($line = fgets($F1, 1024))) {
    if (preg_match('/<project>/', $line)) {
        // Offset just past the line that holds the opening <project> tag.
        $proj_mark = ftell($F1);
    }
    elseif (preg_match('/<popularity_rank>([0-9]+).*</', $line, $rank_match)) {
        // Capture the digits straight after the tag. Locating the value via
        // the first '>' on the line picks the wrong text whenever another
        // tag precedes <popularity_rank> on the same line.
        $rank = (int) $rank_match[1];
        if ($rank <= $HowMany_projects) {
            if ($proj_mark === null) {
                // A rank with no preceding <project> gives write_entry()
                // nothing valid to seek to; report it and move on.
                fwrite(STDERR, "WARNING: popularity_rank $rank found before any <project> tag, skipped\n");
                continue;
            }
            write_entry($F1, $proj_mark, $Output);
        }
    }
}
145 
/*
 * Wrap up: emit the document's end tag through close_tag(), then release
 * the input and output handles (in that order) and report completion.
 */
close_tag($Output);

foreach (array($F1, $Output) as $handle) {
    fclose($handle);
}

print "Done\n";
153 
154 ?>
char A[MAXCMD]
input for this system
Definition: finder.c:37
Usage()
Print Usage statement.
Definition: fo_dbcheck.php:75