FOSSology  3.2.0rc1
Open Source License Compliance by Open Source Software
library.php
Go to the documentation of this file.
1 <?php
2 /***********************************************************
3  Copyright (C) 2010-2013 Hewlett-Packard Development Company, L.P.
4 
5  This program is free software; you can redistribute it and/or
6  modify it under the terms of the GNU General Public License
7  version 2 as published by the Free Software Foundation.
8 
9  This program is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  GNU General Public License for more details.
13 
14  You should have received a copy of the GNU General Public License along
15  with this program; if not, write to the Free Software Foundation, Inc.,
16  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 ***********************************************************/
18 
/**
 * \brief Comparison callback that orders histogram rows by their
 *        'copyright_count' value, smallest first.
 *
 * \param $a  row array holding a 'copyright_count' entry
 * \param $b  row array holding a 'copyright_count' entry
 *
 * \return the difference $a count minus $b count: negative when $a sorts
 *         first, zero when the counts match, positive when $b sorts first
 */
function hist_rowcmp_count_asc($a, $b)
{
  return $a['copyright_count'] - $b['copyright_count'];
}
37 
/**
 * \brief Comparison callback that orders histogram rows by their
 *        'copyright_count' value, largest first.
 *
 * \param $a  row array holding a 'copyright_count' entry
 * \param $b  row array holding a 'copyright_count' entry
 *
 * \return the difference $b count minus $a count: negative when $a has the
 *         larger count, zero when the counts match, positive otherwise
 */
function hist_rowcmp_count_desc($a, $b)
{
  $countA = $a['copyright_count'];
  $countB = $b['copyright_count'];

  /* Operands are swapped relative to the ascending comparator. */
  return $countB - $countA;
}
49 
/**
 * \brief Comparison callback that orders histogram rows by their 'content'
 *        string, ascending, using case-insensitive natural ordering.
 *
 * \param $rowa  row array holding a 'content' entry
 * \param $rowb  row array holding a 'content' entry
 *
 * \return the strnatcasecmp() result for the two content strings
 */
function hist_rowcmp($rowa, $rowb)
{
  $left  = $rowa['content'];
  $right = $rowb['content'];

  return strnatcasecmp($left, $right);
}
60 
/**
 * \brief Comparison callback that orders histogram rows by their 'content'
 *        string, descending, using case-insensitive natural ordering.
 *
 * \param $rowa  row array holding a 'content' entry
 * \param $rowb  row array holding a 'content' entry
 *
 * \return the negated strnatcasecmp() result for the two content strings
 */
function hist_rowcmp_desc($rowa, $rowb)
{
  $ascending = strnatcasecmp($rowa['content'], $rowb['content']);

  return -$ascending;
}
71 
/**
 * \brief Comparison callback that orders rows by their 'ufile_name' string
 *        using case-insensitive natural ordering.
 *
 * \param $rowa  row array holding a 'ufile_name' entry
 * \param $rowb  row array holding a 'ufile_name' entry
 *
 * \return the strnatcasecmp() result for the two file names
 */
function copyright_namecmp($rowa, $rowb)
{
  $nameA = $rowa['ufile_name'];
  $nameB = $rowb['ufile_name'];

  return strnatcasecmp($nameA, $nameB);
}
82 
83 
/**
 * \brief Get the files under an uploadtree item that carry a given
 *        copyright table entry.
 *
 * Looks up the lft/rgt bounds and upload of $uploadtree_pk, then selects the
 * distinct (uploadtree_pk, pfile_fk, ufile_name) rows inside those bounds
 * whose pfile has a copyright record matching $agent_pk, $hash and $type.
 *
 * All values are bound through pg_query_params() rather than interpolated
 * into the SQL text, so $hash and $type cannot break out of the statement.
 *
 * \param $agent_pk       agent id matched against copyright.agent_fk
 * \param $hash           value matched against copyright.hash
 * \param $type           value matched against copyright.type
 * \param $uploadtree_pk  uploadtree item that bounds the search
 * \param $PkgsOnly       unused by this function
 * \param $offset         row offset; non-numeric or negative becomes 0
 * \param $limit          maximum row count; anything that is not a
 *                        non-negative number is treated as "ALL"
 * \param $order          SQL fragment inserted verbatim before LIMIT
 *                        (e.g. an ORDER BY clause). NOTE(review): this cannot
 *                        be bound as a parameter; callers must never build it
 *                        from untrusted input.
 *
 * \return pg result resource for the query; the caller is responsible for
 *         freeing it
 */
function GetFilesWithCopyright($agent_pk, $hash, $type, $uploadtree_pk,
                               $PkgsOnly=false, $offset=0, $limit="ALL",
                               $order="")
{
  global $PG_CONN;

  /* Find lft and rgt bounds for this $uploadtree_pk */
  $sql = 'SELECT lft, rgt, upload_fk FROM uploadtree
           WHERE uploadtree_pk = $1';
  $result = pg_query_params($PG_CONN, $sql, array($uploadtree_pk));
  DBCheckResult($result, $sql, __FILE__, __LINE__);
  $row = pg_fetch_assoc($result);
  pg_free_result($result);

  if ($row === false) {
    /* Unknown uploadtree_pk: bind NULL bounds so the query below simply
     * matches nothing instead of being sent as malformed SQL. */
    $row = array("lft" => null, "rgt" => null, "upload_fk" => null);
  }
  $lft = $row["lft"];
  $rgt = $row["rgt"];
  $upload_pk = $row["upload_fk"];

  /* LIMIT/OFFSET are reduced to an integer or the keyword ALL before they
   * are placed in the SQL text. */
  $limitSql = (is_numeric($limit) && $limit >= 0) ? (string)(int)$limit : "ALL";
  $offsetSql = is_numeric($offset) ? (string)max(0, (int)$offset) : "0";

  $sql = 'select distinct uploadtree_pk, pfile_fk, ufile_name
            from copyright,
                 (SELECT pfile_fk as PF, uploadtree_pk, ufile_name from uploadtree
                   where upload_fk=$1
                     and uploadtree.lft BETWEEN $2 and $3) as SS
           where PF=pfile_fk and agent_fk=$4
             and hash=$5 and type=$6
           group by uploadtree_pk, pfile_fk, ufile_name
           ' . $order . ' limit ' . $limitSql . ' offset ' . $offsetSql;
  $params = array($upload_pk, $lft, $rgt, $agent_pk, $hash, $type);
  $result = pg_query_params($PG_CONN, $sql, $params); // Top uploadtree_pk's
  DBCheckResult($result, $sql, __FILE__, __LINE__);

  return $result;
}
131 
/**
 * \brief Count the distinct pfiles under an uploadtree item that carry a
 *        given copyright table entry.
 *
 * Looks up the lft/rgt bounds and upload of $uploadtree_pk, then counts the
 * distinct pfile_fk values inside those bounds that have a copyright record
 * matching $agent_pk, $hash and $type.
 *
 * All values are bound through pg_query_params() rather than interpolated
 * into the SQL text, so $hash and $type cannot break out of the statement.
 *
 * \param $agent_pk       agent id matched against copyright.agent_fk
 * \param $hash           value matched against copyright.hash
 * \param $type           value matched against copyright.type
 * \param $uploadtree_pk  uploadtree item that bounds the search
 * \param $PkgsOnly       unused by this function
 * \param $CheckOnly      when true, " LIMIT 1" is appended to the query.
 *                        NOTE(review): the query is a single aggregate, so
 *                        the LIMIT does not shorten the count; kept only to
 *                        preserve the existing statement.
 *
 * \return the count as returned by the database (column 'unique')
 */
function CountFilesWithCopyright($agent_pk, $hash, $type, $uploadtree_pk,
                                 $PkgsOnly=false, $CheckOnly=false)
{
  global $PG_CONN;

  /* Find lft and rgt bounds for this $uploadtree_pk */
  $sql = 'SELECT lft, rgt, upload_fk FROM uploadtree
           WHERE uploadtree_pk = $1';
  $result = pg_query_params($PG_CONN, $sql, array($uploadtree_pk));
  DBCheckResult($result, $sql, __FILE__, __LINE__);
  $row = pg_fetch_assoc($result);
  pg_free_result($result);

  if ($row === false) {
    /* Unknown uploadtree_pk: bind NULL bounds so the count below is 0
     * instead of sending malformed SQL. */
    $row = array("lft" => null, "rgt" => null, "upload_fk" => null);
  }
  $lft = $row["lft"];
  $rgt = $row["rgt"];
  $upload_pk = $row["upload_fk"];

  $chkonly = ($CheckOnly) ? " LIMIT 1" : "";

  $sql = 'SELECT count(DISTINCT pfile_fk) as unique from copyright,
            (SELECT pfile_fk as PF from uploadtree
              where upload_fk=$1 and uploadtree.lft BETWEEN $2 and $3) as SS
            where PF=pfile_fk and agent_fk=$4 and hash=$5 and type=$6
            ' . $chkonly;
  $params = array($upload_pk, $lft, $rgt, $agent_pk, $hash, $type);

  $result = pg_query_params($PG_CONN, $sql, $params);
  DBCheckResult($result, $sql, __FILE__, __LINE__);

  $row = pg_fetch_assoc($result);
  $FileCount = $row['unique'];
  pg_free_result($result);
  return $FileCount;
}
177 
178 
/**
 * \brief Placeholder for rearranging a copyright statement so the holder
 *        comes first. Not implemented: the input is handed back untouched.
 *
 * \param $content  the copyright statement text
 *
 * \return $content, unchanged
 */
function StmtReorder($content)
{
  /* No reordering is performed yet. */
  return $content;
}
195 
196 
/**
 * \brief Clean up the 'content' of a copyright row in place and decide
 *        whether the row should be skipped.
 *
 * Cleaning steps, in order:
 *  - control characters (0x00-0x1f) become spaces
 *  - type 'statement': the characters !"#$%&' * @ and commas, digit ranges
 *    (N-N), runs of two or more digits, and a free-standing " : " become
 *    spaces
 *  - type 'email': each of the characters : ; < = > ( ) becomes a space
 *  - runs of whitespace collapse to a single space
 *  - leading/trailing whitespace and the characters < > . / " \ ' are trimmed
 *  - a leading "dnl " (case-insensitive) is removed
 *
 * When the cleaned content is non-empty the row receives the new 'content',
 * 'copyright_count' = 1 and 'hash' = md5 of the new content.
 *
 * \param $row   row array with at least 'content' and 'type'; modified in
 *               place unless the cleaned content is empty
 * \param $hash  if non-empty, only rows whose new hash equals this value
 *               are kept
 *
 * \return true if the row should be skipped (cleaned content is empty, or
 *         $hash was given and does not match), false otherwise
 */
function MassageContent(&$row, $hash)
{
  /* Step 1: Clean up content */

  /* remove control characters */
  $content = preg_replace('/[\x0-\x1f]/', ' ', $row['content']);

  if ($row['type'] == 'statement') {
    /* !"#$%&' plus * and @ */
    $content = preg_replace('/([\x21-\x27])|([*@])/', ' ', $content);

    /* numbers-numbers, two or more digits, ',' */
    $content = preg_replace('/(([0-9]+)-([0-9]+))|([0-9]{2,})|(,)/', ' ', $content);

    /* free :, probably followed a date */
    $content = preg_replace('/ : /', ' ', $content);
  } elseif ($row['type'] == 'email') {
    /* Replace each punctuation character individually. Passing the single
     * string ":;<=>()" as the search value would only match that exact
     * seven-character sequence. */
    $content = str_replace(array(':', ';', '<', '=', '>', '(', ')'), ' ', $content);
  }

  /* remove double spaces */
  $content = preg_replace('/\s\s+/', ' ', $content);

  /* remove leading/trailing whitespace and some punctuation
   * (the mask contains both a backslash and a single quote) */
  $content = trim($content, "\t \n\r<>./\"\\'");

  /* remove leading "dnl " */
  if ((strlen($content) > 4) &&
      (substr_compare($content, "dnl ", 0, 4, true) == 0)) {
    $content = substr($content, 4);
  }

  /* skip empty content; the row is left untouched in this case */
  if (empty($content)) {
    return true;
  }

  /* Step 1B (not yet implemented): rearrange copyright statements to try
   * and put the holder first, followed by the rest of the statement, less
   * copyright years.
   */

  $row['content'] = $content;
  $row['copyright_count'] = 1;
  $row['hash'] = md5($row['content']);

  /* A caller-supplied hash acts as a filter on the cleaned content. */
  if ($hash && ($row['hash'] != $hash)) {
    return true;
  }

  return false;
} /* End of MassageContent() */
StmtReorder($content)
rearrange copyright statement to try and put the holder first, followed by the rest of the statement...
Definition: library.php:191
hist_rowcmp($rowa, $rowb)
Sort query histogram results (by content), ascend.
Definition: library.php:56
hist_rowcmp_count_asc($a, $b)
Sort query histogram results (by count), ascend.
Definition: library.php:32
copyright_namecmp($rowa, $rowb)
Sort rows by filename.
Definition: library.php:78
GetFilesWithCopyright($agent_pk, $hash, $type, $uploadtree_pk, $PkgsOnly=false, $offset=0, $limit="ALL", $order="")
get files with a given copyright.
Definition: library.php:99
hist_rowcmp_desc($rowa, $rowb)
Sort query histogram results (by content), descend.
Definition: library.php:67
MassageContent(&$row, $hash)
Input row array contains: pfile, content and type.
Definition: library.php:210
hist_rowcmp_count_desc($a, $b)
Sort query histogram results (by count), descend.
Definition: library.php:44
foreach($Options as $Option=> $OptVal) if(0==$reference_flag &&0==$nomos_flag) $PG_CONN
CountFilesWithCopyright($agent_pk, $hash, $type, $uploadtree_pk, $PkgsOnly=false, $CheckOnly=false)
Definition: library.php:145
DBCheckResult($result, $sql, $filenm, $lineno)
Check the postgres result for unexpected errors. If found, treat them as fatal.
Definition: common-db.php:198
char * trim(char *ptext)
Trimming whitespace.
Definition: fossconfig.c:695