FOSSology  3.2.0rc1
Open Source License Compliance by Open Source Software
common-string.php
Go to the documentation of this file.
1 <?php
2 /***********************************************************
3  * Copyright (C) 2014 Siemens AG
4  * Author: D.Fognini, S. Weber, J.Najjar
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * version 2 as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with this program; if not, write to the Free Software Foundation, Inc.,
17  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18  ***********************************************************/
19 
// PHP releases that predate ENT_SUBSTITUTE do not define the constant.
// Fall back to 0 (no flag bits set) so calls that pass it still work there.
if (defined('ENT_SUBSTITUTE') === false) {
  define('ENT_SUBSTITUTE', 0);
}
29 
/**
 * @brief Return $content as valid UTF-8, optionally escaped for HTML output.
 *
 * Content that already validates as UTF-8 is used unchanged. Anything else is
 * handed to tryConvertToUTF8(); if that fails, or its result still does not
 * validate, an "Unknown encoding" marker is used instead.
 *
 * @param string $content Raw text to convert
 * @param bool   $toHTML  When true (default) the result is passed through
 *                        htmlspecialchars() before being returned
 * @return string Empty string for empty input; otherwise the UTF-8 text
 *                (escaped when $toHTML is true) or an "Unknown encoding" marker
 */
function convertToUTF8($content, $toHTML=true)
{
  if (strlen($content) == 0) {
    return '';
  }
  if (checkUTF8($content)) {
    $output1 = $content;
  } else {
    $output1 = tryConvertToUTF8($content);
    if (! $output1 || ! checkUTF8($output1)) {
      // The $toHTML variant is escaped below, so its angle brackets end up
      // as entities; the other variant is returned exactly as written here.
      $output1 = $toHTML ? "<Unknown encoding>" : "<b>Unknown encoding</b>";
    }
  }

  if (! $toHTML) {
    return $output1;
  }

  $escaped = htmlspecialchars($output1, ENT_SUBSTITUTE, "UTF-8");
  // $output1 is never empty at this point, so a non-string or empty result
  // means escaping failed. Test for that explicitly: a truthiness check
  // (`?:`) would also reject the perfectly valid content "0".
  if (! is_string($escaped) || $escaped === '') {
    return "<b>Unknown encoding</b>";
  }
  return $escaped;
}
55 
/**
 * @brief Tell whether $content is valid UTF-8.
 *
 * @param string $content Text to validate
 * @return bool Result of mb_check_encoding() for the "UTF-8" encoding
 */
function checkUTF8($content)
{
  $isValidUtf8 = mb_check_encoding($content, "UTF-8");

  return $isValidUtf8;
}
65 
/**
 * @brief Attempt to re-encode $content as UTF-8.
 *
 * The source charset is detected strictly using the configured
 * mb_detect_order(). When detection fails, a fixed list of candidate
 * charsets is tried in order and the first non-empty conversion wins.
 *
 * @param string $content Text in an unknown encoding
 * @return string|false Converted text; false when the content is detected as
 *                      UTF-8 already (nothing to convert) or when iconv()
 *                      returned false
 */
function tryConvertToUTF8($content)
{
  $detected = mb_detect_encoding($content, mb_detect_order(), true);

  if ($detected) {
    // Detected as UTF-8 already: there is nothing to convert.
    if ($detected == "UTF-8") {
      return false;
    }
    return iconv($detected, "UTF-8", $content);
  }

  // Detection failed: fall back to guessing from a list of charsets.
  $converted = false;
  foreach (array('iso-8859-1', 'windows-1251', 'GB2312') as $candidate) {
    $converted = iconv($candidate, "UTF-8", $content);
    if ($converted) {
      return $converted;
    }
  }

  // Every candidate failed; hand back whatever the last attempt produced.
  return $converted;
}
checkUTF8($content)
tryConvertToUTF8($content)
if(!defined('ENT_SUBSTITUTE')) convertToUTF8($content, $toHTML=true)