FOSSology  3.2.0rc1
Open Source License Compliance by Open Source Software
encoding.c
1 /*
2 Author: Daniele Fognini
3 Copyright (C) 2015, Siemens AG
4 
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License
7 version 2 as published by the Free Software Foundation.
8 
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13 
14 You should have received a copy of the GNU General Public License along
15 with this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18 
19 #include "encoding.h"
20 
21 #ifdef HAVE_CHARDET
22 #include <uchardet.h>
23 #else
24 #include <magic.h>
25 #endif
26 
27 #include <string.h>
28 #include <stdio.h>
29 
30 iconv_t guessConverter(const char* buffer, size_t len)
31 {
32  char* const target = "utf-8";
33 
34  iconv_t iconvCookie = NULL;
35 
36  gchar* encoding = guessEncoding(buffer, len);
37  if (encoding && (strcmp(encoding, target) != 0))
38  {
39  iconvCookie = iconv_open(target, encoding);
40  g_free(encoding);
41  }
42 
43  return iconvCookie;
44 }
45 
46 gchar* guessEncoding(const char* buffer, size_t len) {
47  gchar* result = NULL;
48 #ifdef HAVE_CHARDET
49  uchardet_t cd = uchardet_new();
50  if (!uchardet_handle_data(cd, buffer, len)) {
51  uchardet_data_end(cd);
52 
53  const char* chardet = uchardet_get_charset(cd);
54 
55  if (chardet && strcmp(chardet, "")!=0) {
56  result = g_strdup(chardet);
57  }
58  }
59 
60  uchardet_delete(cd);
61 #else
62  magic_t cookie = magic_open(MAGIC_MIME);
63  magic_load(cookie, NULL);
64 
65  const char* resp = magic_buffer(cookie, buffer, len);
66 
67  if (!resp)
68  {
69  printf("magic error: %s\n", magic_error(cookie));
70  goto done;
71  }
72 
73  char* charset = strstr(resp, "charset=");
74 
75  if (!charset)
76  {
77  goto done;
78  }
79 
80  charset += 8; // len of "charset="
81 
82  result = g_strdup(charset);
83 
84 done:
85  magic_close(cookie);
86 #endif
87  return result;
88 }
char buffer[2048]
The last thing received from the scheduler.