FOSSology  3.2.0rc1
Open Source License Compliance by Open Source Software
FossologyUnicodeClean.cc
1 /*
2  * Copyright (C) 2019, Siemens AG
3  * Author: Gaurav Mishra <mishra.gaurav@siemens.com>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License
7  * version 2 as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License along
15  * with this program; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 
20 
21 using namespace std;
22 
27 {
28  this->flush();
29  if (this->destinationFile.is_open())
30  {
31  this->destinationFile.close();
32  }
33  if (this->sourceFile.is_open())
34  {
35  this->sourceFile.close();
36  }
37 }
38 
46  string &destination) : sourceFile(NULL), destinationFile(NULL),
47  bufferSize (0), stopRead(false)
48 {
49  if ((!source.empty() && !destination.empty()) && (source == destination))
50  {
51  cerr << "Input and Output files can not be same.\n";
52  cerr << "Input: " << source << "\nOutput: " << destination;
53  cerr << " passed" << endl;
54  exit(-3);
55  }
56  if (!source.empty())
57  {
58  sourceFile.open(source, ios::in | ios::binary);
59  if (sourceFile.fail())
60  {
61  cerr << "Unable to open " << source << endl;
62  cerr << "Error: " << strerror(errno) << endl;
63  exit(-1);
64  }
65  }
66  if (!destination.empty())
67  {
68  destinationFile.open(destination, ios::out | ios::binary | ios::trunc);
69  if (destinationFile.fail())
70  {
71  cerr << "Unable to open " << destination << endl;
72  cerr << "Error: " << strerror(errno) << endl;
73  exit(-2);
74  }
75  }
76  this->buffer.reserve(MAX_BUFFER_LEN);
77 }
78 
84 {
85  string input;
86  input = this->dirtyRead();
87  while (!this->stopRead)
88  {
89  icu::UnicodeString output = fo::recodeToUnicode(input);
90  this->write(output);
91  input = this->dirtyRead();
92  }
93  this->flush();
94 }
95 
101 {
102  string input;
103  if (sourceFile.eof() || cin.eof())
104  {
105  this->stopRead = true;
106  return "";
107  }
108  if (sourceFile && sourceFile.is_open())
109  {
110  std::getline(sourceFile, input, '\n');
111  }
112  else
113  {
114  std::getline(cin, input, '\n');
115  }
116  return input;
117 }
118 
126 void FossologyUnicodeClean::write(const icu::UnicodeString &output)
127 {
128  this->buffer.push_back(output);
129  this->bufferSize++;
130  if (this->bufferSize == MAX_BUFFER_LEN)
131  {
132  this->flush();
133  }
134 }
135 
143 {
144  if (destinationFile && destinationFile.is_open())
145  {
146  for (size_t i = 0; i < this->buffer.size(); i++)
147  {
148  string temp;
149  buffer[i].toUTF8String(temp);
150  destinationFile << temp << "\n";
151  }
152  }
153  else
154  {
155  for (size_t i = 0; i < this->buffer.size(); i++)
156  {
157  string temp;
158  buffer[i].toUTF8String(temp);
159  cout << temp << "\n";
160  }
161  }
162  buffer.clear();
163  bufferSize = 0;
164 }
165 
174 bool parseCliOptions(int argc, char **argv, string &input, string &output)
175 {
176  boost::program_options::options_description desc("fo_unicode_clean "
177  ": recognized options");
178  desc.add_options()
179  (
180  "help,h", "shows help"
181  )
182  (
183  "input,i",
184  boost::program_options::value<string>(),
185  "file to read"
186  )
187  (
188  "output,o",
189  boost::program_options::value<string>(),
190  "output file"
191  )
192  ;
193 
194  boost::program_options::variables_map vm;
195 
196  try
197  {
198  boost::program_options::store(
199  boost::program_options::command_line_parser(argc,
200  argv).options(desc).run(), vm);
201 
202  if (vm.count("help") > 0)
203  {
204  cout << desc << endl;
205  cout << "If no input passed, read from STDIN." << endl;
206  cout << "If no output passed, print to STDOUT." << endl;
207  exit(0);
208  }
209 
210  if (vm.count("input"))
211  {
212  input = vm["input"].as<string>();
213  }
214  if (vm.count("output"))
215  {
216  output = vm["output"].as<string>();
217  }
218  return true;
219  }
220  catch (boost::bad_any_cast&)
221  {
222  cout << "wrong parameter type" << endl;
223  cout << desc << endl;
224  return false;
225  }
226  catch (boost::program_options::error&)
227  {
228  cout << "wrong command line arguments" << endl;
229  cout << desc << endl;
230  return false;
231  }
232 }
233 
234 int main(int argc, char **argv)
235 {
236  string input, output;
237  if (parseCliOptions(argc, argv, input, output))
238  {
239  FossologyUnicodeClean obj(input, output);
240  obj.startConvert();
241  return 0;
242  }
243  return -4;
244 }
#define MAX_BUFFER_LEN
void flush()
Flush the buffers and reset the internal buffer.
const std::string dirtyRead()
void write(const icu::UnicodeString &output)
Write the string to file/stream.
FossologyUnicodeClean(std::string &source, std::string &destination)
bool parseCliOptions(int argc, char **argv, CliOptions &dest, std::vector< std::string > &fileNames, std::string &directoryToScan)
Parse the options sent by CLI to CliOptions object.
char buffer[2048]
The last thing received from the scheduler.
icu::UnicodeString recodeToUnicode(const std::string &input)
Definition: libfossUtils.cc:43