FOSSology  3.2.0rc1
test_string_operations.c
/*
Author: Daniele Fognini, Andreas Wuerl
Copyright (C) 2013-2014, Siemens AG

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#include <stdint.h>
#include <glib.h>
#include <CUnit/CUnit.h>

#include "string_operations.h"
#include "hash.h"
#include "monk.h"

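/*
 * These tests exercise the tokenizer declared in string_operations.h.
 * The authoritative Token definition lives in the monk headers; as inferred
 * from the assertions below it presumably carries at least:
 *
 *   hashedContent  - hash() of the (possibly lower-cased) token text
 *   length         - token length in bytes
 *   removedBefore  - number of skipped bytes preceding the token
 *
 * tokenize(text, delimiters) returns a GArray of Token built in one pass;
 * streamTokenize() builds the same array incrementally from chunks.
 */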
void test_tokenize() {
  char* test = g_strdup("^foo^^ba^");

  GArray* token = tokenize(test, "^");

  CU_ASSERT_EQUAL(token->len, 2);
  CU_ASSERT_EQUAL(g_array_index(token, Token, 0).hashedContent, hash("foo"));
  CU_ASSERT_EQUAL(g_array_index(token, Token, 0).length, 3);
  CU_ASSERT_EQUAL(g_array_index(token, Token, 0).removedBefore, 1);
  CU_ASSERT_EQUAL(g_array_index(token, Token, 1).hashedContent, hash("ba"));
  CU_ASSERT_EQUAL(g_array_index(token, Token, 1).length, 2);
  CU_ASSERT_EQUAL(g_array_index(token, Token, 1).removedBefore, 2);

  g_array_free(token, TRUE);
  g_free(test);
}

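/*
 * Only " \n" is passed as the delimiter set here, yet the expected
 * removedBefore counts also cover the '/', '*' and ':' characters: the
 * tokenizer evidently skips such comment punctuation in addition to the
 * given delimiters. For example the first token "foo" has
 * removedBefore == 2, counting the leading '/' and '*'.
 */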
void test_tokenizeWithSpecialDelims() {
  char* test = g_strdup("/*foo \n * bar \n *baz*/ ***booo \n:: qoo ");

  GArray* token = tokenize(test, " \n");
  CU_ASSERT_EQUAL(token->len, 5);
  CU_ASSERT_EQUAL(g_array_index(token, Token, 0).hashedContent, hash("foo"));
  CU_ASSERT_EQUAL(g_array_index(token, Token, 0).length, 3);
  CU_ASSERT_EQUAL(g_array_index(token, Token, 0).removedBefore, 2);
  CU_ASSERT_EQUAL(g_array_index(token, Token, 1).hashedContent, hash("bar"));
  CU_ASSERT_EQUAL(g_array_index(token, Token, 1).length, 3);
  CU_ASSERT_EQUAL(g_array_index(token, Token, 1).removedBefore, 5);
  CU_ASSERT_EQUAL(g_array_index(token, Token, 2).hashedContent, hash("baz"));
  CU_ASSERT_EQUAL(g_array_index(token, Token, 2).length, 3);
  CU_ASSERT_EQUAL(g_array_index(token, Token, 2).removedBefore, 4);
  CU_ASSERT_EQUAL(g_array_index(token, Token, 3).hashedContent, hash("booo"));
  CU_ASSERT_EQUAL(g_array_index(token, Token, 3).length, 4);
  CU_ASSERT_EQUAL(g_array_index(token, Token, 3).removedBefore, 6);
  CU_ASSERT_EQUAL(g_array_index(token, Token, 4).hashedContent, hash("qoo"));
  CU_ASSERT_EQUAL(g_array_index(token, Token, 4).length, 3);
  CU_ASSERT_EQUAL(g_array_index(token, Token, 4).removedBefore, 5);
  g_array_free(token, TRUE);
  g_free(test);
}

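/*
 * The input is fed to streamTokenize() in 2-byte chunks; every call is
 * expected to return the number of tokens it appended, and a final call
 * with a NULL buffer flushes the pending remainder. The assertions show
 * that standalone "REM" pieces are dropped (they only show up in
 * removedBefore), whereas the "REM" glued onto "booo" stays part of the
 * token; with MONK_CASE_INSENSITIVE builds the content is hashed
 * lower-cased.
 */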
void test_streamTokenize() {
  char* test = g_strdup("^foo^^ba^REM^boooREM^REM^");
  const char* delimiters = "^";

  GArray* token = tokens_new();

  Token* remainder = NULL;

  size_t len = strlen(test);

  int chunkSize = 2;
  char* ptr = test;
  size_t rea = 0;
  while (rea < len) {
    unsigned int tokenCount = token->len;
    int thisChunkSize = MIN(chunkSize, len - rea);

    int addedTokens = streamTokenize(ptr, thisChunkSize, delimiters, &token, &remainder);

    CU_ASSERT_EQUAL(addedTokens, token->len - tokenCount);

    ptr += chunkSize;
    rea += chunkSize;
  }
  streamTokenize(NULL, 0, NULL, &token, &remainder);

  CU_ASSERT_EQUAL_FATAL(token->len, 3);
  CU_ASSERT_EQUAL(g_array_index(token, Token, 0).hashedContent, hash("foo"));
  CU_ASSERT_EQUAL(g_array_index(token, Token, 0).length, 3);
  CU_ASSERT_EQUAL(g_array_index(token, Token, 0).removedBefore, 1);
  CU_ASSERT_EQUAL(g_array_index(token, Token, 1).hashedContent, hash("ba"));
  CU_ASSERT_EQUAL(g_array_index(token, Token, 1).length, 2);
  CU_ASSERT_EQUAL(g_array_index(token, Token, 1).removedBefore, 2);
#ifndef MONK_CASE_INSENSITIVE
  CU_ASSERT_EQUAL(g_array_index(token, Token, 2).hashedContent, hash("boooREM"));
#else
  CU_ASSERT_EQUAL(g_array_index(token, Token, 2).hashedContent, hash("booorem"));
#endif
  CU_ASSERT_EQUAL(g_array_index(token, Token, 2).length, 7);
  CU_ASSERT_EQUAL(g_array_index(token, Token, 2).removedBefore, 5);

  CU_ASSERT_PTR_NULL(remainder);

  CU_ASSERT_EQUAL(token_position_of(3, token), 20);

  tokens_free(token);
  g_free(test);
}

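/*
 * Here ptr is deliberately never advanced, so streamTokenize() keeps
 * receiving the same unterminated chunk. Instead of accumulating tokens
 * forever it is expected to give up and return -1 (printing a warning,
 * hence the notice emitted below) well before the 2^27 iteration safety
 * limit is reached.
 */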
void test_streamTokenizeEventuallyGivesUp() {
  char* test = g_strdup("^foo^^ba");
  const char* delimiters = "^";

  GArray* token = tokens_new();

  Token* remainder = NULL;

  char* endPtr = test + strlen(test);

  printf("test: expecting a warning: ");
  int chunkSize = 5;
  char* ptr = test;
  int addedTokens = 0;
  uint32_t i = 0;
  while ((i < 1 << 27) && (*ptr) && (ptr <= endPtr)) {
    unsigned int tokenCount = token->len;
    int thisChunkSize = MIN(chunkSize, endPtr - ptr);

    addedTokens = streamTokenize(ptr, thisChunkSize, delimiters, &token, &remainder);

    if (addedTokens == -1) {
      break;
    } else if (addedTokens != token->len - tokenCount) {
      CU_FAIL("wrong return value from streamTokenize()");
    }

    i++;
  }
  streamTokenize(NULL, 0, NULL, &token, &remainder);

  CU_ASSERT_EQUAL(addedTokens, -1);

  CU_ASSERT_TRUE(token->len > 0);

  g_array_free(token, TRUE);
  g_free(test);
}

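/*
 * Helper for the position tests below: tokenizes `string` with "^" as the
 * delimiter and checks token_position_of() against the `count` expected
 * byte offsets passed as varargs.
 */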
void assertTokenPosition(char* string, int count, ...) {
  char* test = g_strdup(string);

  GArray* tokens = tokenize(test, "^");

  CU_ASSERT_EQUAL(tokens->len, count);
  if (tokens->len == count) {

    va_list argptr;
    va_start(argptr, count);
    for (size_t i = 0; i < tokens->len; i++) {
      int expected = va_arg(argptr, int);
      size_t current = token_position_of(i, tokens);
      if (current != (size_t) expected) {
        printf("ASSERT tokenizing '%s': posof(token[%zu]) == %zu != %d\n", string, i, current, expected);
        CU_FAIL("see output");
        break;
      }
      CU_ASSERT_EQUAL(current, expected);
    }
    va_end(argptr);
  } else {
    printf("ASSERT tokenizing '%s': token count %u != %d\n", string, tokens->len, count);
  }

  g_array_free(tokens, TRUE);
  g_free(test);
}

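/*
 * Positions are byte offsets into the original string: in "^foo^^bar" the
 * token "foo" starts at offset 1 and "bar" at offset 6.
 */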
void test_tokenPosition() {
  assertTokenPosition("foo", 1, 0);
  assertTokenPosition("^foo", 1, 1);
  assertTokenPosition("^foo^^bar", 2, 1, 6);
  assertTokenPosition("foo^^bar", 2, 0, 5);
  assertTokenPosition("^foo^^bar^^^^^baz", 3, 1, 6, 14);
}

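/*
 * Requesting the position one past the last token (index 3 of only three
 * tokens) evidently yields the end of the tokenized text: offset 8 is
 * strlen("^^23^5^7").
 */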
void test_tokenPositionAtEnd() {
  char* test = g_strdup("^^23^5^7");
  GArray* tokens = tokenize(test, "^");

  CU_ASSERT_EQUAL(token_position_of(0, tokens), 2);
  CU_ASSERT_EQUAL(token_position_of(1, tokens), 5);
  CU_ASSERT_EQUAL(token_position_of(2, tokens), 7);
  CU_ASSERT_EQUAL(token_position_of(3, tokens), 8);

  g_array_free(tokens, TRUE);
  g_free(test);
}

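/*
 * tokenEquals() presumably compares tokens by hashed content and length,
 * so near-matches must be rejected: "ba" and "barr" are not equal to "bar".
 */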
void test_token_equal() {
  char* text = g_strdup("^foo^^bar^ba^barr");
  char* search = g_strdup("bar^^foo^");

  GArray* tokenizedText = tokenize(text, "^");
  GArray* tokenizedSearch = tokenize(search, "^");

  Token* t0 = tokens_index(tokenizedText, 0);
  Token* t1 = tokens_index(tokenizedText, 1);
  Token* t2 = tokens_index(tokenizedText, 2);
  Token* t3 = tokens_index(tokenizedText, 3);
  Token* s0 = tokens_index(tokenizedSearch, 0);
  Token* s1 = tokens_index(tokenizedSearch, 1);

  CU_ASSERT_TRUE(tokenEquals(t0, s1));  // foo == foo
  CU_ASSERT_TRUE(tokenEquals(t1, s0));  // bar == bar
  CU_ASSERT_FALSE(tokenEquals(t2, s0)); // ba != bar
  CU_ASSERT_FALSE(tokenEquals(t3, s0)); // barr != bar

  g_array_free(tokenizedText, TRUE);
  g_array_free(tokenizedSearch, TRUE);
  g_free(text);
  g_free(search);
}

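/*
 * CUnit registration table for this suite; presumably wired into the monk
 * agent's test runner alongside the other *_testcases arrays.
 */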
CU_TestInfo string_operations_testcases[] = {
  {"Testing tokenize:", test_tokenize},
  {"Testing tokenize with special delimiters:", test_tokenizeWithSpecialDelims},
  {"Testing stream tokenize:", test_streamTokenize},
  {"Testing stream tokenize with too long stream:", test_streamTokenizeEventuallyGivesUp},
  {"Testing find token position in string:", test_tokenPosition},
  {"Testing find token position at end:", test_tokenPositionAtEnd},
  {"Testing token equals:", test_token_equal},
  CU_TEST_INFO_NULL
};