20 #include <CUnit/CUnit.h> 24 #include "string_operations.h" 28 void test_tokenize() {
29 char* test = g_strdup(
"^foo^^ba^");
31 GArray* token = tokenize(test,
"^");
33 CU_ASSERT_EQUAL(token->len, 2);
34 CU_ASSERT_EQUAL(g_array_index(token,
Token, 0).hashedContent, hash(
"foo"));
35 CU_ASSERT_EQUAL(g_array_index(token,
Token, 0).length, 3);
36 CU_ASSERT_EQUAL(g_array_index(token,
Token, 0).removedBefore, 1);
37 CU_ASSERT_EQUAL(g_array_index(token,
Token, 1).hashedContent, hash(
"ba"));
38 CU_ASSERT_EQUAL(g_array_index(token,
Token, 1).length, 2);
39 CU_ASSERT_EQUAL(g_array_index(token,
Token, 1).removedBefore, 2);
41 g_array_free(token, TRUE);
45 void test_tokenizeWithSpecialDelims() {
46 char* test = g_strdup(
"/*foo \n * bar \n *baz*/ ***booo \n:: qoo ");
48 GArray* token = tokenize(test,
" \n");
49 CU_ASSERT_EQUAL(token->len, 5);
50 CU_ASSERT_EQUAL(g_array_index(token,
Token, 0).hashedContent, hash(
"foo"));
51 CU_ASSERT_EQUAL(g_array_index(token,
Token, 0).length, 3);
52 CU_ASSERT_EQUAL(g_array_index(token,
Token, 0).removedBefore, 2);
53 CU_ASSERT_EQUAL(g_array_index(token,
Token, 1).hashedContent, hash(
"bar"));
54 CU_ASSERT_EQUAL(g_array_index(token,
Token, 1).length, 3);
55 CU_ASSERT_EQUAL(g_array_index(token,
Token, 1).removedBefore, 5);
56 CU_ASSERT_EQUAL(g_array_index(token,
Token, 2).hashedContent, hash(
"baz"));
57 CU_ASSERT_EQUAL(g_array_index(token,
Token, 2).length, 3);
58 CU_ASSERT_EQUAL(g_array_index(token,
Token, 2).removedBefore, 4);
59 CU_ASSERT_EQUAL(g_array_index(token,
Token, 3).hashedContent, hash(
"booo"));
60 CU_ASSERT_EQUAL(g_array_index(token,
Token, 3).length, 4);
61 CU_ASSERT_EQUAL(g_array_index(token,
Token, 3).removedBefore, 6);
62 CU_ASSERT_EQUAL(g_array_index(token,
Token, 4).hashedContent, hash(
"qoo"));
63 CU_ASSERT_EQUAL(g_array_index(token,
Token, 4).length, 3);
64 CU_ASSERT_EQUAL(g_array_index(token,
Token, 4).removedBefore, 5);
65 g_array_free(token, TRUE);
/*
 * Test: passes the input to streamTokenize() in chunks and checks the tokens
 * that were accumulated in the array.
 *
 * NOTE(review): this block comes from a numbered source listing and is
 * incomplete. The declarations of chunkSize, rea and ptr, the loop around the
 * chunked calls, the #else/#endif belonging to the #ifndef below and the
 * closing brace of the function are not visible here - restore them from the
 * original file before compiling.
 */
69 void test_streamTokenize() {
70 char* test = g_strdup(
"^foo^^ba^REM^boooREM^REM^");
71 const char* delimiters =
"^";
73 GArray* token = tokens_new();
75 Token* remainder = NULL;
77 size_t len = strlen(test);
/* Remember the token count before the call so the return value can be checked against the growth of the array. */
83 unsigned int tokenCount = token->len;
/* Never hand over more than what is left of the input. */
84 int thisChunkSize =
MIN(chunkSize, len - rea);
86 int addedTokens = streamTokenize(ptr, thisChunkSize, delimiters, &token, &remainder);
88 CU_ASSERT_EQUAL(addedTokens, token->len - tokenCount);
/* Final call without input; presumably flushes the pending remainder (it is asserted to be NULL further down) - confirm. */
93 streamTokenize(NULL, 0, NULL, &token, &remainder);
/* Fatal assert: the indexed accesses below need all three tokens to exist. */
95 CU_ASSERT_EQUAL_FATAL(token->len, 3);
96 CU_ASSERT_EQUAL(g_array_index(token,
Token, 0).hashedContent, hash(
"foo"));
97 CU_ASSERT_EQUAL(g_array_index(token,
Token, 0).length, 3);
98 CU_ASSERT_EQUAL(g_array_index(token,
Token, 0).removedBefore, 1);
99 CU_ASSERT_EQUAL(g_array_index(token,
Token, 1).hashedContent, hash(
"ba"));
100 CU_ASSERT_EQUAL(g_array_index(token,
Token, 1).length, 2);
101 CU_ASSERT_EQUAL(g_array_index(token,
Token, 1).removedBefore, 2);
/* The expected hash of the third token depends on MONK_CASE_INSENSITIVE: "boooREM" without it, "booorem" with it. */
102 #ifndef MONK_CASE_INSENSITIVE 103 CU_ASSERT_EQUAL(g_array_index(token,
Token, 2).hashedContent, hash(
"boooREM"));
105 CU_ASSERT_EQUAL(g_array_index(token,
Token, 2).hashedContent, hash(
"booorem"));
/* Five characters are expected to be removed before the third token, i.e. the standalone "REM" segment is not expected to be a token. */
107 CU_ASSERT_EQUAL(g_array_index(token,
Token, 2).length, 7);
108 CU_ASSERT_EQUAL(g_array_index(token,
Token, 2).removedBefore, 5);
110 CU_ASSERT_PTR_NULL(remainder);
/* Index 3 is one past the last token; the expected position is 20. */
112 CU_ASSERT_EQUAL(token_position_of(3, token), 20);
/*
 * Test: calls streamTokenize() in a bounded loop and expects it to end up
 * returning -1 while some tokens have still been produced.
 *
 * NOTE(review): this block comes from a numbered source listing and is
 * incomplete. The declarations of chunkSize, ptr, addedTokens and i, the body
 * of the "addedTokens == -1" branch, the end of the loop and the closing brace
 * of the function are not visible here - restore them from the original file
 * before compiling.
 */
118 void test_streamTokenizeEventuallyGivesUp() {
119 char* test = g_strdup(
"^foo^^ba");
120 const char* delimiters =
"^";
122 GArray* token = tokens_new();
124 Token* remainder = NULL;
126 char* endPtr = test + strlen(test);
/* Announces on stdout that a warning is expected next; presumably it is emitted by streamTokenize() when it gives up - confirm. */
128 printf(
"test: expecting a warning: ");
/* The loop is capped at 1 << 27 iterations and also stops at a NUL byte or once ptr moves past endPtr. */
133 while ((i < 1 << 27) && (*ptr) && (ptr <= endPtr)) {
134 unsigned int tokenCount = token->len;
135 int thisChunkSize =
MIN(chunkSize, endPtr - ptr);
137 addedTokens = streamTokenize(ptr, thisChunkSize, delimiters, &token, &remainder);
/* -1 is the return value this test is waiting for (asserted after the loop). */
139 if (addedTokens == -1) {
/* Any other return value has to match the growth of the token array. */
142 if (addedTokens != token->len - tokenCount)
143 CU_FAIL(
"wrong return value from streamTokenize()");
/* Final call without input, as in the other stream test. */
147 streamTokenize(NULL, 0, NULL, &token, &remainder);
149 CU_ASSERT_EQUAL(addedTokens, -1);
151 CU_ASSERT_TRUE(token->len > 0);
153 g_array_free(token, TRUE);
157 void assertTokenPosition(
char*
string,
int count, ...) {
158 char* test = g_strdup(
string);
160 GArray* tokens = tokenize(test,
"^");
162 CU_ASSERT_EQUAL(tokens->len, count);
163 if (tokens->len == count) {
166 va_start(argptr, count);
167 for (
size_t i = 0; i < tokens->len; i++) {
168 int expected = va_arg(argptr,
int);
169 size_t current = token_position_of(i, tokens);
170 if (current != expected) {
171 printf(
"ASSERT tokenizing '%s': posof(token[%ld]) == %ld != %d\n",
string, i, current, expected);
172 CU_FAIL(
"see output");
175 CU_ASSERT_EQUAL(current, token_position_of(i, tokens));
179 printf(
"ASSERT tokenizing '%s': token count %d != %d\n",
string, tokens->len, count);
182 g_array_free(tokens, TRUE);
/**
 * Checks token positions for several "^"-delimited inputs.
 *
 * Each call passes the input, the expected token count and then one expected
 * position per token to assertTokenPosition().
 */
void test_tokenPosition() {
  assertTokenPosition("foo", 1, 0);
  assertTokenPosition("^foo", 1, 1);
  assertTokenPosition("^foo^^bar", 2, 1, 6);
  assertTokenPosition("foo^^bar", 2, 0, 5);
  assertTokenPosition("^foo^^bar^^^^^baz", 3, 1, 6, 14);
}
194 void test_tokenPositionAtEnd() {
195 char* test = g_strdup(
"^^23^5^7");
196 GArray* tokens = tokenize(test,
"^");
198 CU_ASSERT_EQUAL(token_position_of(0, tokens), 2);
199 CU_ASSERT_EQUAL(token_position_of(1, tokens), 5);
200 CU_ASSERT_EQUAL(token_position_of(2, tokens), 7);
201 CU_ASSERT_EQUAL(token_position_of(3, tokens), 8);
203 g_array_free(tokens, TRUE);
207 void test_token_equal() {
208 char* text = g_strdup(
"^foo^^bar^ba^barr");
209 char*
search = g_strdup(
"bar^^foo^");
211 GArray* tokenizedText = tokenize(text,
"^");
212 GArray* tokenizedSearch = tokenize(search,
"^");
214 Token* t0 = tokens_index(tokenizedText, 0);
215 Token* t1 = tokens_index(tokenizedText, 1);
216 Token* t2 = tokens_index(tokenizedText, 2);
217 Token* t3 = tokens_index(tokenizedText, 3);
218 Token* s0 = tokens_index(tokenizedSearch, 0);
219 Token* s1 = tokens_index(tokenizedSearch, 1);
221 CU_ASSERT_TRUE(tokenEquals(t0, s1));
222 CU_ASSERT_TRUE(tokenEquals(t1, s0));
223 CU_ASSERT_FALSE(tokenEquals(t2, s0));
224 CU_ASSERT_FALSE(tokenEquals(t3, s0));
226 g_array_free(tokenizedText, TRUE);
227 g_array_free(tokenizedSearch, TRUE);
232 CU_TestInfo string_operations_testcases[] = {
233 {
"Testing tokenize:", test_tokenize},
234 {
"Testing tokenize with special delimiters:", test_tokenizeWithSpecialDelims},
235 {
"Testing stream tokenize:", test_streamTokenize},
236 {
"Testing stream tokenize with too long stream:",test_streamTokenizeEventuallyGivesUp},
237 {
"Testing find token position in string:", test_tokenPosition},
238 {
"Testing find token position at end:", test_tokenPositionAtEnd},
239 {
"Testing token equals:", test_token_equal},
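/* Cross-reference left over from the source listing: MIN(a, b) - min of two.
 * The macro's definition is not part of this listing. */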