FOSSology  3.2.0rc1
Open Source License Compliance by Open Source Software
diff.c
1 /*
2 Author: Daniele Fognini, Andreas Wuerl
3 Copyright (C) 2013-2014, Siemens AG
4 
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License
7 version 2 as published by the Free Software Foundation.
8 
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13 
14 You should have received a copy of the GNU General Public License along
15 with this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18 
19 #include "diff.h"
20 
21 #include "_squareVisitor.h"
22 #include <stdlib.h>
23 
24 int matchNTokens(const GArray* textTokens, size_t textStart, size_t textLength,
25  const GArray* searchTokens, size_t searchStart, size_t searchLength,
26  unsigned int numberOfWantedMatches) {
27 
28  if (
29  textStart < textLength &&
30  searchStart < searchLength &&
31  tokenEquals(
32  tokens_index(textTokens, textStart),
33  tokens_index(searchTokens, searchStart
34  )))
35  {
36  unsigned matched = 1;
37 
38  size_t canMatch = MIN(textLength - textStart, searchLength - searchStart);
39  size_t shouldMatch = MIN(numberOfWantedMatches, canMatch);
40  for (size_t i = 1; i < shouldMatch; i++) {
41  if (tokenEquals(
42  tokens_index(textTokens, i + textStart),
43  tokens_index(searchTokens, i + searchStart)))
44  matched++;
45  else
46  break;
47  }
48  return (matched == shouldMatch);
49  }
50  return 0;
51 }
52 
53 int lookForDiff(const GArray* textTokens, const GArray* searchTokens,
54  size_t iText, size_t iSearch,
55  unsigned int maxAllowedDiff, unsigned int minAdjacentMatches,
56  DiffMatchInfo* result) {
57  size_t searchLength = searchTokens->len;
58  size_t textLength = textTokens->len;
59 
60  size_t searchStopAt = MIN(iSearch + maxAllowedDiff, searchLength);
61  size_t textStopAt = MIN(iText + maxAllowedDiff, textLength);
62 
63  size_t textPos;
64  size_t searchPos;
65  for (unsigned int i = 0; i < SQUARE_VISITOR_LENGTH; i++) {
66  textPos = iText + squareVisitorX[i];
67  searchPos = iSearch + squareVisitorY[i];
68 
69  if ((textPos < textStopAt) && (searchPos < searchStopAt))
70  if (matchNTokens(textTokens, textPos, textLength,
71  searchTokens, searchPos, searchLength,
72  minAdjacentMatches))
73  {
74  result->search.start = searchPos;
75  result->search.length = searchPos - iSearch;
76  result->text.start = textPos;
77  result->text.length = textPos - iText;
78  return 1;
79  }
80  }
81 
82  return 0;
83 }
84 
85 static int applyDiff(const DiffMatchInfo* diff,
86  GArray* matchedInfo,
87  size_t* additionsCounter, size_t* removedCounter,
88  size_t* iText, size_t* iSearch) {
89 
90  DiffMatchInfo diffCopy = *diff;
91  diffCopy.text.start = *iText;
92  diffCopy.search.start = *iSearch;
93 
94  if (diffCopy.search.length == 0)
95  diffCopy.diffType = DIFF_TYPE_ADDITION;
96  else if (diffCopy.text.length == 0)
97  diffCopy.diffType = DIFF_TYPE_REMOVAL;
98  else
99  diffCopy.diffType = DIFF_TYPE_REPLACE;
100 
101  *iText = diff->text.start;
102  *iSearch = diff->search.start;
103 
104  g_array_append_val(matchedInfo, diffCopy);
105 
106  *additionsCounter += diffCopy.text.length;
107  *removedCounter += diffCopy.search.length;
108 
109  return 1;
110 }
111 
112 static void applyTailDiff(GArray* matchedInfo,
113  size_t searchLength,
114  size_t* removedCounter, size_t* additionsCounter,
115  size_t* iText, size_t* iSearch) {
116 
117  DiffMatchInfo tailDiff = (DiffMatchInfo) {
118  .search = (DiffPoint) {
119  .start = (*iSearch),
120  .length = searchLength - (*iSearch)
121  },
122  .text = (DiffPoint) {
123  .start = (*iText),
124  .length = 0
125  },
126  .diffType = NULL
127  };
128 
129  applyDiff(&tailDiff, matchedInfo, additionsCounter, removedCounter, iText, iSearch);
130 }
131 
132 static void initSimpleMatch(DiffMatchInfo* simpleMatch, size_t iText, size_t iSearch) {
133  simpleMatch->text.start = iText;
134  simpleMatch->text.length = 0;
135  simpleMatch->search.start = iSearch;
136  simpleMatch->search.length = 0;
137 }
138 
151 DiffResult* findMatchAsDiffs(const GArray* textTokens, const GArray* searchTokens,
152  size_t textStartPosition, size_t searchStartPosition,
153  unsigned int maxAllowedDiff, unsigned int minAdjacentMatches) {
154  size_t textLength = textTokens->len;
155  size_t searchLength = searchTokens->len;
156 
157  if ((searchLength<=searchStartPosition) || (textLength<=textStartPosition)) {
158  return NULL;
159  }
160 
161  DiffResult* result = malloc(sizeof(DiffResult));
162  result->matchedInfo = g_array_new(FALSE, FALSE, sizeof(DiffMatchInfo));
163  GArray* matchedInfo = result->matchedInfo;
164 
165  size_t iText = textStartPosition;
166  size_t iSearch = 0;
167 
168  size_t removedCounter = 0;
169  size_t matchedCounter = 0;
170  size_t additionsCounter = 0;
171 
172  if (searchStartPosition > 0) {
173  DiffMatchInfo licenseHeadDiff = (DiffMatchInfo) {
174  .search = (DiffPoint) {
175  .start = 0,
176  .length = searchStartPosition
177  },
178  .text = (DiffPoint) {
179  .start = iText,
180  .length = 0
181  },
182  .diffType = NULL
183  };
184  applyDiff(&licenseHeadDiff, matchedInfo, &additionsCounter, &removedCounter, &iText, &iSearch);
185  iSearch = searchStartPosition;
186  }
187 
188  // match first token
189  while (iText < textLength) {
190  Token* textToken = tokens_index(textTokens, iText);
191  Token* searchToken = tokens_index(searchTokens, iSearch);
192  if (tokenEquals(textToken, searchToken))
193  break;
194  iText++;
195  }
196 
197  if (iText < textLength) {
198  DiffMatchInfo simpleMatch;
199  simpleMatch.diffType = DIFF_TYPE_MATCH;
200  initSimpleMatch(&simpleMatch, iText, iSearch);
201 
202  while ((iText < textLength) && (iSearch < searchLength)) {
203  Token* textToken = tokens_index(textTokens, iText);
204  Token* searchToken = tokens_index(searchTokens, iSearch);
205 
206  if (tokenEquals(textToken, searchToken)) {
207  simpleMatch.text.length++;
208  simpleMatch.search.length++;
209  matchedCounter++;
210  iSearch++;
211  iText++;
212  } else {
213  /* the previous tokens matched, here starts a difference */
214  g_array_append_val(matchedInfo, simpleMatch);
215  initSimpleMatch(&simpleMatch, iText, iSearch);
216 
217  DiffMatchInfo diff;
218  if (lookForDiff(textTokens, searchTokens,
219  iText, iSearch,
220  maxAllowedDiff, minAdjacentMatches,
221  &diff)) {
222  applyDiff(&diff,
223  matchedInfo,
224  &additionsCounter, &removedCounter,
225  &iText, &iSearch);
226 
227  simpleMatch.text.start = iText;
228  simpleMatch.search.start = iSearch;
229  } else {
230  break;
231  }
232  }
233  }
234  if (simpleMatch.text.length > 0) {
235  g_array_append_val(matchedInfo, simpleMatch);
236  }
237  }
238 
239  if ((iSearch < searchLength) && (searchLength < maxAllowedDiff + iSearch)) {
240  applyTailDiff(matchedInfo, searchLength, &removedCounter, &additionsCounter, &iText, &iSearch);
241  }
242 
243  if (matchedCounter + removedCounter != searchLength) {
244  g_array_free(matchedInfo, TRUE);
245  free(result);
246 
247  return NULL;
248  } else {
249 #ifdef DEBUG_DIFF
250  printf("diff: (=%zu +%zu -%zu)/%zu\n", matchedCounter, additionsCounter, removedCounter, searchLength);
251  for (size_t i=0; i<matchedInfo->len; i++) {
252  DiffMatchInfo current = g_array_index(matchedInfo, DiffMatchInfo, i);
253  printf("info[%zu]: t[%zu+%zu] %s s[%zu+%zu]}\n",
254  i,
255  current.text.start,
256  current.text.length,
257  current.diffType,
258  current.search.start,
259  current.search.length
260  );
261  }
262  printf("\n");
263 #endif
264  result->removed = removedCounter;
265  result->added = additionsCounter;
266  result->matched = matchedCounter;
267  return result;
268  }
269 }
270 
271 void diffResult_free(DiffResult* diffResult) {
272  g_array_free(diffResult->matchedInfo, TRUE);
273  free(diffResult);
274 }
Definition: diff.h:25
#define MIN(a, b)
Min of two.
Definition: licenses.c:76