Line data Source code
1 : // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
2 : // for details. All rights reserved. Use of this source code is governed by a
3 : // BSD-style license that can be found in the LICENSE file.
4 :
// ASCII code units shared by the comparison helpers in this file.

/// Code unit of the digit `0`.
const int _zero = 0x30;

/// Code unit of the upper-case letter `A`.
const int _upperCaseA = 0x41;

/// Code unit of the upper-case letter `Z`.
const int _upperCaseZ = 0x5a;

/// Code unit of the lower-case letter `a`.
const int _lowerCaseA = 0x61;

/// Code unit of the lower-case letter `z`.
const int _lowerCaseZ = 0x7a;

/// The single bit that differs between an upper-case ASCII letter and its
/// lower-case counterpart (`_lowerCaseA - _upperCaseA`).
const int _asciiCaseBit = 0x20;
12 :
/// Whether [a] and [b] are equal when the case of ASCII letters is ignored.
///
/// Two strings are considered equal when they have the same length and, at
/// every index, the code units are either identical or are the upper-case and
/// lower-case forms of the same ASCII letter.
///
/// Only the letters `A`-`Z` / `a`-`z` are case-folded. Non-ASCII letters are
/// compared exactly, so an upper-case ae-ligature (Æ) is different from
/// a lower-case ae-ligature (æ).
///
/// This is appropriate for strings that are known to be ASCII, such as
/// Dart identifiers, base-64 or hex encoded strings, or GUIDs.
bool equalsIgnoreAsciiCase(String a, String b) {
  final length = a.length;
  if (length != b.length) return false;
  for (var index = 0; index < length; index++) {
    final left = a.codeUnitAt(index);
    final right = b.codeUnitAt(index);
    if (left != right) {
      // Two cases of one letter differ in exactly the case bit, so any other
      // difference rules out a match immediately.
      if ((left ^ right) != _asciiCaseBit) return false;
      // The case bit alone is not enough (e.g. `@` vs `` ` ``): the
      // lower-cased code unit must actually be an ASCII letter.
      final folded = left | _asciiCaseBit;
      if (folded < _lowerCaseA || folded > _lowerCaseZ) return false;
    }
  }
  return true;
}
45 :
/// Hash code for a string which is compatible with [equalsIgnoreAsciiCase].
///
/// The hash code is unaffected by changing the case of ASCII letters, but
/// the case of non-ASCII letters does affect the result.
///
/// The returned value is a non-negative integer of at most 29 bits.
int hashIgnoreAsciiCase(String string) {
  // Jenkins one-at-a-time hash
  // (http://en.wikipedia.org/wiki/Jenkins_hash_function), with every
  // intermediate value masked to 29 bits, modified to ignore ASCII letter
  // case.
  var hash = 0;
  for (var i = 0; i < string.length; i++) {
    var char = string.codeUnitAt(i);
    // Convert lower-case ASCII letters to upper case.
    // This ensures that strings that differ only in case will have the
    // same hash code.
    if (_lowerCaseA <= char && char <= _lowerCaseZ) char -= _asciiCaseBit;
    hash = 0x1fffffff & (hash + char);
    hash = 0x1fffffff & (hash + ((0x0007ffff & hash) << 10));
    // Fold the high bits back in with XOR. A plain `hash >>= 6` would simply
    // throw the low bits away instead of mixing them.
    hash ^= hash >> 6;
  }
  hash = 0x1fffffff & (hash + ((0x03ffffff & hash) << 3));
  // XOR-fold again; a plain `hash >>= 11` would leave only 18 bits of state
  // and cap the number of distinct results at 2^18.
  hash ^= hash >> 11;
  return 0x1fffffff & (hash + ((0x00003fff & hash) << 15));
}
70 :
/// Compares [a] and [b] lexically, treating lower-case ASCII letters as their
/// upper-case equivalents.
///
/// Only the letters `a`-`z` are converted; no case conversion is done for
/// non-ASCII letters.
///
/// When the two strings differ only in the case of ASCII letters, the first
/// position where they differ decides the result: the string holding the
/// capital letter there compares as less than the other. This tie-break
/// means the function returns zero only for identical strings.
///
/// Returns -1, 0 or 1.
///
/// This is appropriate for strings that are known to be ASCII, such as
/// Dart identifiers, base-64 or hex encoded strings, or GUIDs.
int compareAsciiUpperCase(String a, String b) {
  final aLength = a.length;
  final bLength = b.length;
  // Raw difference at the first case-only mismatch; used if nothing else
  // distinguishes the strings.
  var tieBreak = 0;
  for (var index = 0; index < aLength; index++) {
    // `b` is a proper prefix of `a`.
    if (index >= bLength) return 1;
    final left = a.codeUnitAt(index);
    final right = b.codeUnitAt(index);
    if (left == right) continue;
    // Upper-case both sides if they are lower-case ASCII letters.
    final leftUpper = (_lowerCaseA <= left && left <= _lowerCaseZ)
        ? left - _asciiCaseBit
        : left;
    final rightUpper = (_lowerCaseA <= right && right <= _lowerCaseZ)
        ? right - _asciiCaseBit
        : right;
    if (leftUpper != rightUpper) return (leftUpper - rightUpper).sign;
    if (tieBreak == 0) tieBreak = left - right;
  }
  // `a` is a proper prefix of `b`.
  if (bLength > aLength) return -1;
  return tieBreak.sign;
}
107 :
/// Compares [a] and [b] lexically, treating upper-case ASCII letters as their
/// lower-case equivalents.
///
/// Only the letters `A`-`Z` are converted; no case conversion is done for
/// non-ASCII letters.
///
/// When the two strings differ only in the case of ASCII letters, the first
/// position where they differ decides the result: the string holding the
/// capital letter there compares as less than the other. This tie-break
/// means the function returns zero only for identical strings.
///
/// Returns -1, 0 or 1.
///
/// This is appropriate for strings that are known to be ASCII, such as
/// Dart identifiers, base-64 or hex encoded strings, or GUIDs.
int compareAsciiLowerCase(String a, String b) {
  final aLength = a.length;
  final bLength = b.length;
  // Raw difference at the first case-only mismatch; used if nothing else
  // distinguishes the strings.
  var tieBreak = 0;
  for (var index = 0; index < aLength; index++) {
    // `b` is a proper prefix of `a`.
    if (index >= bLength) return 1;
    final left = a.codeUnitAt(index);
    final right = b.codeUnitAt(index);
    if (left == right) continue;
    // Lower-case both sides if they are upper-case ASCII letters.
    final leftLower = (_upperCaseA <= left && left <= _upperCaseZ)
        ? left + _asciiCaseBit
        : left;
    final rightLower = (_upperCaseA <= right && right <= _upperCaseZ)
        ? right + _asciiCaseBit
        : right;
    if (leftLower != rightLower) return (leftLower - rightLower).sign;
    if (tieBreak == 0) tieBreak = left - right;
  }
  // `a` is a proper prefix of `b`.
  if (bLength > aLength) return -1;
  return tieBreak.sign;
}
144 :
/// Compares strings [a] and [b] according to [natural sort ordering][].
///
/// Natural sort ordering is a lexical ordering in which embedded numerals
/// (sequences of digits) are treated as single units and ordered by their
/// numerical value. For example, `"a10b"` is ordered after `"a7b"`, whereas
/// a plain lexical ordering would place the `1` before the `7` without
/// noticing that the `1` belongs to a larger number.
///
/// Example:
/// The following strings are in the order they would be sorted by using this
/// comparison function:
///
///     "a", "a0", "a0b", "a1", "a01", "a9", "a10", "a100", "a100b", "aa"
///
/// Returns 1 if [b] is a proper prefix of [a], -1 if [a] is a proper prefix
/// of [b], 0 if the strings are identical, and otherwise the result of
/// comparing naturally at the first differing code unit.
///
/// [natural sort ordering]: https://en.wikipedia.org/wiki/Natural_sort_order
int compareNatural(String a, String b) {
  final aLength = a.length;
  final bLength = b.length;
  var index = 0;
  while (index < aLength) {
    // `b` is a proper prefix of `a`.
    if (index >= bLength) return 1;
    final left = a.codeUnitAt(index);
    final right = b.codeUnitAt(index);
    if (left != right) {
      // The first mismatch decides; it may sit inside a number.
      return _compareNaturally(a, b, index, left, right);
    }
    index++;
  }
  return bLength > aLength ? -1 : 0;
}
173 :
/// Compares strings [a] and [b] according to lower-case
/// [natural sort ordering][].
///
/// Upper-case ASCII letters are converted to lower case before comparing, as
/// in [compareAsciiLowerCase], and the first position that still differs is
/// then compared as in [compareNatural].
///
/// When the two strings differ only in the case of ASCII letters, the first
/// position where they differ decides the result: the string holding the
/// capital letter there compares as less than the other.
///
/// [natural sort ordering]: https://en.wikipedia.org/wiki/Natural_sort_order
int compareAsciiLowerCaseNatural(String a, String b) {
  final aLength = a.length;
  final bLength = b.length;
  // Raw difference at the first case-only mismatch; used if nothing else
  // distinguishes the strings.
  var tieBreak = 0;
  for (var index = 0; index < aLength; index++) {
    // `b` is a proper prefix of `a`.
    if (index >= bLength) return 1;
    final left = a.codeUnitAt(index);
    final right = b.codeUnitAt(index);
    if (left == right) continue;
    // Lower-case both sides if they are upper-case ASCII letters.
    final leftLower = (_upperCaseA <= left && left <= _upperCaseZ)
        ? left + _asciiCaseBit
        : left;
    final rightLower = (_upperCaseA <= right && right <= _upperCaseZ)
        ? right + _asciiCaseBit
        : right;
    if (leftLower != rightLower) {
      return _compareNaturally(a, b, index, leftLower, rightLower);
    }
    if (tieBreak == 0) tieBreak = left - right;
  }
  // `a` is a proper prefix of `b`.
  if (bLength > aLength) return -1;
  return tieBreak.sign;
}
210 :
/// Compares strings [a] and [b] according to upper-case
/// [natural sort ordering][].
///
/// Lower-case ASCII letters are converted to upper case before comparing, as
/// in [compareAsciiUpperCase], and the first position that still differs is
/// then compared as in [compareNatural].
///
/// When the two strings differ only in the case of ASCII letters, the first
/// position where they differ decides the result: the string holding the
/// capital letter there compares as less than the other.
///
/// [natural sort ordering]: https://en.wikipedia.org/wiki/Natural_sort_order
int compareAsciiUpperCaseNatural(String a, String b) {
  final aLength = a.length;
  final bLength = b.length;
  // Raw difference at the first case-only mismatch; used if nothing else
  // distinguishes the strings.
  var tieBreak = 0;
  for (var index = 0; index < aLength; index++) {
    // `b` is a proper prefix of `a`.
    if (index >= bLength) return 1;
    final left = a.codeUnitAt(index);
    final right = b.codeUnitAt(index);
    if (left == right) continue;
    // Upper-case both sides if they are lower-case ASCII letters.
    final leftUpper = (_lowerCaseA <= left && left <= _lowerCaseZ)
        ? left - _asciiCaseBit
        : left;
    final rightUpper = (_lowerCaseA <= right && right <= _lowerCaseZ)
        ? right - _asciiCaseBit
        : right;
    if (leftUpper != rightUpper) {
      return _compareNaturally(a, b, index, leftUpper, rightUpper);
    }
    if (tieBreak == 0) tieBreak = left - right;
  }
  // `a` is a proper prefix of `b`.
  if (bLength > aLength) return -1;
  return tieBreak.sign;
}
247 :
/// Resolves a mismatch at [index], taking surrounding numbers into account.
///
/// [aChar] and [bChar] are the (possibly case-converted) code units at
/// [index] in [a] and [b], and must differ.
///
/// * If both are digits, the result is decided by numerical comparison
///   (see [_compareNumerically]).
/// * If exactly one is a digit and the character before [index] in that
///   string is also a digit, that string holds the longer number and is the
///   greater one.
/// * Otherwise the result is the sign of `aChar - bChar`.
int _compareNaturally(String a, String b, int index, int aChar, int bChar) {
  assert(aChar != bChar);
  final aIsDigit = _isDigit(aChar);
  final bIsDigit = _isDigit(bChar);
  if (aIsDigit && bIsDigit) {
    return _compareNumerically(a, b, aChar, bChar, index);
  }
  final hasPrevious = index > 0;
  if (aIsDigit) {
    // aChar continues a longer number than the one in b.
    if (hasPrevious && _isDigit(a.codeUnitAt(index - 1))) return 1;
  } else if (bIsDigit) {
    // bChar continues a longer number than the one in a.
    if (hasPrevious && _isDigit(b.codeUnitAt(index - 1))) return -1;
  }
  // Both are non-digits, or the digit does not continue an earlier number.
  return (aChar - bChar).sign;
}
276 :
/// Numerically compares the numbers overlapping position [index] of [a] and
/// [b], where [aChar] and [bChar] are the two (different) digits found there.
///
/// The number with more significant digits is the greater one. For numbers
/// with the same count of significant digits, the first differing digit
/// decides. If both numbers have the same value but one has more leading
/// zeros, the one with more leading zeros is considered greater.
int _compareNumerically(String a, String b, int aChar, int bChar, int index) {
  // Ranks by the number of digits following the given positions and, when
  // those counts agree, by the two digits themselves.
  int byLengthThenDigit(int aAt, int bAt, int aDigit, int bDigit) {
    final byLength = _compareDigitCount(a, b, aAt, bAt);
    return byLength != 0 ? byLength : (aDigit - bDigit).sign;
  }

  if (_isNonZeroNumberSuffix(a, index)) {
    // The digits continue a longer non-zero number, so this position is the
    // most significant difference; only the remaining lengths can override.
    return byLengthThenDigit(index, index, aChar, bChar);
  }

  // Not inside a larger non-zero number: skip leading zeros on whichever
  // side has them before comparing.
  var aPos = index;
  var bPos = index;
  var aDigit = aChar;
  var bDigit = bChar;
  if (aDigit == _zero) {
    do {
      // The number in a is zero, the one in b is not.
      if (++aPos == a.length) return -1;
      aDigit = a.codeUnitAt(aPos);
    } while (aDigit == _zero);
    if (!_isDigit(aDigit)) return -1;
  } else if (bDigit == _zero) {
    do {
      // The number in b is zero, the one in a is not.
      if (++bPos == b.length) return 1;
      bDigit = b.codeUnitAt(bPos);
    } while (bDigit == _zero);
    if (!_isDigit(bDigit)) return 1;
  }

  if (aDigit != bDigit) {
    return byLengthThenDigit(aPos, bPos, aDigit, bDigit);
  }

  // Same leading digit, but one side had more leading zeros.
  // Walk both numbers in step until they diverge or end.
  while (true) {
    aPos++;
    bPos++;
    final aHasDigit = aPos < a.length && _isDigit(a.codeUnitAt(aPos));
    final bHasDigit = bPos < b.length && _isDigit(b.codeUnitAt(bPos));
    if (!aHasDigit && !bHasDigit) {
      // Both numbers ended with equal value; the positions differ by the
      // number of leading zeros skipped, which breaks the tie.
      return (aPos - bPos).sign;
    }
    if (!bHasDigit) return 1; // a has the longer number.
    if (!aHasDigit) return -1; // b has the longer number.
    aDigit = a.codeUnitAt(aPos);
    bDigit = b.codeUnitAt(bPos);
    if (aDigit != bDigit) {
      // First differing digit found.
      return byLengthThenDigit(aPos, bPos, aDigit, bDigit);
    }
  }
}
355 :
/// Determines which of [a] and [b] has the longer run of digits.
///
/// Counting starts at `i + 1` in [a] and `j + 1` in [b]; `a[i]` and `b[j]`
/// are assumed to be digits already.
///
/// Returns 1 if the run in [a] is longer, -1 if the run in [b] is longer,
/// and 0 if both runs end at the same distance.
int _compareDigitCount(String a, String b, int i, int j) {
  var aPos = i + 1;
  var bPos = j + 1;
  while (aPos < a.length) {
    final aHasDigit = _isDigit(a.codeUnitAt(aPos));
    // b is exhausted: a is longer only if it still has a digit here.
    if (bPos == b.length) return aHasDigit ? 1 : 0;
    final bHasDigit = _isDigit(b.codeUnitAt(bPos));
    // Exactly one run ended here; the other one is longer.
    if (aHasDigit != bHasDigit) return aHasDigit ? 1 : -1;
    // Both runs ended at the same place.
    if (!aHasDigit) return 0;
    aPos++;
    bPos++;
  }
  // a is exhausted: b is longer only if it still has a digit here.
  final bContinues = bPos < b.length && _isDigit(b.codeUnitAt(bPos));
  return bContinues ? -1 : 0;
}
379 :
/// Whether [charCode] is the code unit of an ASCII digit (`0`-`9`).
bool _isDigit(int charCode) {
  // XOR with the code unit of `0` (0x30) maps `0`-`9` onto 0-9 and every
  // other UTF-16 code unit onto a value of 10 or more.
  final offset = charCode ^ _zero;
  return offset <= 9;
}
381 :
/// Whether the character at [index] of [string] continues a non-zero number.
///
/// Scans backwards from just before [index], skipping zeros. Returns true if
/// the first non-zero character found is a digit, in which case zeros at
/// [index] are significant. Returns false if that character is not a digit
/// or if the start of the string is reached, in which case zeros at [index]
/// are leading zeros.
bool _isNonZeroNumberSuffix(String string, int index) {
  for (var position = index - 1; position >= 0; position--) {
    final unit = string.codeUnitAt(position);
    if (unit == _zero) continue;
    return _isDigit(unit);
  }
  return false;
}
|