normalize static method
Implementation
/// Returns the normalized form of [ch], chosen by the Unicode block that
/// [unicodeBlock] reports for it.
///
/// [ch] is presumably a one-character string (TODO confirm against callers).
/// The mapping applied per block is:
///
/// * Basic Latin: anything outside `A`-`Z` / `a`-`z` becomes a space.
/// * Latin-1 Supplement: members of [latin1Excluded] become a space.
/// * Latin Extended-B: Romanian s/t with comma below become the cedilla
///   forms `\u015f` / `\u0163`.
/// * General Punctuation: always a space.
/// * Arabic: Farsi yeh `\u06cc` becomes Arabic yeh `\u064a`.
/// * Latin Extended Additional: everything from `\u1ea0` upward becomes
///   `\u1ec3`.
/// * Hiragana, Katakana, Bopomofo (incl. Extended) and Hangul Syllables: each
///   block collapses to one representative (`\u3042`, `\u30a2`, `\u3105`,
///   `\uac00` respectively).
/// * CJK Unified Ideographs: replaced by the [cjkMap] entry when one exists.
///
/// Characters in any other block, or not matched by a rule above, are
/// returned unchanged.
static String normalize(String ch) {
  // Runs before the lookup below; presumably populates cjkMap (confirm).
  initCjkMap();
  final block = unicodeBlock(ch);

  if (block == UnicodeBlock.unicodeBasicLatin) {
    // Keep only the ASCII letters; every other Basic Latin character is
    // replaced by a space.
    final isUpper = ch.compareTo('A') >= 0 && ch.compareTo('Z') <= 0;
    final isLower = ch.compareTo('a') >= 0 && ch.compareTo('z') <= 0;
    return isUpper || isLower ? ch : ' ';
  }

  if (block == UnicodeBlock.unicodeLatin1Supplement) {
    return latin1Excluded.contains(ch) ? ' ' : ch;
  }

  if (block == UnicodeBlock.unicodeLatinExtendedB) {
    // Normalization for Romanian.
    if (ch == '\u0219') {
      // Small S with comma below => with cedilla.
      return '\u015f';
    }
    if (ch == '\u021b') {
      // Small T with comma below => with cedilla.
      return '\u0163';
    }
    return ch;
  }

  if (block == UnicodeBlock.unicodeGeneralPunctuation) {
    return ' ';
  }

  if (block == UnicodeBlock.unicodeArabic) {
    // Farsi yeh => Arabic yeh.
    return ch == '\u06cc' ? '\u064a' : ch;
  }

  if (block == UnicodeBlock.unicodeLatinExtendedAdditional) {
    // The upper part of the block (from U+1EA0) collapses to one character.
    return ch.compareTo('\u1ea0') >= 0 ? '\u1ec3' : ch;
  }

  if (block == UnicodeBlock.unicodeHiragana) {
    return '\u3042';
  }

  if (block == UnicodeBlock.unicodeKatakana) {
    return '\u30a2';
  }

  if (block == UnicodeBlock.unicodeBopomofo ||
      block == UnicodeBlock.unicodeBopomofoExtended) {
    return '\u3105';
  }

  if (block == UnicodeBlock.unicodeCjkUnifiedIdeographs) {
    // Fall back to the character itself when the map has no entry for it.
    return cjkMap[ch] ?? ch;
  }

  if (block == UnicodeBlock.unicodeHangulSyllables) {
    return '\uac00';
  }

  return ch;
}