gbk2unicode function
Converts a list of GBK-encoded bytes to a list of their corresponding Unicode code points.
Implementation
/// Decodes a list of GBK-encoded bytes into a list of values looked up in
/// `unicodeTables` (presumably Unicode code points — confirm against the
/// table definition).
///
/// Decoding rules:
/// * A byte below 0x80 is copied to the output unchanged.
/// * Any other byte is treated as the lead byte of a two-byte sequence. The
///   pair is combined into a 16-bit word; when that word lies within
///   `gbkFirstCode..gbkLastCode` and its offset from `gbkFirstCode` is below
///   `unicodeBufferSize`, the matching `unicodeTables` entry is emitted and
///   both bytes are consumed.
/// * A two-byte word with no table entry emits nothing. If its trail byte is
///   ASCII (below 0x80), only the lead byte is consumed so that the ASCII
///   byte is decoded on the next iteration instead of being silently
///   swallowed; otherwise both bytes are skipped.
/// * A lone lead byte at the very end of the input is dropped.
///
/// Returns a new growable list; [gbkBytes] is not modified.
List<int> gbk2unicode(List<int> gbkBytes) {
  final List<int> decoded = <int>[];
  int gbkInd = 0;

  while (gbkInd < gbkBytes.length) {
    final int ch = gbkBytes[gbkInd];

    // ASCII characters (0x00-0x7F) are single bytes.
    if (ch < 0x80) {
      decoded.add(ch);
      gbkInd++;
      continue;
    }

    // A multi-byte character needs a second byte; stop on a truncated tail.
    if (gbkInd + 1 >= gbkBytes.length) {
      break;
    }

    final int trail = gbkBytes[gbkInd + 1];
    final int word = (ch << 8) | trail;
    final int wordPos = word - gbkFirstCode;

    if (word >= gbkFirstCode &&
        word <= gbkLastCode &&
        wordPos < unicodeBufferSize) {
      decoded.add(unicodeTables[wordPos]);
      gbkInd += 2;
    } else if (trail < 0x80) {
      // Unmappable lead byte followed by ASCII: drop only the lead byte so
      // the ASCII byte is re-examined rather than lost.
      gbkInd += 1;
    } else {
      // Unmappable pair of high bytes: skip the whole pair.
      gbkInd += 2;
    }
  }

  return decoded;
}