gbk2unicode function

List<int> gbk2unicode(List<int> gbkBytes)

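Converts a list of GBK-encoded bytes to the list of their corresponding Unicode character codes. Bytes below 0x80 are copied through unchanged; two-byte sequences that have no entry in the conversion table, and an incomplete sequence at the end of the input, are omitted from the result.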

Implementation

/// Decodes [gbkBytes] into a list of Unicode values.
///
/// Bytes below `0x80` are emitted as-is. Any other byte is combined with the
/// byte that follows it into a 16-bit code, which is translated through
/// `unicodeTables` (indexed relative to `gbkFirstCode`). Codes outside
/// `gbkFirstCode..gbkLastCode`, or whose table index is not below
/// `unicodeBufferSize`, produce no output but still consume both bytes.
/// A lone lead byte at the very end of the input stops decoding.
///
/// Returns a new list containing only the values that were produced.
List<int> gbk2unicode(List<int> gbkBytes) {
  final total = gbkBytes.length;
  final decoded = <int>[];

  var pos = 0;
  while (pos < total) {
    final lead = gbkBytes[pos];

    // Single-byte range (0x00-0x7F): copy through unchanged.
    if (lead < 0x80) {
      decoded.add(lead);
      pos += 1;
      continue;
    }

    // A two-byte sequence needs a trailing byte; stop if the input ends here.
    if (pos + 1 >= total) {
      break;
    }

    final code = (lead << 8) | gbkBytes[pos + 1];
    // Both bytes are consumed whether or not the code can be translated.
    pos += 2;

    if (code < gbkFirstCode || code > gbkLastCode) {
      continue;
    }

    final slot = code - gbkFirstCode;
    if (slot >= unicodeBufferSize) {
      continue;
    }

    decoded.add(unicodeTables[slot]);
  }

  return decoded;
}