utf82unicode function

List<int> utf82unicode(List<int> utf8Bytes)

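Decodes a list of UTF-8 encoded bytes into the corresponding list of Unicode code points. If the input ends in the middle of a multi-byte sequence, that incomplete sequence is dropped.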

Implementation

/// Decodes UTF-8 encoded bytes into a list of Unicode code points.
///
/// [utf8Bytes] is read front to back. For each lead byte, `zPos` supplies the
/// number of continuation bytes that follow it (a result of 0 is treated as a
/// single-byte character), and `mask[n]` selects the payload bits of a lead
/// byte that introduces `n` continuation bytes.
/// NOTE(review): `zPos` and `mask` are defined outside this block; the
/// description above is inferred from how they are used here — confirm.
///
/// Returns the code points decoded so far. If the input ends in the middle of
/// a multi-byte sequence, decoding stops and the truncated sequence is
/// dropped. Continuation bytes are not validated; only their low six bits are
/// used.
List<int> utf82unicode(List<int> utf8Bytes) {
  final List<int> codePoints = [];
  final int length = utf8Bytes.length;

  int i = 0;
  while (i < length) {
    final int firstByte = utf8Bytes[i];
    final int byteCount = zPos(firstByte);

    if (byteCount == 0) {
      // Single byte: the byte value is the code point.
      codePoints.add(firstByte);
      i++;
      continue;
    }

    // The last byte of this sequence sits at index i + byteCount. If that is
    // past the end, the input was truncated: stop and keep what we have.
    if (i + byteCount >= length) {
      break;
    }

    // Start from the payload bits of the lead byte, then append six bits per
    // continuation byte. The guard above already proves every i + j is in
    // range, so no per-byte bounds check is needed here.
    int unicode = firstByte & mask[byteCount];
    for (int j = 1; j <= byteCount; j++) {
      unicode = (unicode << 6) | (utf8Bytes[i + j] & 0x3f);
    }

    codePoints.add(unicode);
    i += byteCount + 1;
  }

  return codePoints;
}