utf82unicode function
Decodes a list of UTF-8 encoded bytes into the list of corresponding Unicode code points (returned as integers).
Implementation
/// Decodes UTF-8 encoded bytes into a list of Unicode code points.
///
/// [utf8Bytes] is scanned from left to right. For each lead byte, the value
/// returned by `zPos` is used as the number of continuation bytes that follow
/// it; a value of 0 means the byte is emitted unchanged. For multi-byte
/// sequences, `mask[byteCount]` selects the payload bits of the lead byte and
/// every continuation byte contributes its low six bits.
///
/// Returns the code points decoded so far. If the input ends in the middle of
/// a multi-byte sequence, decoding stops and that truncated sequence is
/// dropped without raising an error.
///
/// Note: continuation bytes are not checked for the `10xxxxxx` form, so
/// malformed input is decoded as-is rather than rejected.
// NOTE(review): `zPos` and `mask` are defined outside this block; the
// description above is derived from how they are used here — confirm against
// their definitions.
List<int> utf82unicode(List<int> utf8Bytes) {
  List<int> loc = [];
  for (int i = 0; i < utf8Bytes.length;) {
    int firstByte = utf8Bytes[i];
    int byteCount = zPos(firstByte);

    if (byteCount == 0) {
      // Single-byte sequence: the byte is the code point.
      loc.add(firstByte);
      i++;
      continue;
    }

    // The sequence occupies indices i .. i + byteCount. If the last of those
    // is past the end, the input is truncated: stop and drop the partial
    // sequence. This single guard also bounds every utf8Bytes[i + j] access
    // in the loop below, so no per-byte bounds check is needed there.
    if (i + byteCount >= utf8Bytes.length) {
      break;
    }

    // Start with the payload bits of the lead byte, then append six bits
    // from each continuation byte.
    int unicode = firstByte & mask[byteCount];
    for (int j = 1; j <= byteCount; j++) {
      unicode = (unicode << 6) | (utf8Bytes[i + j] & 0x3f);
    }

    loc.add(unicode);
    // Advance past the lead byte and all of its continuation bytes.
    i += byteCount + 1;
  }
  return loc;
}