unicode2utf8 function

List<int> unicode2utf8(
  1. List<int> unicodeBytes
)

Converts a list of Unicode code points (integers) into the corresponding sequence of UTF-8 encoded bytes.

Implementation

/// Encodes a list of Unicode code points as UTF-8 bytes.
///
/// Each element of [unicodeBytes] is treated as one code point and is
/// encoded into one to four bytes. The name of the parameter is historical:
/// the elements are code points, not bytes.
///
/// Input that cannot be represented in well-formed UTF-8 is handled as
/// follows, so that every element of the result is always in `0x00..0xFF`:
///
/// * A high surrogate (`0xD800..0xDBFF`) immediately followed by a low
///   surrogate (`0xDC00..0xDFFF`) is combined into the supplementary code
///   point the pair represents, so UTF-16 code units are accepted too.
/// * Any other surrogate, any negative value, and any value above
///   `0x10FFFF` is replaced by U+FFFD REPLACEMENT CHARACTER
///   (`0xEF 0xBF 0xBD`).
///
/// Returns a new list; [unicodeBytes] is not modified. An empty input
/// yields an empty list.
List<int> unicode2utf8(List<int> unicodeBytes) {
  // UTF-8 encoding of U+FFFD, emitted in place of unencodable values.
  const List<int> replacement = [0xef, 0xbf, 0xbd];
  final List<int> utf8Bytes = [];

  for (int i = 0; i < unicodeBytes.length; i++) {
    int unicode = unicodeBytes[i];

    // Merge a well-formed surrogate pair into a single code point. Encoding
    // the two halves separately would produce CESU-8, which is not UTF-8.
    if (unicode >= 0xd800 &&
        unicode <= 0xdbff &&
        i + 1 < unicodeBytes.length) {
      final int next = unicodeBytes[i + 1];
      if (next >= 0xdc00 && next <= 0xdfff) {
        unicode = 0x10000 + ((unicode - 0xd800) << 10) + (next - 0xdc00);
        i++; // The low surrogate has been consumed.
      }
    }

    if (unicode < 0 ||
        unicode > 0x10ffff ||
        (unicode >= 0xd800 && unicode <= 0xdfff)) {
      // Not a Unicode scalar value: the bit arithmetic below would yield
      // values outside the byte range or an ill-formed sequence.
      utf8Bytes.addAll(replacement);
    } else if (unicode < 0x80) {
      // Single byte for ASCII characters
      utf8Bytes.add(unicode);
    } else if (unicode < 0x800) {
      // Two bytes for characters in the range 0x80-0x7FF
      utf8Bytes.add(0xc0 | (unicode >> 6));
      utf8Bytes.add(0x80 | (unicode & 0x3f));
    } else if (unicode < 0x10000) {
      // Three bytes for characters in the range 0x800-0xFFFF
      utf8Bytes.add(0xe0 | (unicode >> 12));
      utf8Bytes.add(0x80 | ((unicode >> 6) & 0x3f));
      utf8Bytes.add(0x80 | (unicode & 0x3f));
    } else {
      // Four bytes for characters in the range 0x10000-0x10FFFF
      utf8Bytes.add(0xf0 | (unicode >> 18));
      utf8Bytes.add(0x80 | ((unicode >> 12) & 0x3f));
      utf8Bytes.add(0x80 | ((unicode >> 6) & 0x3f));
      utf8Bytes.add(0x80 | (unicode & 0x3f));
    }
  }

  return utf8Bytes;
}