decode static method

String decode(
  1. List<int> bytes, {
  2. bool allowMalformed = false,
})

Implementation

/// Decodes UTF-8 encoded [bytes] into a [String].
///
/// A UTF-8 byte order mark (`EF BB BF`) at the very start of [bytes] is
/// skipped and does not appear in the result.
///
/// The following input is considered malformed: invalid lead bytes
/// (including stray continuation bytes and list elements outside `0..255`),
/// sequences cut short by the end of the input, lead bytes not followed by
/// enough continuation bytes, overlong encodings, encoded UTF-16 surrogates
/// (U+D800..U+DFFF) and code points above U+10FFFF.
///
/// When [allowMalformed] is `false` (the default), malformed input throws the
/// exception built by `ArgumentException.invalidOperationArguments`.
///
/// When [allowMalformed] is `true`, malformed input is replaced by U+FFFD and
/// decoding continues:
///
/// * an invalid lead byte is replaced on its own;
/// * a truncated sequence, or one with a bad continuation byte, is replaced
///   as a unit made of the lead byte and the continuation bytes that directly
///   follow it, so the next byte that could start a character is never
///   skipped;
/// * a well-formed sequence that encodes an invalid code point has only its
///   lead byte replaced; its continuation bytes are then rescanned and each
///   one is replaced as an invalid lead byte.
static String decode(List<int> bytes, {bool allowMalformed = false}) {
  const replacement = 0xFFFD;
  final units = <int>[];
  final length = bytes.length;

  // Range check rather than a bit mask, so that list elements outside 0..255
  // (for example 0x180, or negative values) are never taken for continuation
  // bytes.
  bool isContinuation(int byte) => byte >= 0x80 && byte <= 0xBF;

  // Length of the malformed run starting at the lead byte at [start]: the
  // lead byte itself plus the continuation bytes immediately after it.
  int malformedRunLength(int start) {
    var end = start + 1;
    while (end < length && isContinuation(bytes[end])) {
      end++;
    }
    return end - start;
  }

  // Skip the BOM if present at the start.
  var i = 0;
  if (length >= 3 &&
      bytes[0] == 0xEF &&
      bytes[1] == 0xBB &&
      bytes[2] == 0xBF) {
    i = 3;
  }

  while (i < length) {
    final byte1 = bytes[i];

    // ASCII. The lower bound keeps negative list elements out of the output;
    // they fall through to the invalid-lead-byte handling below.
    if (byte1 >= 0 && byte1 <= 0x7F) {
      units.add(byte1);
      i++;
      continue;
    }

    int expectedLength;
    int codePoint;

    // Determine the sequence length and the payload bits of the lead byte.
    // 0xC0/0xC1 (always overlong) and 0xF5..0xFF (always above U+10FFFF) are
    // deliberately not accepted as lead bytes.
    if (byte1 >= 0xC2 && byte1 <= 0xDF) {
      expectedLength = 2;
      codePoint = byte1 & 0x1F;
    } else if (byte1 >= 0xE0 && byte1 <= 0xEF) {
      expectedLength = 3;
      codePoint = byte1 & 0x0F;
    } else if (byte1 >= 0xF0 && byte1 <= 0xF4) {
      expectedLength = 4;
      codePoint = byte1 & 0x07;
    } else {
      if (!allowMalformed) {
        throw ArgumentException.invalidOperationArguments(
          "Invalid UTF-8 bytes.",
          name: "bytes",
          reason: "Invalid UTF-8 lead byte at position $i: $byte1",
        );
      }
      units.add(replacement);
      i++;
      continue;
    }

    // Not enough bytes left for the announced sequence length.
    if (length - i < expectedLength) {
      if (!allowMalformed) {
        throw ArgumentException.invalidOperationArguments(
          "Invalid UTF-8 bytes.",
          name: "bytes",
          reason: "Truncated UTF-8 sequence at position $i",
        );
      }
      // Only swallow the continuation bytes that belong to this sequence.
      // Consuming everything up to the end of the input would silently drop
      // valid trailing data such as the 'A' in [0xE2, 0x41].
      units.add(replacement);
      i += malformedRunLength(i);
      continue;
    }

    // Validate continuation bytes.
    var valid = true;
    for (var j = 1; j < expectedLength; j++) {
      if (!isContinuation(bytes[i + j])) {
        valid = false;
        break;
      }
    }

    if (!valid) {
      if (!allowMalformed) {
        throw ArgumentException.invalidOperationArguments(
          "Invalid UTF-8 bytes.",
          name: "bytes",
          reason: "Invalid UTF-8 continuation bytes at position $i",
        );
      }
      units.add(replacement);
      i += malformedRunLength(i);
      continue;
    }

    // Fold the six payload bits of each continuation byte into the value.
    for (var j = 1; j < expectedLength; j++) {
      codePoint = (codePoint << 6) | (bytes[i + j] & 0x3F);
    }

    // Reject values above the Unicode range, overlong encodings (a value that
    // would have fit in a shorter sequence) and encoded surrogates.
    if (codePoint > 0x10FFFF ||
        (expectedLength == 2 && codePoint <= 0x7F) ||
        (expectedLength == 3 && codePoint <= 0x7FF) ||
        (expectedLength == 4 && codePoint <= 0xFFFF) ||
        (codePoint >= 0xD800 && codePoint <= 0xDFFF)) {
      if (!allowMalformed) {
        throw ArgumentException.invalidOperationArguments(
          "Invalid UTF-8 bytes.",
          name: "bytes",
          reason: "Invalid UTF-8 code point at position $i: $codePoint",
        );
      }
      // Only the lead byte is consumed here; the continuation bytes are
      // rescanned on the next iterations and replaced one by one.
      units.add(replacement);
      i++;
      continue;
    }

    if (codePoint <= 0xFFFF) {
      units.add(codePoint);
    } else {
      // Supplementary plane: emit a UTF-16 surrogate pair.
      codePoint -= 0x10000;
      units.add(0xD800 + (codePoint >> 10));
      units.add(0xDC00 + (codePoint & 0x3FF));
    }

    i += expectedLength;
  }

  return String.fromCharCodes(units);
}