encodeSpecificVersion method

ResultList encodeSpecificVersion(
Version version
)
Implementation

ResultList encodeSpecificVersion(Version version) {
  /// A vertex represents a tuple of a position in the input, a mode and a character encoding where position 0
  /// denotes the position left of the first character, 1 the position left of the second character and so on.
  /// Likewise the end vertices are located after the last character at position stringToEncode.length().
  ///
  /// An edge leading to such a vertex encodes one or more of the characters left of the position that the vertex
  /// represents and encodes it in the same encoding and mode as the vertex on which the edge ends. In other words,
  /// all edges leading to a particular vertex encode the same characters in the same mode with the same character
  /// encoding. They differ only by their source vertices who are all located at i+1 minus the number of encoded
  /// characters.
  ///
  /// The edges leading to a vertex are stored in such a way that there is a fast way to enumerate the edges ending
  /// on a particular vertex.
  ///
  /// The algorithm processes the vertices in order of their position thereby performing the following:
  ///
  /// For every vertex at position i the algorithm enumerates the edges ending on the vertex and removes all but the
  /// shortest from that list.
  /// Then it processes the vertices for the position i+1. If i+1 == stringToEncode.length() then the algorithm ends
  /// and chooses the the edge with the smallest size from any of the edges leading to vertices at this position.
  /// Otherwise the algorithm computes all possible outgoing edges for the vertices at the position i+1
  ///
  /// Examples:
  /// The process is illustrated by showing the graph (edges) after each iteration from left to right over the input:
  /// An edge is drawn as follows "(" + fromVertex + ") -- " + encodingMode + "(" + encodedInput + ") (" +
  /// accumulatedSize + ") --> (" + toVertex + ")"
  ///
  /// Example 1 encoding the string "ABCDE":
  /// Note: This example assumes that alphanumeric encoding is only possible in multiples of two characters so that
  /// the example is both short and showing the principle. In reality this restriction does not exist.
  ///
  /// Initial situation
  /// (initial) -- BYTE(A) (20) --> (1_BYTE)
  /// (initial) -- ALPHANUMERIC(AB)                     (24) --> (2_ALPHANUMERIC)
  ///
  /// Situation after adding edges to vertices at position 1
  /// (initial) -- BYTE(A) (20) --> (1_BYTE) -- BYTE(B) (28) --> (2_BYTE)
  ///                               (1_BYTE) -- ALPHANUMERIC(BC)                             (44) --> (3_ALPHANUMERIC)
  /// (initial) -- ALPHANUMERIC(AB)                     (24) --> (2_ALPHANUMERIC)
  ///
  /// Situation after adding edges to vertices at position 2
  /// (initial) -- BYTE(A) (20) --> (1_BYTE)
  /// (initial) -- ALPHANUMERIC(AB)                     (24) --> (2_ALPHANUMERIC)
  /// (initial) -- BYTE(A) (20) --> (1_BYTE) -- BYTE(B) (28) --> (2_BYTE)
  /// (1_BYTE) -- ALPHANUMERIC(BC)                             (44) --> (3_ALPHANUMERIC)
  /// (initial) -- ALPHANUMERIC(AB)                     (24) --> (2_ALPHANUMERIC) -- BYTE(C) (44) --> (3_BYTE)
  ///                                                            (2_ALPHANUMERIC) -- ALPHANUMERIC(CD)                             (35) --> (4_ALPHANUMERIC)
  ///
  /// Situation after adding edges to vertices at position 3
  /// (initial) -- BYTE(A) (20) --> (1_BYTE) -- BYTE(B) (28) --> (2_BYTE) -- BYTE(C)         (36) --> (3_BYTE)
  ///                               (1_BYTE) -- ALPHANUMERIC(BC)                             (44) --> (3_ALPHANUMERIC) -- BYTE(D) (64) --> (4_BYTE)
  ///                                                                                                 (3_ALPHANUMERIC) -- ALPHANUMERIC(DE)                             (55) --> (5_ALPHANUMERIC)
  /// (initial) -- ALPHANUMERIC(AB)                     (24) --> (2_ALPHANUMERIC) -- ALPHANUMERIC(CD)                             (35) --> (4_ALPHANUMERIC)
  ///                                                            (2_ALPHANUMERIC) -- ALPHANUMERIC(CD)                             (35) --> (4_ALPHANUMERIC)
  ///
  /// Situation after adding edges to vertices at position 4
  /// (initial) -- BYTE(A) (20) --> (1_BYTE) -- BYTE(B) (28) --> (2_BYTE) -- BYTE(C)         (36) --> (3_BYTE) -- BYTE(D) (44) --> (4_BYTE)
  ///                               (1_BYTE) -- ALPHANUMERIC(BC)                             (44) --> (3_ALPHANUMERIC) -- ALPHANUMERIC(DE)                             (55) --> (5_ALPHANUMERIC)
  /// (initial) -- ALPHANUMERIC(AB)                     (24) --> (2_ALPHANUMERIC) -- ALPHANUMERIC(CD)                             (35) --> (4_ALPHANUMERIC) -- BYTE(E) (55) --> (5_BYTE)
  ///
  /// Situation after adding edges to vertices at position 5
  /// (initial) -- BYTE(A) (20) --> (1_BYTE) -- BYTE(B) (28) --> (2_BYTE) -- BYTE(C)         (36) --> (3_BYTE) -- BYTE(D)         (44) --> (4_BYTE) -- BYTE(E)         (52) --> (5_BYTE)
  ///                               (1_BYTE) -- ALPHANUMERIC(BC)                             (44) --> (3_ALPHANUMERIC) -- ALPHANUMERIC(DE)                             (55) --> (5_ALPHANUMERIC)
  /// (initial) -- ALPHANUMERIC(AB)                     (24) --> (2_ALPHANUMERIC) -- ALPHANUMERIC(CD)                             (35) --> (4_ALPHANUMERIC)
  ///
  /// Encoding as BYTE(ABCDE) has the smallest size of 52 and is hence chosen. The encodation ALPHANUMERIC(ABCD),
  /// BYTE(E) is longer with a size of 55.
  ///
  /// Example 2 encoding the string "XXYY" where X denotes a character unique to character set ISO-8859-2 and Y a
  /// character unique to ISO-8859-3. Both characters encode as double byte in UTF-8:
  ///
  /// Initial situation
  /// (initial) -- BYTE(X) (32) --> (1_BYTE_ISO-8859-2)
  /// (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-8)
  /// (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-16BE)
  ///
  /// Situation after adding edges to vertices at position 1
  /// (initial) -- BYTE(X) (32) --> (1_BYTE_ISO-8859-2) -- BYTE(X) (40) --> (2_BYTE_ISO-8859-2)
  ///                               (1_BYTE_ISO-8859-2) -- BYTE(X) (72) --> (2_BYTE_UTF-8)
  ///                               (1_BYTE_ISO-8859-2) -- BYTE(X) (72) --> (2_BYTE_UTF-16BE)
  /// (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-8)
  /// (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-16BE)
  ///
  /// Situation after adding edges to vertices at position 2
  /// (initial) -- BYTE(X) (32) --> (1_BYTE_ISO-8859-2) -- BYTE(X) (40) --> (2_BYTE_ISO-8859-2)
  ///                                                                       (2_BYTE_ISO-8859-2) -- BYTE(Y) (72) --> (3_BYTE_ISO-8859-3)
  ///                                                                       (2_BYTE_ISO-8859-2) -- BYTE(Y) (80) --> (3_BYTE_UTF-8)
  ///                                                                       (2_BYTE_ISO-8859-2) -- BYTE(Y) (80) --> (3_BYTE_UTF-16BE)
  /// (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-8) -- BYTE(X) (56) --> (2_BYTE_UTF-8)
  /// (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-16BE) -- BYTE(X) (56) --> (2_BYTE_UTF-16BE)
  ///
  /// Situation after adding edges to vertices at position 3
  /// (initial) -- BYTE(X) (32) --> (1_BYTE_ISO-8859-2) -- BYTE(X) (40) --> (2_BYTE_ISO-8859-2) -- BYTE(Y) (72) --> (3_BYTE_ISO-8859-3)
  ///                                                                                                               (3_BYTE_ISO-8859-3) -- BYTE(Y) (80) --> (4_BYTE_ISO-8859-3)
  ///                                                                                                               (3_BYTE_ISO-8859-3) -- BYTE(Y) (112) --> (4_BYTE_UTF-8)
  ///                                                                                                               (3_BYTE_ISO-8859-3) -- BYTE(Y) (112) --> (4_BYTE_UTF-16BE)
  /// (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-8) -- BYTE(X) (56) --> (2_BYTE_UTF-8) -- BYTE(Y) (72) --> (3_BYTE_UTF-8)
  /// (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-16BE) -- BYTE(X) (56) --> (2_BYTE_UTF-16BE) -- BYTE(Y) (72) --> (3_BYTE_UTF-16BE)
  ///
  /// Situation after adding edges to vertices at position 4
  /// (initial) -- BYTE(X) (32) --> (1_BYTE_ISO-8859-2) -- BYTE(X) (40) --> (2_BYTE_ISO-8859-2) -- BYTE(Y) (72) --> (3_BYTE_ISO-8859-3) -- BYTE(Y) (80) --> (4_BYTE_ISO-8859-3)
  ///                                                                                                               (3_BYTE_UTF-8) -- BYTE(Y) (88) --> (4_BYTE_UTF-8)
  ///                                                                                                               (3_BYTE_UTF-16BE) -- BYTE(Y) (88) --> (4_BYTE_UTF-16BE)
  /// (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-8) -- BYTE(X) (56) --> (2_BYTE_UTF-8) -- BYTE(Y) (72) --> (3_BYTE_UTF-8)
  /// (initial) -- BYTE(X) (40) --> (1_BYTE_UTF-16BE) -- BYTE(X) (56) --> (2_BYTE_UTF-16BE) -- BYTE(Y) (72) --> (3_BYTE_UTF-16BE)
  ///
  /// Encoding as ECI(ISO-8859-2),BYTE(XX),ECI(ISO-8859-3),BYTE(YY) has the smallest size of 80 and is hence chosen.
  /// The encodation ECI(UTF-8),BYTE(XXYY) is longer with a size of 88.

  final inputLength = stringToEncode.length;

  // Array that represents vertices. There is a vertex for every character, encoding and mode. The vertex contains
  // a list of all edges that lead to it that have the same encoding and mode.
  // The lists are created lazily

  // The last dimension in the array below encodes the 4 modes KANJI, ALPHANUMERIC, NUMERIC and BYTE via the
  // function getCompactedOrdinal(Mode)
  final List<List<List<Edge?>>> edges = List.generate(
    inputLength + 1,
    (idx) => List.generate(encoders.length, (idx2) => List.filled(4, null)),
  );
  addEdges(version, edges, 0, null);

  for (int i = 1; i <= inputLength; i++) {
    for (int j = 0; j < encoders.length; j++) {
      for (int k = 0; k < 4; k++) {
        if (edges[i][j][k] != null && i < inputLength) {
          addEdges(version, edges, i, edges[i][j][k]);
        }
      }
    }
  }
  int minimalJ = -1;
  int minimalK = -1;
  int minimalSize = MathUtils.maxValue;
  for (int j = 0; j < encoders.length; j++) {
    for (int k = 0; k < 4; k++) {
      if (edges[inputLength][j][k] != null) {
        final edge = edges[inputLength][j][k];
        if (edge != null && edge.cachedTotalSize < minimalSize) {
          minimalSize = edge.cachedTotalSize;
          minimalJ = j;
          minimalK = k;
        }
      }
    }
  }
  if (minimalJ < 0) {
    throw WriterException(
      'Internal error: failed to encode "$stringToEncode"',
    );
  }
  return ResultList(version, edges[inputLength][minimalJ][minimalK], this);
}
encodeSpecificVersion method

Implementation

MinimalEncoder class