Line data Source code
1 : // Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file
2 : // for details. All rights reserved. Use of this source code is governed by a
3 : // BSD-style license that can be found in the LICENSE file.
4 :
5 : import 'dart:convert';
6 : import 'dart:math' as math;
7 : import 'dart:typed_data';
8 :
9 : import 'package:charcode/ascii.dart';
10 : import 'package:typed_data/typed_data.dart';
11 :
/// The shared, canonical [ChunkedCodingDecoder] instance.
///
/// [ChunkedCodingDecoder] declares no fields and only a private `const`
/// constructor, so this constant is the way code outside this library obtains
/// a decoder.
const ChunkedCodingDecoder chunkedCodingDecoder =
    const ChunkedCodingDecoder._();
14 :
/// A converter that decodes bytes in the chunked transfer coding.
///
/// The input is expected to be a sequence of chunks, each made up of a
/// hexadecimal size, `CR LF`, that many bytes of body, and another `CR LF`,
/// terminated by a zero-size chunk that is followed by `CR LF`. The output
/// contains only the bytes of the chunk bodies.
class ChunkedCodingDecoder extends Converter<List<int>, List<int>> {
  const ChunkedCodingDecoder._();

  /// Decodes the complete chunked message in [bytes] and returns its body.
  ///
  /// Throws a [FormatException] if [bytes] is malformed, or if it ends before
  /// the terminating chunk and its trailing `CR LF` have been parsed.
  @override
  List<int> convert(List<int> bytes) {
    // No output sink is needed here: only `_decode` is called, and it returns
    // the decoded bytes directly rather than forwarding them anywhere.
    var sink = new _Sink(null);
    var output = sink._decode(bytes, 0, bytes.length);
    if (sink._state == _State.end) return output;

    throw new FormatException(
        "Input ended unexpectedly.", bytes, bytes.length);
  }

  /// Returns a sink that decodes chunked data incrementally and passes the
  /// decoded bytes on to [sink].
  @override
  ByteConversionSink startChunkedConversion(Sink<List<int>> sink) =>
      new _Sink(sink);
}
31 :
/// A conversion sink for the chunked transfer encoding.
///
/// The parser is a state machine whose position ([_state]) and current chunk
/// size ([_size]) are stored on the sink, so a chunk header, body, or `CR LF`
/// delimiter may be split across any number of [add]/[addSlice] calls.
class _Sink extends ByteConversionSinkBase {
  /// The underlying sink to which decoded byte arrays will be passed.
  final Sink<List<int>> _sink;

  /// The current state of the sink's parsing.
  var _state = _State.boundary;

  /// The size of the chunk being parsed, or `null` if no size has been parsed
  /// yet.
  ///
  /// While the size header is being read this holds the value of the digits
  /// seen so far. While the body is being read it is counted down, so it holds
  /// the number of body bytes that still have to be consumed.
  int _size;

  _Sink(this._sink);

  /// Decodes all of [chunk] and forwards any decoded bytes to the underlying
  /// sink.
  @override
  void add(List<int> chunk) => addSlice(chunk, 0, chunk.length, false);

  /// Decodes the bytes of [chunk] from [start] (inclusive) to [end]
  /// (exclusive) and forwards any decoded bytes to the underlying sink.
  ///
  /// If [isLast] is `true` the sink is closed afterwards, which throws a
  /// [FormatException] if the terminating chunk hasn't been fully parsed.
  @override
  void addSlice(List<int> chunk, int start, int end, bool isLast) {
    RangeError.checkValidRange(start, end, chunk.length);
    var output = _decode(chunk, start, end);
    // Input that only contained framing bytes decodes to nothing; don't emit
    // empty events for it.
    if (output.isNotEmpty) _sink.add(output);
    if (isLast) _close(chunk, end);
  }

  /// Closes the underlying sink.
  ///
  /// Throws a [FormatException] if the terminating chunk hasn't been fully
  /// parsed.
  @override
  void close() => _close();

  /// Like [close], but includes [chunk] and [index] in the [FormatException] if
  /// one is thrown.
  void _close([List<int> chunk, int index]) {
    if (_state != _State.end) {
      throw new FormatException("Input ended unexpectedly.", chunk, index);
    }

    _sink.close();
  }

  /// Decodes the data in [bytes] from [start] to [end].
  ///
  /// Returns the chunk body bytes found in that range, and updates [_state]
  /// and [_size] so that a later call can continue where this one stopped.
  /// Throws a [FormatException] if an unexpected byte is encountered.
  Uint8List _decode(List<int> bytes, int start, int end) {
    /// Throws a [FormatException] if `bytes[start] != $char`. Uses [name] to
    /// describe the character in the exception text.
    void assertCurrentChar(int char, String name) {
      if (bytes[start] != char) {
        throw new FormatException("Expected $name.", bytes, start);
      }
    }

    var buffer = new Uint8Buffer();
    while (start != end) {
      switch (_state) {
        case _State.boundary:
          // The first byte of a chunk header must be a hex digit.
          _size = _digitForByte(bytes, start);
          _state = _State.size;
          start++;
          break;

        case _State.size:
          if (bytes[start] == $cr) {
            _state = _State.sizeBeforeLF;
          } else {
            // Shift four bits left since a single hex digit contains four bits
            // of information.
            _size = (_size << 4) + _digitForByte(bytes, start);
          }
          start++;
          break;

        case _State.sizeBeforeLF:
          assertCurrentChar($lf, "LF");
          // A zero-size chunk marks the end of the message.
          _state = _size == 0 ? _State.endBeforeCR : _State.body;
          start++;
          break;

        case _State.body:
          // Consume as much of the body as this input holds; whatever is left
          // stays recorded in [_size] for the next call.
          var chunkEnd = math.min(end, start + _size);
          buffer.addAll(bytes, start, chunkEnd);
          _size -= chunkEnd - start;
          start = chunkEnd;
          if (_size == 0) _state = _State.bodyBeforeCR;
          break;

        case _State.bodyBeforeCR:
          assertCurrentChar($cr, "CR");
          _state = _State.bodyBeforeLF;
          start++;
          break;

        case _State.bodyBeforeLF:
          assertCurrentChar($lf, "LF");
          _state = _State.boundary;
          start++;
          break;

        case _State.endBeforeCR:
          assertCurrentChar($cr, "CR");
          _state = _State.endBeforeLF;
          start++;
          break;

        case _State.endBeforeLF:
          assertCurrentChar($lf, "LF");
          _state = _State.end;
          start++;
          break;

        case _State.end:
          throw new FormatException("Expected no more data.", bytes, start);
      }
    }
    // Return a view of only the bytes that were actually written to the
    // buffer.
    return buffer.buffer.asUint8List(0, buffer.length);
  }

  /// Returns the hex digit (0 through 15) corresponding to the byte at
  /// [index] in [bytes].
  ///
  /// If the given byte isn't a hexadecimal ASCII character, throws a
  /// [FormatException].
  int _digitForByte(List<int> bytes, int index) {
    // If the byte is a numeral, get its value. XOR works because 0 in ASCII is
    // `0b110000` and the other numerals come after it in ascending order and
    // take up at most four bits.
    //
    // We check for digits first because it ensures there's only a single branch
    // for 10 out of 16 of the expected cases. We don't count the `digit >= 0`
    // check because branch prediction will always work on it for valid data.
    var byte = bytes[index];
    var digit = $0 ^ byte;
    if (digit <= 9) {
      if (digit >= 0) return digit;
    } else {
      // If the byte is an uppercase letter, convert it to lowercase. This works
      // because uppercase letters in ASCII are exactly `0b100000 = 0x20` less
      // than lowercase letters, so if we ensure that that bit is 1 we ensure
      // that the letter is lowercase.
      var letter = 0x20 | byte;
      if ($a <= letter && letter <= $f) return letter - $a + 10;
    }

    throw new FormatException(
        "Invalid hexadecimal byte 0x${byte.toRadixString(16).toUpperCase()}.",
        bytes,
        index);
  }
}
173 :
/// An enumeration of states that [_Sink] can exist in when decoding a chunked
/// message.
class _State {
  /// The parser is expecting the header for the next chunk.
  ///
  /// This is the state a [_Sink] starts in, and the state it returns to after
  /// it has fully parsed a chunk.
  ///
  /// Transitions to [size].
  static const boundary = const _State._("boundary");

  /// The parser has parsed at least one digit of the chunk size header, but has
  /// not yet parsed the `CR LF` sequence that indicates the end of that header.
  ///
  /// Transitions to [sizeBeforeLF].
  static const size = const _State._("size");

  /// The parser has parsed the chunk size header and the CR character after it,
  /// but not the LF.
  ///
  /// Transitions to [body], or to [endBeforeCR] if the parsed size is zero.
  static const sizeBeforeLF = const _State._("size before LF");

  /// The parser has parsed a chunk header and possibly some of the body, but
  /// still needs to consume more bytes.
  ///
  /// Transitions to [bodyBeforeCR].
  static const body = const _State._("body");

  /// The parser has parsed all the bytes in a chunk body but not the CR LF
  /// sequence that follows it.
  ///
  /// Transitions to [bodyBeforeLF].
  static const bodyBeforeCR = const _State._("body before CR");

  /// The parser has parsed all the bytes in a chunk body and the CR that
  /// follows it, but not the LF after that.
  ///
  /// Transitions to [boundary].
  static const bodyBeforeLF = const _State._("body before LF");

  /// The parser has parsed the final empty chunk but not the CR LF sequence
  /// that follows it.
  ///
  /// Transitions to [endBeforeLF].
  static const endBeforeCR = const _State._("end before CR");

  /// The parser has parsed the final empty chunk and the CR that follows it,
  /// but not the LF after that.
  ///
  /// Transitions to [end].
  static const endBeforeLF = const _State._("end before LF");

  /// The parser has parsed the final empty chunk as well as the CR LF that
  /// follows, and expects no more data.
  static const end = const _State._("end");

  /// The human-readable name of this state, returned by [toString].
  final String _name;

  const _State._(this._name);

  /// Returns the human-readable name of this state.
  @override
  String toString() => _name;
}
|