Line data Source code
1 : // Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file
2 : // for details. All rights reserved. Use of this source code is governed by a
3 : // BSD-style license that can be found in the LICENSE file.
4 :
5 : import 'dart:async';
6 : import 'dart:convert';
7 : import 'dart:math' as math;
8 : import 'dart:typed_data';
9 :
10 : import 'package:charcode/ascii.dart';
11 : import 'package:typed_data/typed_data.dart';
12 :
/// The shared, canonical [ChunkedCodingDecoder] instance.
///
/// The decoder's only constructor is private, so this constant is how code
/// outside this library obtains one.
const ChunkedCodingDecoder chunkedCodingDecoder = ChunkedCodingDecoder._();
15 :
/// A converter that decodes chunked-transfer-encoded bytes into the payload
/// bytes they carry.
///
/// The size headers and `CR LF` delimiters are consumed by the parser and do
/// not appear in the output.
class ChunkedCodingDecoder extends Converter<List<int>, List<int>> {
  const ChunkedCodingDecoder._();

  /// Decodes [input], which must hold one complete chunked message.
  ///
  /// Throws a [FormatException] if [input] is malformed or stops before the
  /// terminating empty chunk and its trailing `CR LF` have been parsed.
  @override
  List<int> convert(List<int> input) {
    // The controller only satisfies the sink constructor; `_decode` hands the
    // decoded bytes back directly and never writes to it.
    final decoder = _Sink(StreamController());
    final decoded = decoder._decode(input, 0, input.length);
    if (decoder._state != _State.end) {
      throw FormatException('Input ended unexpectedly.', input, input.length);
    }

    return decoded;
  }

  /// Returns a sink that decodes incrementally and forwards the decoded bytes
  /// to [sink].
  @override
  ByteConversionSink startChunkedConversion(Sink<List<int>> sink) {
    return _Sink(sink);
  }
}
33 :
/// A conversion sink that decodes the chunked transfer encoding.
///
/// Input may arrive in arbitrary slices: the parser position is kept in
/// [_state] and [_size] between calls, and the payload bytes decoded from each
/// slice are forwarded to the wrapped sink.
class _Sink extends ByteConversionSinkBase {
  /// The largest value [_size] may hold when another hex digit is appended.
  ///
  /// This is `2^49 - 1`, which keeps `_size * 16 + 15` at or below `2^53 - 1`,
  /// so the size stays exactly representable whether `int` is a 64-bit integer
  /// or a JavaScript number.
  static const _maxSizeBeforeDigit = 0x1FFFFFFFFFFFF;

  /// The underlying sink to which decoded byte arrays will be passed.
  final Sink<List<int>> _sink;

  /// The current state of the sink's parsing.
  var _state = _State.boundary;

  /// The number of body bytes of the current chunk that are still expected.
  ///
  /// Only assigned and used within [_decode]. It is always written in the
  /// [_State.boundary] state before any other state reads it.
  late int _size;

  _Sink(this._sink);

  @override
  void add(List<int> chunk) => addSlice(chunk, 0, chunk.length, false);

  /// Decodes `chunk[start..end)` and forwards any decoded bytes to the
  /// underlying sink.
  ///
  /// If [isLast] is true the sink is closed afterwards, which throws a
  /// [FormatException] unless the message has been parsed to completion.
  @override
  void addSlice(List<int> chunk, int start, int end, bool isLast) {
    RangeError.checkValidRange(start, end, chunk.length);
    final output = _decode(chunk, start, end);
    if (output.isNotEmpty) _sink.add(output);
    if (isLast) _close(chunk, end);
  }

  @override
  void close() => _close();

  /// Like [close], but includes [chunk] and [index] in the [FormatException] if
  /// one is thrown.
  void _close([List<int>? chunk, int? index]) {
    if (_state != _State.end) {
      throw FormatException('Input ended unexpectedly.', chunk, index);
    }

    _sink.close();
  }

  /// Decodes the data in [bytes] from [start] to [end] and returns the chunk
  /// body bytes found in that range.
  ///
  /// Parsing resumes from [_state], so a message may be split across calls at
  /// any byte. Throws a [FormatException] if the input is malformed, if a
  /// chunk size is too large to represent, or if data follows the end of the
  /// message.
  Uint8List _decode(List<int> bytes, int start, int end) {
    /// Throws a [FormatException] if `bytes[start] != $char`. Uses [name] to
    /// describe the character in the exception text.
    void assertCurrentChar(int char, String name) {
      if (bytes[start] != char) {
        throw FormatException('Expected $name.', bytes, start);
      }
    }

    final buffer = Uint8Buffer();
    while (start != end) {
      switch (_state) {
        case _State.boundary:
          _size = _digitForByte(bytes, start);
          _state = _State.size;
          start++;
          break;

        case _State.size:
          if (bytes[start] == $cr) {
            _state = _State.sizeBeforeLF;
          } else {
            final digit = _digitForByte(bytes, start);

            // Without a bound, a long run of hex digits would overflow and
            // silently select a different (possibly negative) chunk length.
            if (_size > _maxSizeBeforeDigit) {
              throw FormatException('Chunk size is too large.', bytes, start);
            }

            // Each hex digit carries four bits. Multiplication is used
            // instead of `<<` because shifts are truncated to 32 bits when
            // compiled to JavaScript.
            _size = _size * 16 + digit;
          }
          start++;
          break;

        case _State.sizeBeforeLF:
          assertCurrentChar($lf, 'LF');
          // A zero-length chunk marks the end of the message.
          _state = _size == 0 ? _State.endBeforeCR : _State.body;
          start++;
          break;

        case _State.body:
          // Take as much of the chunk body as this slice contains; the rest
          // is picked up by a later call.
          final chunkEnd = math.min(end, start + _size);
          buffer.addAll(bytes, start, chunkEnd);
          _size -= chunkEnd - start;
          start = chunkEnd;
          if (_size == 0) _state = _State.bodyBeforeCR;
          break;

        case _State.bodyBeforeCR:
          assertCurrentChar($cr, 'CR');
          _state = _State.bodyBeforeLF;
          start++;
          break;

        case _State.bodyBeforeLF:
          assertCurrentChar($lf, 'LF');
          _state = _State.boundary;
          start++;
          break;

        case _State.endBeforeCR:
          assertCurrentChar($cr, 'CR');
          _state = _State.endBeforeLF;
          start++;
          break;

        case _State.endBeforeLF:
          assertCurrentChar($lf, 'LF');
          _state = _State.end;
          start++;
          break;

        case _State.end:
          throw FormatException('Expected no more data.', bytes, start);
      }
    }
    return buffer.buffer.asUint8List(0, buffer.length);
  }

  /// Returns the hex digit (0 through 15) corresponding to the byte at index
  /// [index] in [bytes].
  ///
  /// If the given byte isn't a hexadecimal ASCII character, throws a
  /// [FormatException].
  int _digitForByte(List<int> bytes, int index) {
    // If the byte is a numeral, get its value. XOR works because 0 in ASCII is
    // `0b110000` and the other numerals come after it in ascending order and
    // take up at most four bits.
    //
    // We check for digits first because it ensures there's only a single branch
    // for 10 out of 16 of the expected cases. We don't count the `digit >= 0`
    // check because branch prediction will always work on it for valid data.
    final byte = bytes[index];
    final digit = $0 ^ byte;
    if (digit <= 9) {
      if (digit >= 0) return digit;
    } else {
      // If the byte is an uppercase letter, convert it to lowercase. This works
      // because uppercase letters in ASCII are exactly `0b100000 = 0x20` less
      // than lowercase letters, so if we ensure that that bit is 1 we ensure
      // that the letter is lowercase.
      final letter = 0x20 | byte;
      if ($a <= letter && letter <= $f) return letter - $a + 10;
    }

    throw FormatException(
        'Invalid hexadecimal byte 0x${byte.toRadixString(16).toUpperCase()}.',
        bytes,
        index);
  }
}
180 :
/// An enumeration of the states that [_Sink] can be in while decoding a
/// chunked message.
class _State {
  /// The label returned by [toString].
  final String _name;

  const _State._(this._name);

  /// The parser is at the start of a chunk size header, either because no
  /// input has been parsed yet or because a whole chunk has just been parsed.
  ///
  /// Transitions to [size].
  static const _State boundary = _State._('boundary');

  /// The parser has read at least one digit of the chunk size header, but not
  /// yet the `CR LF` sequence that ends that header.
  ///
  /// Transitions to [sizeBeforeLF].
  static const _State size = _State._('size');

  /// The parser has read the chunk size header and the CR after it, but not
  /// the LF.
  ///
  /// Transitions to [body], or to [endBeforeCR] when the chunk size is zero.
  static const _State sizeBeforeLF = _State._('size before LF');

  /// The parser has read a chunk header and possibly part of the body, but
  /// more body bytes are still expected.
  ///
  /// Transitions to [bodyBeforeCR].
  static const _State body = _State._('body');

  /// The parser has read every byte of a chunk body but not the `CR LF`
  /// sequence that follows it.
  ///
  /// Transitions to [bodyBeforeLF].
  static const _State bodyBeforeCR = _State._('body before CR');

  /// The parser has read every byte of a chunk body and the CR that follows
  /// it, but not the LF after that.
  ///
  /// Transitions to [boundary].
  static const _State bodyBeforeLF = _State._('body before LF');

  /// The parser has read the final empty chunk but not the `CR LF` sequence
  /// that follows it.
  ///
  /// Transitions to [endBeforeLF].
  static const _State endBeforeCR = _State._('end before CR');

  /// The parser has read the final empty chunk and the CR that follows it,
  /// but not the LF after that.
  ///
  /// Transitions to [end].
  static const _State endBeforeLF = _State._('end before LF');

  /// The parser has read the final empty chunk and the `CR LF` after it, and
  /// expects no more data.
  static const _State end = _State._('end');

  @override
  String toString() => _name;
}
|