parser_tool

Version 0.1.1 (BETA)

Parser tool is a real-time compiler and runtime engine for strongly typed PEG parsers.

Parser tool contains libraries that allow you to create strongly typed PEG parsers. The tool contains a compiler that compiles a parser in real time and an engine to execute the compiled parsers.

Advantages

  • Strongly typed parsers
  • Sufficiently high performance
  • Allows you to debug the parser through tracing support with information about the parsing progress
  • Expression notations are available during debugging
  • Allows you to print out grammar

Disadvantages

  • The parsers are not as fast as parsers that are compiled directly into source code
  • The debugging process is not so clear and convenient

Planned features

  • Adding a semantic predicate

Example

import 'package:parser_tool/grammar.dart';
import 'package:parser_tool/grammar_builder.dart';
import 'package:parser_tool/grammar_compiler.dart';
import 'package:parser_tool/grammar_heplers.dart';

import '_parse_number.dart';

export 'package:parser_tool/parse.dart';

/// Parses a small JSON document with [parser] and prints the parsed value.
///
/// Throws the error built by the parse state when parsing yields no result.
void main() {
  const source = '''
{"rocket": "🚀 flies to the stars"}
''';
  final state = ParseState(source);
  final parsed = parser.parse(state);

  // A non-null result means the input was accepted.
  if (parsed != null) {
    print(parsed.value);
    return;
  }

  throw state.buildError();
}

/// The grammar returned by [buildGrammar].
final Grammar grammar = buildGrammar();

/// The parser produced by passing [grammar] to [compileGrammar].
final Parser parser = compileGrammar(grammar);

/// Builds the example JSON grammar whose start rule is `json`.
///
/// The grammar is declared in three layers: nonterminals (document
/// structure), terminals (tokens, each followed by optional whitespace) and
/// subterminals (character-level pieces used inside string tokens).
/// Every rule is first declared and then given its alternatives with `<<`.
Grammar buildGrammar() {
  // Nonterminals
  final array = Nonterminal<List>('array');
  final json = Nonterminal('json');
  final member = Nonterminal<MapEntry<String, dynamic>>('member');
  final members = Nonterminal<List<MapEntry<String, dynamic>>>('members');
  final object = Nonterminal<Map<String, dynamic>>('object');
  final value = Nonterminal('value');
  final values = Nonterminal<List>('values');

  // Terminals
  final $comma = Terminal(',');
  final $eof = Terminal('end of file');
  final $false = Terminal<bool>('false');
  final $leadingSpaces = Terminal('leading spaces');
  final $lbrace = Terminal('{');
  final $lbracket = Terminal('[');
  final $number = Terminal<num>('number');
  final $null = Terminal('null');
  final $rbrace = Terminal('}');
  final $rbracket = Terminal(']');
  // The name/value separator of an object member.
  final $colon = Terminal(':');
  final $string = Terminal<String>('string');
  final $true = Terminal<bool>('true');

  // Subterminals
  final _char = Subterminal<int>('char');
  final _escaped = Subterminal<int>('escaped');
  final _hexdig = Subterminal<int>('hexdig');
  final _hexdig4 = Subterminal<int>('hexdig4');
  final _unescaped = Subterminal<int>('unescaped');
  final _ws = Subterminal('ws');

  // Nonterminals
  // An absent value list is mapped to an empty list.
  array << seqm([$lbracket], values.opt, [$rbracket]).map((r) => r ?? []);

  json << seqm([$leadingSpaces], value, [$eof]);

  member << seqfl($string, [$colon], value).map((r) => MapEntry(r.$1, r.$2));

  members << repsep(member, $comma);

  // An absent member list is mapped to an empty map.
  object <<
      seqm([$lbrace], members.opt, [$rbrace])
          .map((r) => {}..addEntries(r ?? []));

  value << object;
  value << array;
  value << $string;
  value << $number;
  value << $true;
  value << $false;
  value << $null;

  values << repsep(value, $comma);

  // Terminals
  $comma << seq([literal(','), _ws]);

  $eof << not(any());

  $false << seqr([literal('false'), _ws], false);

  $lbrace << seq([literal('{'), _ws]);

  $leadingSpaces << _ws;

  $lbracket << seq([literal('['), _ws]);

  $null << seqr([literal('null'), _ws], null);

  final zero = '0'.r;
  final digit = '0-9'.r;
  final minus = '-'.r;
  // Either a lone zero or a digit sequence that does not start with zero.
  final integer = alt<dynamic>([
    zero,
    seq(['1-9'.r, digit.star])
  ]);
  final frac = seq(['.'.r, digit.plus]);
  final exp = seq(['eE'.r, '+-'.r.opt, digit.plus]);

  $number <<
      seqf(cap(seq([minus.opt, integer, frac.opt, exp.opt])), [_ws])
          .map(parseNumber);

  $rbrace << seq([literal('}'), _ws]);

  $rbracket << seq([literal(']'), _ws]);

  $colon << seq([literal(':'), _ws]);

  $string <<
      seqm([literal('"')], _char.star, [literal('"'), _ws])
          .map((r) => String.fromCharCodes(r));

  $true << seqr([literal('true'), _ws], true);

  // Subterminals
  _char << _unescaped;
  _char << seql([r'\\'.r], _escaped);

  // `"`, `/` and `\` stand for themselves after a backslash.
  _escaped << r'\u22\u2f\u5c'.r;
  _escaped << seqr(['b'.r], 0x08);
  _escaped << seqr(['f'.r], 0x0c);
  _escaped << seqr(['n'.r], 0x0a);
  _escaped << seqr(['r'.r], 0x0d);
  _escaped << seqr(['t'.r], 0x09);
  // NOTE: `\v` is accepted here although RFC 8259 does not define it.
  _escaped << seqr(['v'.r], 0x0b);
  _escaped << seql(['u'.r], _hexdig4).map((r) => r);

  // Convert a hex digit character to its numeric value 0..15.
  // Letters start at 10, hence the `+ 10` after removing the base character.
  _hexdig << 'a-f'.r.map((c) => c - 0x61 + 10);
  _hexdig << 'A-F'.r.map((c) => c - 0x41 + 10);
  _hexdig << '0-9'.r.map((c) => c - 0x30);

  // Combine four hex digits, most significant first, into one code unit.
  // Each digit is weighted by its place value (16^3, 16^2, 16^1, 16^0).
  _hexdig4 <<
      seq4(_hexdig, _hexdig, _hexdig, _hexdig)
          .map((r) => r.$1 * 0x1000 + r.$2 * 0x100 + r.$3 * 0x10 + r.$4);

  // Everything from U+0020 upwards except `"` (U+0022) and `\` (U+005C).
  _unescaped << r'\u20-\u21\u23-\u5b\u5d-\u10ffff'.r;

  _ws << r' \n\r\t'.r.star;

  final builder = GrammarBuilder();
  return builder.build(json);
}

/// Compiles [grammar] into a [Parser] using a [GrammarCompiler].
///
/// [options] is passed to the compiler; it defaults to
/// `const GrammarCompilerOptions()`.
Parser<E> compileGrammar<E>(Grammar<E> grammar,
        [GrammarCompilerOptions options = const GrammarCompilerOptions()]) =>
    GrammarCompiler<E>(options).compile(grammar);

To be continued...

Libraries

expressions
grammar
grammar_builder
grammar_compiler
grammar_helpers
grammar_printer
parse