parseConllu function

List<UDToken> parseConllu(
  1. String conllu
)

Parses a single-sentence CoNLL-U string into a flat list of UDTokens.

Implementation

/// Parses a CoNLL-U string into a flat list of [UDToken]s.
///
/// The input is processed line by line. A line is skipped when it:
///  * is empty or starts with `#` (comment / sentence metadata),
///  * has fewer than 8 tab-separated columns,
///  * has an ID column containing `-` or `.` (CoNLL-U multiword-token ranges
///    such as `1-2` and empty nodes such as `1.1`),
///  * has an ID column that is not parseable as an integer.
///
/// Because blank lines are simply skipped, sentence boundaries are not
/// preserved: every accepted row ends up in the same returned list.
///
/// For each accepted row the columns are mapped as: 1 → `id`, 2 → `form`,
/// 3 → `lemma`, 4 → `upos`, 7 → `head`, 8 → `deprel`. Column 6 is passed to
/// [UDToken.parseFeats], whose `gender`, `number` and `degree` are copied
/// onto the token. A `head` value that is not an integer (e.g. `_`) is
/// stored as `0`.
///
/// Both LF and CRLF line endings are accepted.
List<UDToken> parseConllu(String conllu) {
  final tokens = <UDToken>[];
  for (final rawLine in conllu.split('\n')) {
    // Tolerate CRLF input: without this, a row with exactly 8 columns would
    // carry the stray '\r' into its last field (deprel).
    final line = rawLine.endsWith('\r')
        ? rawLine.substring(0, rawLine.length - 1)
        : rawLine;
    if (line.isEmpty || line.startsWith('#')) continue;

    final cols = line.split('\t');
    // Columns up to index 7 (DEPREL) are read below.
    if (cols.length < 8) continue;

    // Only plain integer IDs are kept; range and decimal IDs are dropped.
    final rawId = cols[0];
    if (rawId.contains('-') || rawId.contains('.')) continue;
    final id = int.tryParse(rawId);
    if (id == null) continue;

    final feats = UDToken.parseFeats(cols[5]);
    tokens.add(UDToken(
      id:     id,
      form:   cols[1],
      lemma:  cols[2],
      upos:   cols[3],
      deprel: cols[7],
      // Unparseable HEAD (such as the "_" placeholder) falls back to 0.
      head:   int.tryParse(cols[6]) ?? 0,
      gender: feats.gender,
      number: feats.number,
      degree: feats.degree,
    ));
  }
  return tokens;
}