bert_tokenizer 1.0.2 copy "bert_tokenizer: ^1.0.2" to clipboard
bert_tokenizer: ^1.0.2 copied to clipboard

A lightweight, pure Dart WordPiece tokenizer for BERT NLP models. Completely independent of Flutter.

example/bert_tokenizer_example.dart

import 'dart:io';
import 'package:bert_tokenizer/bert_tokenizer.dart';

void main() {
  //Read the vocabulary file using standard Dart I/O
  final vocabFile = File('vocab.txt');

  if (!vocabFile.existsSync()) {
    print(
        'Error: Could not find vocab.txt. Make sure you are in the example/ directory.');
    return;
  }

  final vocabContent = vocabFile.readAsStringSync();

  //Initialize the tokenizer
  final tokenizer = BertTokenizer.fromStringContent(vocabContent);

  //Prepare the input for a BERT model
  final text = "Hello worldwide Dart is awesome!";
  final maxLength = 12;

  final input = tokenizer.prepareNerInput(text, maxLength);

  //Print the results
  print('--- BERT Tokenizer Example ---');
  print('Original Text:     $text');

  //Show the intermediate string tokens
  print('Tokenized Strings: ${tokenizer.tokenize(text)}');
  print('------------------------------');

  //Show the final arrays needed for the ML Model
  print('Input IDs:         ${input.inputIds}');
  print('Input Mask:        ${input.inputMask}');
  print('Segment IDs:       ${input.segmentIds}');
}
1
likes
160
points
188
downloads

Documentation

API reference

Publisher

verified publishervelarapp.com

Weekly Downloads

A lightweight, pure Dart WordPiece tokenizer for BERT NLP models. Completely independent of Flutter.

Repository (GitHub)
View/report issues

License

MIT (license)

More

Packages that depend on bert_tokenizer