dart_mupdf_donut 1.0.0 copy "dart_mupdf_donut: ^1.0.0" to clipboard
dart_mupdf_donut: ^1.0.0 copied to clipboard

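A comprehensive pure Dart PDF library with OCR-free document understanding. Combines PyMuPDF-inspired PDF parsing (text/image extraction, annotations, page manipulation, PDF creation) with a Donut (Swin + BART) transformer for structured receipt, invoice, and form extraction from images. No native dependencies — works on all platforms.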

example/example.dart

// ignore_for_file: avoid_print
/// dart_mupdf_donut — Example Usage
///
/// Demonstrates both the PDF module and the Donut OCR-free document
/// understanding module.
///
/// Run with:
///   dart run example/example.dart
import 'dart:typed_data';

import 'package:dart_mupdf_donut/dart_mupdf.dart';
import 'package:dart_mupdf_donut/donut.dart';

/// Entry point: runs the PDF demo first, then the Donut demo.
///
/// Both demos report their progress with `print`.
void main() {
  for (final runDemo in [_pdfExamples, _donutExamples]) {
    runDemo();
  }
}

// ═══════════════════════════════════════════════════════════════════════
// PDF MODULE
// ═══════════════════════════════════════════════════════════════════════

/// Walks through the PDF half of the package and prints each result.
///
/// In order: creates a one-page document, reads the page's properties,
/// exercises the geometry value types, records a few drawing commands with
/// [Shape], manipulates a [Pixmap], and calls the static helpers on
/// [DartMuPDF].
///
/// The document is closed in a `finally` block, so it is released even when
/// one of the demo calls throws.
void _pdfExamples() {
  // A4 page size (the values match A4 expressed in 1/72-inch points).
  // Shared by the new page and the Shape canvas so the two cannot drift
  // apart.
  const a4Width = 595.28;
  const a4Height = 841.89;

  print('╔══════════════════════════════════════════╗');
  print('║  dart_mupdf — PDF Module Examples        ║');
  print('╚══════════════════════════════════════════╝\n');

  // ── Create a new PDF from scratch ──────────────────────────────────
  print('▸ Creating a new PDF...');
  final doc = DartMuPDF.createPdf();
  try {
    doc.newPage(width: a4Width, height: a4Height);
    print('  Created PDF with ${doc.pageCount} page(s)');

    // ── Access page properties ───────────────────────────────────────
    final page = doc.getPage(0);
    print('  Page size: ${page.width} × ${page.height}');
    print('  Page rect: ${page.rect}');
    print('  Rotation:  ${page.rotation}°');

    // ── Geometry types ───────────────────────────────────────────────
    print('\n▸ Geometry types');
    final p1 = Point(100, 200);
    final p2 = Point(300, 400);
    print('  Point: $p1,  distance to $p2: '
        '${p1.distanceTo(p2).toStringAsFixed(1)}');

    final rect = Rect(50, 50, 500, 700);
    print('  Rect: $rect  (${rect.width} × ${rect.height})');
    print('  Contains $p1: ${rect.contains(p1)}');

    final matrix = Matrix.rotation(45);
    print('  Rotation matrix (45°): $matrix');

    final quad = Quad.fromRect(rect);
    print('  Quad area: ${quad.area}');

    // ── Drawing with Shape ───────────────────────────────────────────
    // Each draw call is followed by finish(), which supplies the stroke
    // (and optionally fill) settings for the path just drawn.
    print('\n▸ Drawing shapes');
    final shape = Shape(pageWidth: a4Width, pageHeight: a4Height);

    shape.drawLine(Point(50, 50), Point(200, 50));
    shape.finish(color: [0, 0, 0], width: 2);

    shape.drawRect(Rect(100, 100, 300, 200));
    shape.finish(color: [1, 0, 0], fill: [0.9, 0.9, 1.0], width: 1);

    shape.drawCircle(Point(200, 400), 50);
    shape.finish(color: [0, 0, 1], width: 1.5);

    final stream = shape.commit();
    print('  Content stream: ${stream.length} bytes');

    // ── Pixmap ───────────────────────────────────────────────────────
    print('\n▸ Pixmap operations');
    final pixmap = Pixmap(
      colorspace: Colorspace.csRgb,
      width: 100,
      height: 100,
      hasAlpha: false,
    );
    pixmap.clearWith(255);
    // Paint an 80×80 red square, leaving a 10-pixel border untouched.
    for (int x = 10; x < 90; x++) {
      for (int y = 10; y < 90; y++) {
        pixmap.setPixel(x, y, [255, 0, 0]);
      }
    }
    final grayPix = pixmap.toColorspace(Colorspace.csGray);
    print('  RGB pixmap:  $pixmap');
    print('  Gray pixmap: $grayPix');

    final pngBytes = pixmap.toPng();
    print('  PNG size: ${pngBytes.length} bytes');

    // ── PDF detection utility ────────────────────────────────────────
    print('\n▸ Utility');
    final header = Uint8List.fromList('%PDF-1.7'.codeUnits);
    print('  Is PDF: ${DartMuPDF.isPdf(header)}');
    print('  Library version: ${DartMuPDF.version}');
  } finally {
    // Release the document even if one of the calls above threw.
    doc.close();
  }

  // ── Opening an existing PDF (uncomment with a real file) ──────────
  // final existing = DartMuPDF.openFile('invoice.pdf');
  // print('Pages: ${existing.pageCount}');
  // print('Title: ${existing.metadata.title}');
  //
  // for (int i = 0; i < existing.pageCount; i++) {
  //   final p = existing.getPage(i);
  //   print('Page ${i + 1}: ${p.getText().length} chars');
  //   print('  Images: ${p.getImages().length}');
  //   print('  Links:  ${p.getLinks().length}');
  // }
  //
  // final toc = existing.getToc();
  // for (final entry in toc) {
  //   print('${"  " * (entry.level - 1)}${entry.title} → p.${entry.pageNumber}');
  // }
  // existing.close();

  print('\n');
}

// ═══════════════════════════════════════════════════════════════════════
// DONUT MODULE
// ═══════════════════════════════════════════════════════════════════════

/// Walks through the Donut half of the package and prints each result.
///
/// In order: basic [Tensor] arithmetic, forward passes through individual
/// layers, a tokenizer encode/decode round trip, JSON ↔ token-string
/// conversion, an encode + inference pass through a tiny [DonutModel] with
/// randomly initialised weights, and a description of the image
/// preprocessing pipeline.
///
/// The model uses random weights, so the generated text only demonstrates
/// that the pipeline runs; it is not a meaningful extraction.
void _donutExamples() {
  // Dimensions of the synthetic input image. Shared by the model config,
  // the tensor shape and the index arithmetic that fills the tensor.
  const imgChannels = 3;
  const imgHeight = 128;
  const imgWidth = 96;

  // Longest prefix of the token string shown in the JSON → tokens demo.
  const previewLength = 80;

  print('╔══════════════════════════════════════════╗');
  print('║  donut — Document Understanding Examples ║');
  print('╚══════════════════════════════════════════╝\n');

  // ── 1. Tensor basics ───────────────────────────────────────────────
  print('▸ Tensor operations');
  final zeros = Tensor.zeros([2, 3]);
  final ones = Tensor.ones([2, 3]);
  final sum = zeros + ones;
  print('  zeros + ones = $sum');
  print('  Shape: ${sum.shape}, size: ${sum.size}');

  final lhs = Tensor.ones([2, 4]);
  final rhs = Tensor.ones([4, 3]);
  final product = lhs.matmul(rhs);
  print('  matmul [2,4] × [4,3] → ${product.shape}');

  final softmaxed = Tensor.fromList([1.0, 2.0, 3.0]).softmax(0);
  print('  softmax([1,2,3]) = ${softmaxed.data}');

  // ── 2. Neural network layers ───────────────────────────────────────
  print('\n▸ Neural network layers');
  final linear = Linear(8, 4);
  final input = Tensor.ones([1, 8]);
  final output = linear.forward(input);
  print('  Linear(8→4): input ${input.shape} → output ${output.shape}');

  final norm = LayerNorm(4);
  final normalized = norm.forward(output);
  print('  LayerNorm(4): ${normalized.shape}');

  final embed = Embedding(100, 16);
  final embedded = embed.forward([5, 10, 15]);
  print('  Embedding(100, 16) [5,10,15] → ${embedded.shape}');

  // ── 3. Tokenizer ──────────────────────────────────────────────────
  print('\n▸ Tokenizer');
  // Toy vocabulary: control tokens, a few characters and word pieces, and
  // the field tags that are also registered as special tokens below.
  final vocab = <String, int>{
    '<s>': 0,
    '<pad>': 1,
    '</s>': 2,
    '<unk>': 3,
    '▁': 4,
    'H': 5,
    'e': 6,
    'l': 7,
    'o': 8,
    '▁world': 9,
    '▁Hello': 10,
    '<s_cord-v2>': 11,
    '</s_cord-v2>': 12,
    '<s_menu>': 13,
    '</s_menu>': 14,
    '<s_nm>': 15,
    '</s_nm>': 16,
    '<s_price>': 17,
    '</s_price>': 18,
    '<s_total>': 19,
    '</s_total>': 20,
    '<s_total_price>': 21,
    '</s_total_price>': 22,
    '<sep/>': 23,
  };
  final tokenizer = DonutTokenizer(
    vocab: vocab,
    merges: [],
    specialTokens: {
      '<s_cord-v2>',
      '</s_cord-v2>',
      '<s_menu>',
      '</s_menu>',
      '<s_nm>',
      '</s_nm>',
      '<s_price>',
      '</s_price>',
      '<s_total>',
      '</s_total>',
      '<s_total_price>',
      '</s_total_price>',
      '<sep/>',
    },
  );
  print('  Vocab size: ${tokenizer.vocabSize}');
  print('  BOS=${tokenizer.bosTokenId}, EOS=${tokenizer.eosTokenId}');

  // NOTE(review): the input already carries the '▁' markers used by the
  // '▁Hello' / '▁world' vocab entries, while the label below shows plain
  // "Hello world" — presumably the tokenizer expects pre-marked text; confirm.
  final tokens = tokenizer.encode('▁Hello▁world');
  print('  encode("Hello world") → $tokens');
  print('  decode → "${tokenizer.decode(tokens)}"');

  // ── 4. JSON ↔ token conversion ────────────────────────────────────
  print('\n▸ JSON ↔ Donut token conversion');
  final receiptJson = {
    'menu': [
      {'nm': 'Latte', 'price': '5.00'},
      {'nm': 'Muffin', 'price': '3.50'},
    ],
    'total': {'total_price': '8.50'},
  };

  final tokenStr = DonutModel.json2token(receiptJson);
  // Guard the truncation: substring(0, 80) throws a RangeError when the
  // token string is shorter than the preview length.
  final tokenPreview = tokenStr.length > previewLength
      ? '${tokenStr.substring(0, previewLength)}...'
      : tokenStr;
  print('  JSON → tokens: $tokenPreview');

  final parsed = DonutModel.token2json(tokenStr);
  print('  tokens → JSON: $parsed');

  // ── 5. Full model pipeline (tiny config, random weights) ──────────
  print('\n▸ Full Donut model pipeline (random weights)');
  final config = DonutConfig(
    inputSize: [imgHeight, imgWidth],
    alignLongAxis: true,
    windowSize: 4,
    encoderLayer: [2, 2],
    decoderLayer: 1,
    maxPositionEmbeddings: 256,
    maxLength: 40,
    encoderEmbedDim: 32,
    encoderNumHeads: [2, 4],
    patchSize: 4,
    decoderEmbedDim: 64,
    decoderFfnDim: 128,
    decoderNumHeads: 4,
    vocabSize: tokenizer.vocabSize,
  );

  final model = DonutModel(config);
  model.randomInit(seed: 42);
  model.setTokenizer(tokenizer);
  print('  Config: input=${config.inputSize}, '
      'encoder=[${config.encoderLayer.join(",")}], '
      'decoder=${config.decoderLayer}');

  // Create a synthetic 96×128 test image (gradient). The flat index below
  // walks the buffer channel by channel, then row by row.
  final imgTensor = Tensor.zeros([1, imgChannels, imgHeight, imgWidth]);
  for (int channel = 0; channel < imgChannels; channel++) {
    for (int row = 0; row < imgHeight; row++) {
      for (int col = 0; col < imgWidth; col++) {
        final flatIndex = channel * imgHeight * imgWidth + row * imgWidth + col;
        imgTensor.data[flatIndex] = (row + col + channel * 50) / 300.0 - 0.5;
      }
    }
  }
  print('  Synthetic image tensor: ${imgTensor.shape}');

  // Encode
  final encoderOut = model.encode(imgTensor);
  print('  Encoder output: ${encoderOut.shape}');

  // Full inference
  final result = model.inference(
    imageTensor: imgTensor,
    prompt: '<s_cord-v2>',
    maxLength: 20,
  );
  print('  Generated ${result.tokens.length} tokens');
  // Show at most the first 60 characters of the generated text.
  print(
      '  Text: "${result.text.substring(0, result.text.length.clamp(0, 60))}"');
  print('  JSON: ${result.json}');

  // ── 6. Image preprocessing pipeline ───────────────────────────────
  print('\n▸ Image preprocessing');
  print(DonutImageUtils.describePipeline(config));

  // ── With real images (uncomment) ──────────────────────────────────
  // Requires `import 'dart:io';` at the top of the file.
  // final imgBytes = File('receipt.jpg').readAsBytesSync();
  // final tensor = DonutImageUtils.preprocessBytes(imgBytes, config);
  // print('  Preprocessed: ${tensor.shape}');
  //
  // final realResult = model.inferenceFromBytes(
  //   imageBytes: imgBytes,
  //   prompt: '<s_cord-v2>',
  // );
  // print('  ${realResult.json}');

  print('\n▸ Done! For real document understanding, load pretrained weights.');
  print('  See README.md for weight export instructions.\n');
}
1
likes
120
points
203
downloads

Documentation

Documentation
API reference

Publisher

unverified uploader

Weekly Downloads

A comprehensive pure Dart PDF library with OCR-free document understanding. Combines PyMuPDF-inspired PDF parsing (text/image extraction, annotations, page manipulation, PDF creation) with a Donut (Swin + BART) transformer for structured receipt, invoice, and form extraction from images. No native dependencies — works on all platforms.

Repository (GitHub)
View/report issues

Topics

#pdf #ocr #document-understanding #text-extraction #machine-learning

License

MIT (license)

Dependencies

archive, collection, crypto, image, path, pointycastle, xml

More

Packages that depend on dart_mupdf_donut