preprocessImage method - DonutImageUtils class - donut library

preprocessImage static method

Tensor preprocessImage(

Image image,
DonutConfig config

)

Preprocess a decoded image for Donut inference.

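- image: the decoded image to preprocess.
- config: the Donut configuration; it supplies the target input size (`inputSize`, read as `[height, width]`) and the `alignLongAxis` flag.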

Returns a tensor of shape (1, 3, height, width).

Implementation

/// Preprocesses a decoded image for Donut inference.
///
/// The steps run in this order:
///
/// 1. Images with fewer than three channels are expanded to RGB by copying
///    the first channel's value into R, G and B.
/// 2. When `config.alignLongAxis` is set, the image is rotated by 90 degrees
///    if its orientation is strictly opposite to the target's (landscape
///    image with portrait target, or portrait image with landscape target).
///    Square images and square targets never trigger a rotation.
/// 3. The image is resized via `_resizeMaintainAspect`.
/// 4. The image is padded via `_padToSize` to the target width and height.
/// 5. The result is converted with `_imageToTensor`.
///
/// `config.inputSize` is read as `[height, width]`.
///
/// Returns a tensor of shape `(1, 3, height, width)`.
static Tensor preprocessImage(img.Image image, DonutConfig config) {
  final targetH = config.inputSize[0];
  final targetW = config.inputSize[1];

  var processed = image;

  // Ensure RGB format (convert grayscale by duplicating channels).
  // NOTE(review): the replacement image is created with the library's
  // default pixel format; confirm this is acceptable for inputs that are not
  // 8 bits per channel.
  if (processed.numChannels < 3) {
    final rgb = img.Image(width: processed.width, height: processed.height);
    for (var y = 0; y < processed.height; y++) {
      for (var x = 0; x < processed.width; x++) {
        // The first channel carries the gray level for low-channel images.
        final gray = processed.getPixel(x, y).r;
        rgb.setPixelRgb(x, y, gray, gray, gray);
      }
    }
    processed = rgb;
  }

  // Align long axis: rotate only when the orientations are strictly
  // opposite. A square image or a square target has no long axis to align,
  // and rotating in that case would only turn the content sideways.
  if (config.alignLongAxis) {
    final imageIsLandscape = processed.width > processed.height;
    final imageIsPortrait = processed.height > processed.width;
    final targetIsLandscape = targetW > targetH;
    final targetIsPortrait = targetH > targetW;
    final orientationsOppose = (imageIsLandscape && targetIsPortrait) ||
        (imageIsPortrait && targetIsLandscape);
    if (orientationsOppose) {
      processed = img.copyRotate(processed, angle: 90);
    }
  }

  // Resize maintaining aspect ratio.
  processed = _resizeMaintainAspect(processed, targetW, targetH);

  // Pad to exact target size.
  processed = _padToSize(processed, targetW, targetH);

  // Convert to normalized tensor.
  return _imageToTensor(processed);
}