lazyLoad method

  1. @override
Stream<Document> lazyLoad()

Loads documents lazily.

This is useful when loading large amounts of data, because each Document can be processed as soon as it is loaded instead of waiting for the entire data set to be loaded into memory.

Implementation

/// Lazily loads the files under [filePath] and emits their [Document]s.
///
/// Files are discovered by listing the directory (descending into
/// sub-directories when [recursive] is set) and keeping the entries accepted
/// by `_shouldLoadFile`. When [sampleSize] is greater than zero only the
/// first [sampleSize] files are loaded; if [randomizeSample] is also set the
/// files are shuffled first, using [sampleSeed] or, when it is null, the
/// current time as the random seed.
///
/// Each file is read with the loader registered for its lower-cased
/// extension in [loaderMap] (or [defaultLoaderMap] when [loaderMap] is
/// empty), falling back to [TextLoader] when no loader is registered.
///
/// Throws an [ArgumentError] if [glob] is empty. Errors raised while listing
/// the directory are surfaced as errors on the returned stream.
@override
Stream<Document> lazyLoad() async* {
  if (glob.isEmpty) {
    throw ArgumentError('Glob pattern must not be empty');
  }

  // List asynchronously: a synchronous listing inside an `async*` body would
  // block the isolate for the whole (possibly recursive) directory walk.
  final entities =
      await Directory(filePath).list(recursive: recursive).toList();

  var files = entities
      .whereType<File>()
      .where(_shouldLoadFile)
      .map((e) => XFile(e.path));

  if (sampleSize > 0) {
    if (randomizeSample) {
      final seed = sampleSeed ?? DateTime.now().millisecondsSinceEpoch;
      files = files.toList(growable: false)..shuffle(Random(seed));
    }
    files = files.take(sampleSize);
  }

  // The loader table does not depend on the file, so pick it once.
  final loaders = loaderMap.isNotEmpty ? loaderMap : defaultLoaderMap;

  for (final file in files) {
    final ext = path.extension(file.path).toLowerCase();
    final loader = loaders[ext]?.call(file.path) ?? TextLoader(file.path);

    // A custom builder may replace the default metadata; a null result (or
    // no builder at all) keeps the defaults.
    final defaultMetadata = await _buildDefaultMetadata(file);
    final metadata =
        metadataBuilder?.call(file, defaultMetadata) ?? defaultMetadata;

    await for (final doc in loader.lazyLoad()) {
      // When the loader reports the file itself as the source, the
      // directory-level metadata is used as-is; otherwise the loader's own
      // metadata is merged on top and wins on key conflicts.
      // NOTE(review): in the first case any additional keys the loader set
      // are discarded - confirm this is intended.
      yield Document(
        pageContent: doc.pageContent,
        metadata: doc.metadata['source'] == file.path
            ? metadata
            : {...metadata, ...doc.metadata},
      );
    }
  }
}