lazyLoad method
Loads documents lazily.
This is useful for loading large amounts of data, as it allows you to
process each Document as it is loaded, rather than waiting for the
entire data set to be loaded in memory.
Implementation
/// Lazily loads the documents found in the directory at [filePath].
///
/// The directory is listed (descending into sub-directories when [recursive]
/// is set), only the files accepted by `_shouldLoadFile` are kept, and each
/// remaining file is handed to a loader whose documents are re-emitted on the
/// returned stream one at a time.
///
/// When [sampleSize] is greater than zero, at most [sampleSize] files are
/// loaded. If [randomizeSample] is set the files are shuffled first, using
/// [sampleSeed] when provided and the current time otherwise; without
/// shuffling the first [sampleSize] files in listing order are used.
///
/// The loader for a file is looked up by its lower-cased extension in
/// [loaderMap] (or in [defaultLoaderMap] when [loaderMap] is empty), falling
/// back to a [TextLoader] when the extension has no entry.
///
/// Throws an [ArgumentError] if [glob] is empty. Because this is an `async*`
/// generator, the error surfaces on the stream once it is listened to.
@override
Stream<Document> lazyLoad() async* {
  if (glob.isEmpty) {
    throw ArgumentError('Glob pattern must not be empty');
  }

  // List the directory asynchronously: a blocking `listSync` inside an async
  // generator would stall the isolate while a large tree is being walked.
  final entities = await Directory(filePath)
      .list(recursive: recursive)
      .toList();

  var files = entities
      .whereType<File>()
      .where(_shouldLoadFile)
      .map((e) => XFile(e.path));

  if (sampleSize > 0) {
    if (randomizeSample) {
      final seed = sampleSeed ?? DateTime.now().millisecondsSinceEpoch;
      // Materialize before shuffling; a lazy iterable cannot be shuffled.
      files = files.toList(growable: false)..shuffle(Random(seed));
    }
    files = files.take(sampleSize);
  }

  // The loader table does not depend on the file, so pick it once.
  final loaders = loaderMap.isNotEmpty ? loaderMap : defaultLoaderMap;

  for (final file in files) {
    final ext = path.extension(file.path).toLowerCase();
    final loader = loaders[ext]?.call(file.path) ?? TextLoader(file.path);

    final defaultMetadata = await _buildDefaultMetadata(file);
    final metadata =
        metadataBuilder?.call(file, defaultMetadata) ?? defaultMetadata;

    await for (final doc in loader.lazyLoad()) {
      // If the loader only reported this file as the document's `source`,
      // the metadata built above is used as-is; otherwise the loader's own
      // metadata is layered on top of it (loader keys win on conflict).
      yield Document(
        pageContent: doc.pageContent,
        metadata: doc.metadata['source'] == file.path
            ? metadata
            : {...metadata, ...doc.metadata},
      );
    }
  }
}