clusterNearDuplicates function
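Greedily groups documents into near-duplicate clusters: each not-yet-clustered document seeds a cluster and absorbs every later unclustered document whose similarity to it meets the threshold. Returns the clusters as lists of document indices.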
Implementation
/// Partitions [documents] into clusters of near-duplicates using a greedy,
/// single-pass strategy.
///
/// Documents are visited in index order. Each document that has not yet been
/// placed in a cluster becomes the seed of a new one, and every later
/// unplaced document whose `textSimilarity` with the seed is at least
/// [threshold] joins that cluster. Candidates are compared against the seed
/// only, never against other members of the cluster.
///
/// Returns one list of indices into [documents] per cluster, seed first and
/// members in ascending order. Every index appears in exactly one cluster;
/// a document with no near-duplicates forms a singleton cluster. An empty
/// [documents] list yields an empty result.
List<List<int>> clusterNearDuplicates(
  List<String> documents, {
  double threshold = 0.85,
}) {
  final groups = <List<int>>[];
  // Indices that already belong to some cluster.
  final claimed = <int>{};

  for (var seed = 0; seed < documents.length; seed++) {
    // `Set.add` returns false when the index was already claimed, so this
    // both tests and marks the seed in one step.
    if (!claimed.add(seed)) continue;

    final group = [
      seed,
      for (var candidate = seed + 1; candidate < documents.length; candidate++)
        if (!claimed.contains(candidate) &&
            textSimilarity(documents[seed], documents[candidate]) >= threshold)
          candidate,
    ];

    // Marking members after the scan is safe: within one scan each candidate
    // index is inspected exactly once.
    claimed.addAll(group);
    groups.add(group);
  }

  return groups;
}