query method
Performs a query against the index using the yielded lunr.Query object.
If performing programmatic queries against the index, this method is preferred over lunr.Index#search so as to avoid the additional query parsing overhead.
A query object is yielded to the supplied function which should be used to express the query to be run against the index.
Note that although this function takes a callback parameter it is not an asynchronous operation, the callback is just yielded a query object to be customized.
QueryCallback fn - a function that is passed the lunr.Query object and uses it to build the query; it must configure the query synchronously before returning.
Implementation
/// Performs a query against the index using the yielded [Query] object.
///
/// If performing programmatic queries against the index, this method is
/// preferred over `search` so as to avoid the additional query parsing
/// overhead. Although this method takes a callback parameter it is not an
/// asynchronous operation: [fn] is synchronously handed a [Query] object
/// which should be used to express the query to be run against the index.
///
/// Returns the matching documents as a score-sorted list of [DocMatch],
/// highest score first.
List<DocMatch> query(QueryCallback fn) {
  // for each query clause
  // * process terms
  // * expand terms from token set
  // * find matching documents and metadata
  // * get document vectors
  // * score documents
  Query query = Query(fields);
  Map<FieldRef, MatchData> matchingFields = {};
  Map<String, Vector> queryVectors = {};
  var termFieldCache = {}, requiredMatches = {}, prohibitedMatches = {};

  // To support field level boosts a query vector is created per
  // field. An empty vector is eagerly created to support negated
  // queries.
  for (String field in fields) {
    queryVectors[field] = Vector();
  }

  fn(query /*, query*/);

  for (Clause clause in query.clauses) {
    // Unless the pipeline has been disabled for this term, which is
    // the case for terms with wildcards, we need to pass the clause
    // term through the search pipeline. A pipeline returns an array
    // of processed terms. Pipeline functions may expand the passed
    // term, which means we may end up performing multiple index lookups
    // for a single query term.
    List<String>? terms;
    lunr.Set clauseMatches = lunr.Set.empty;

    if (clause.usePipeline ?? false) {
      terms =
          pipeline.runString(clause.term, {'fields': clause.fields ?? []});
    } else {
      terms = [clause.term];
    }

    for (String term in terms) {
      /*
       * Each term returned from the pipeline needs to use the same query
       * clause object, e.g. the same boost and or edit distance. The
       * simplest way to do this is to re-use the clause object but mutate
       * its term property.
       */
      clause.term = term;

      /*
       * From the term in the clause we create a token set which will then
       * be used to intersect the indexes token set to get a list of terms
       * to lookup in the inverted index
       */
      TokenSet termTokenSet = TokenSet.fromClause(clause);
      List<String> expandedTerms = tokenSet.intersect(termTokenSet).toArray();

      /*
       * If a term marked as required does not exist in the tokenSet it is
       * impossible for the search to return any matches. We set all the field
       * scoped required matches set to empty and stop examining any further
       * clauses.
       */
      if (expandedTerms.isEmpty &&
          clause.presence == QueryPresence.REQUIRED) {
        for (String? field in clause.fields!) {
          requiredMatches[field] = lunr.Set.empty;
        }
        break;
      }

      for (String expandedTerm in expandedTerms) {
        /*
         * For each term get the posting and termIndex, this is required for
         * building the query vector.
         */
        Posting posting = invertedIndex[expandedTerm]!;
        var termIndex = posting.index;

        for (String? field in clause.fields ?? []) {
          /*
           * For each field that this query term is scoped by (by default
           * all fields are in scope) we need to get all the document refs
           * that have this term in that field.
           *
           * The posting is the entry in the invertedIndex for the matching
           * term from above.
           */
          Map<String, dynamic> fieldPosting =
              posting[field].cast<String, dynamic>();
          List<String> matchingDocumentRefs =
              fieldPosting.keys.map((e) => e.toString()).toList();
          String termField = '$expandedTerm/${field!}';
          lunr.Set matchingDocumentsSet = lunr.Set(matchingDocumentRefs);

          /*
           * if the presence of this term is required ensure that the matching
           * documents are added to the set of required matches for this clause.
           *
           */
          if (clause.presence == QueryPresence.REQUIRED) {
            clauseMatches = clauseMatches.union(matchingDocumentsSet);

            if (requiredMatches[field] == null) {
              requiredMatches[field] = lunr.Set.complete;
            }
          }

          /*
           * if the presence of this term is prohibited ensure that the matching
           * documents are added to the set of prohibited matches for this field,
           * creating that set if it does not yet exist.
           */
          if (clause.presence == QueryPresence.PROHIBITED) {
            if (prohibitedMatches[field] == null) {
              prohibitedMatches[field] = lunr.Set.empty;
            }
            prohibitedMatches[field] =
                prohibitedMatches[field].union(matchingDocumentsSet);

            /*
             * Prohibited matches should not be part of the query vector used for
             * similarity scoring and no metadata should be extracted so we continue
             * to the next field
             */
            continue;
          }

          /*
           * The query field vector is populated using the termIndex found for
           * the term and a unit value with the appropriate boost applied.
           * Using upsert because there could already be an entry in the vector
           * for the term we are working with. In that case we just add the scores
           * together.
           */
          queryVectors[field]!
              .upsert(termIndex, clause.boost, (num a, num b) => a + b);

          /*
           * If we've already seen this term, field combo then we've already collected
           * the matching documents and metadata, no need to go through all that again
           */
          if (termFieldCache[termField] ?? false) {
            continue;
          }

          for (String matchingDocumentRef in matchingDocumentRefs) {
            /*
             * All metadata for this term/field/document triple
             * are then extracted and collected into an instance
             * of lunr.MatchData ready to be returned in the query
             * results
             */
            FieldRef matchingFieldRef = FieldRef(matchingDocumentRef, field);
            Map<String, dynamic> metadata =
                fieldPosting[matchingDocumentRef].cast<String, dynamic>();
            MatchData? fieldMatch = matchingFields[matchingFieldRef];
            if (fieldMatch == null) {
              matchingFields[matchingFieldRef] =
                  MatchData(expandedTerm, field, metadata);
            } else {
              fieldMatch.add(expandedTerm, field, metadata);
            }
          }

          termFieldCache[termField] = true;
        }
      }
    }

    /*
     * If the presence was required we need to update the requiredMatches field sets.
     * We do this after all fields for the term have collected their matches because
     * the clause terms presence is required in _any_ of the fields not _all_ of the
     * fields.
     */
    if (clause.presence == QueryPresence.REQUIRED) {
      for (String? field in clause.fields ?? []) {
        requiredMatches[field] =
            requiredMatches[field].intersect(clauseMatches);
      }
    }
  }

  /*
   * Need to combine the field scoped required and prohibited
   * matching documents into a global set of required and prohibited
   * matches
   */
  lunr.Set allRequiredMatches = lunr.Set.complete,
      allProhibitedMatches = lunr.Set.empty;

  for (String field in fields) {
    if (requiredMatches.containsKey(field)) {
      allRequiredMatches =
          allRequiredMatches.intersect(requiredMatches[field]);
    }

    if (prohibitedMatches.containsKey(field)) {
      allProhibitedMatches =
          allProhibitedMatches.union(prohibitedMatches[field]);
    }
  }

  Iterable<dynamic> matchingFieldRefs = matchingFields.keys;
  List<DocMatch> results = [];
  Map<String, DocMatch> matches = {};

  /*
   * If the query is negated (contains only prohibited terms)
   * we need to get _all_ fieldRefs currently existing in the
   * index. This is only done when we know that the query is
   * entirely prohibited terms to avoid any cost of getting all
   * fieldRefs unnecessarily.
   *
   * Additionally, blank MatchData must be created to correctly
   * populate the results.
   */
  if (query.isNegated()) {
    matchingFieldRefs = fieldVectors.keys;

    for (var matchingFieldRef in matchingFieldRefs) {
      FieldRef fieldRef = FieldRef.fromString(matchingFieldRef.toString());
      matchingFields[fieldRef] = MatchData();
    }
  }

  // NOTE(review): loop variable renamed from `matchingFieldRefs` (which
  // shadowed the iterable declared above) to the singular
  // `matchingFieldRef`, matching the negated-query loop just above.
  for (var matchingFieldRef in matchingFieldRefs) {
    /*
     * Currently we have document fields that match the query, but we
     * need to return documents. The matchData and scores are combined
     * from multiple fields belonging to the same document.
     *
     * Scores are calculated by field, using the query vectors created
     * above, and combined into a final document score using addition.
     */
    var fieldRef = FieldRef.fromString(matchingFieldRef.toString());
    String docRef = fieldRef.docRef;

    if (!allRequiredMatches.contains(docRef)) {
      continue;
    }

    if (allProhibitedMatches.contains(docRef)) {
      continue;
    }

    Vector? fieldVector = fieldVectors[fieldRef];
    double score = queryVectors[fieldRef.fieldName]!.similarity(fieldVector);

    DocMatch? docMatch;
    if ((docMatch = matches[docRef]) != null) {
      docMatch!.score += score;
      docMatch.matchData.combine(matchingFields[fieldRef]!);
    } else {
      DocMatch match = DocMatch(
          ref: docRef, score: score, matchData: matchingFields[fieldRef]!);
      matches[docRef] = match;
      results.add(match);
    }
  }

  /*
   * Sort the results objects by score, highest first.
   */
  return results
    ..sort((DocMatch a, DocMatch b) {
      return b.score.compareTo(a.score);
    });
}