AhoCorasick.fromWordList constructor
initialize the aho corasick algorithm with a given list of words
this will create the state machine. If you want to force that recognized
words are either at the beginning / end or are surounded by spaces
set separateBySpaces
to true
Implementation
factory AhoCorasick.fromWordList(List<String> patterns,
{bool separateBySpaces = false}) {
// initial state is the empty word
final states = [WordState(index: 0, words: [], failure: 0)];
var longest = 0;
// ignore: omit_local_variable_types
Map<int, Map<String, int>> transitionMap = {};
patterns.forEach((inputPattern) {
final pattern = separateBySpaces ? ' $inputPattern ' : inputPattern;
var curStateIndex = 0;
longest = max(longest, pattern.length);
for (var i = 0; i < pattern.length; i++) {
final stateMap = transitionMap[curStateIndex] ?? {};
final resState = stateMap[pattern[i]];
if (resState == null) {
final newStateIdx = states.length;
final newState = WordState(
index: newStateIdx,
words: (i == pattern.length - 1) ? [pattern] : [],
isFinal: i == pattern.length - 1);
states.add(newState);
stateMap.putIfAbsent(pattern[i], () => newStateIdx);
transitionMap.update(curStateIndex, (_) => stateMap,
ifAbsent: () => stateMap);
curStateIndex = newStateIdx;
} else {
curStateIndex = resState;
if (i == pattern.length - 1) {
final curState = WordState(
index: curStateIndex,
words: states[curStateIndex].words..add(pattern),
isFinal: i == pattern.length - 1);
states[curStateIndex] = curState;
}
}
}
});
// edge case when there are no patterns ;)
if (transitionMap.containsKey(0)) {
// if the state machine cannot follow up on a pattern it might
// follow up on other patterns those are tracked with this
// failure map
final queue = Queue<int>();
transitionMap[0]?.forEach((character, resState) {
states[resState] = states[resState].copyWith(
failure: 0,
);
queue.add(resState);
});
while (queue.isNotEmpty) {
final stateIdx = queue.removeFirst();
(transitionMap[stateIdx] ?? {}).forEach((character, resState) {
queue.add(resState);
var failStateIdx = states[stateIdx].failure;
while ((transitionMap[failStateIdx] ?? {})[character] == null) {
if (failStateIdx == 0) {
break;
}
failStateIdx = states[failStateIdx].failure;
}
failStateIdx = (transitionMap[failStateIdx] ?? {})[character] ?? 0;
states[resState] = states[resState].copyWith(
failure: failStateIdx,
words: states[resState].words..addAll(states[failStateIdx].words),
isFinal: states[failStateIdx].isFinal || states[resState].isFinal,
);
});
}
}
return AhoCorasick._(
separateBySpaces: separateBySpaces,
longestPattern: longest,
stateMachine: StateMachine<WordState, String>(
states: states,
isSuccessState: (state) => state.isFinal,
transition: (
{required WordState activeState, required String input}) {
var curState = activeState.index;
var endState = (transitionMap[curState] ?? {})[input] ?? -1;
while (endState == -1) {
endState = states[curState].failure;
curState = endState;
}
return states[(transitionMap[curState] ?? {})[input] ?? 0];
}));
}