splitSentences method
Implementation
/// Splits [text] into a list of sentences.
///
/// The text is scanned one character at a time by a small state machine whose
/// transitions are looked up in `Rule.table` (state -> character -> bit flags
/// from `Id`). A terminator (`.`, `!`, `?`) or, when [useHeuristic] is true,
/// one of the syllables 다/요/죠/함/음 moves the machine out of
/// `Stats.DEFAULT`; the characters that follow decide whether the pending
/// sentence is pushed to the result list.
///
/// Parameters:
/// * [text] - input text; zero-width spaces (U+200B) are removed first.
/// * [useHeuristic] - enables the ending-syllable states and the matching
///   `postProcessing` passes.
/// * [useQuotesBracketsProcessing] - when true, quotes and brackets are
///   tracked on stacks and no sentence boundary is opened while any stack is
///   non-empty.
/// * [maxRecoverStep] - upper bound for [recoverStep]; forced to 0 when
///   `text.length > maxRecoverLength`, which disables the recovery calls.
/// * [maxRecoverLength] - length threshold described above.
/// * [recoverStep] - current recovery depth; `recoverStep + 1` is forwarded
///   to `realignByQuote`.
/// * [useStrip] - when true every returned sentence is `trim()`-ed.
///
/// Returns a new list containing the restored sentences.
List<String> splitSentences(
    String text, bool useHeuristic, bool useQuotesBracketsProcessing, int maxRecoverStep, int maxRecoverLength, int recoverStep, bool useStrip) {
  if (text.length > maxRecoverLength) {
    maxRecoverStep = 0;
  }
  text = text.replaceAll("\u200b", "");

  BackupManager backupManager = BackupManager();
  List<String> doubleQuoteStack = [];
  List<String> singleQuoteStack = [];
  List<String> bracketStack = [];

  // Register every "<ending syllable><next char>" pair whose next char is not
  // in `endPoint`, then let the backup manager rewrite the text.
  // NOTE(review): presumably this shields those pairs from being split and
  // `restore` undoes it at the end - confirm against BackupManager.
  const List<String> endingSyllables = ["다", "요", "죠", "함", "음"];
  for (int i = 0; i < text.length - 1; i++) {
    String ch = text[i];
    if (endingSyllables.contains(ch) && !endPoint.contains(text[i + 1])) {
      String targetToBackup = ch + text[i + 1];
      backupManager.addItem2Dict(targetToBackup, targetToBackup.hashCode.abs().toString());
    }
  }
  text = backupManager.backup(text);

  // Surround each listed token with zero-width spaces; they are stripped
  // again from every sentence before returning.
  for (String s in needToReplaceZwsp) {
    text = text.replaceAll(s, "\u200b$s\u200b");
  }

  String prev = "";
  String curSentence = "";
  List<String> results = [];
  int curStat = Stats.DEFAULT;
  int lastSingleQuotePos = 0;
  int lastDoubleQuotePos = 0;
  int lastBracketPos = 0;
  String singleQuotePop = "'";
  String doubleQuotePop = "\"";
  String bracketPop = " ";

  // True when no quote or bracket is currently open.
  bool noOpenSymbols() => doubleQuoteStack.isEmpty && singleQuoteStack.isEmpty && bracketStack.isEmpty;

  // Tests one flag of a rule-table entry. The parentheses matter: in Dart `&`
  // binds tighter than `??`, so `entry ?? 0 & flag` would parse as
  // `entry ?? (0 & flag)` and the mask would never be applied to a found entry.
  bool hasFlag(int stat, String key, int flag) => Utils.intToBool((Rule.table[stat]?[key] ?? 0) & flag);

  // The three helpers below update the matching stack via
  // Base.doPushPopSymbol and remember the position of the symbol. They do
  // nothing when quote/bracket processing is disabled.
  void trackDoubleQuote(String ch, int pos) {
    if (!useQuotesBracketsProcessing) {
      return;
    }
    if (Const.doubleQuotesOpenToClose.containsKey(ch)) {
      doubleQuotePop = Base.doPushPopSymbol(doubleQuoteStack, Const.doubleQuotesOpenToClose[ch] ?? "", ch);
    } else {
      doubleQuotePop = Base.doPushPopSymbol(doubleQuoteStack, Const.doubleQuotesCloseToOpen[ch] ?? "", ch);
    }
    lastDoubleQuotePos = pos;
  }

  void trackSingleQuote(String ch, int pos) {
    if (!useQuotesBracketsProcessing) {
      return;
    }
    if (Const.singleQuotesOpenToClose.containsKey(ch)) {
      singleQuotePop = Base.doPushPopSymbol(singleQuoteStack, Const.singleQuotesOpenToClose[ch] ?? "", ch);
    } else {
      singleQuotePop = Base.doPushPopSymbol(singleQuoteStack, Const.singleQuotesCloseToOpen[ch] ?? "", ch);
    }
    lastSingleQuotePos = pos;
  }

  void trackBracket(String ch, int pos) {
    if (!useQuotesBracketsProcessing) {
      return;
    }
    if (Const.bracketOpenToClose.containsKey(ch)) {
      bracketPop = Base.doPushPopSymbol(bracketStack, Const.bracketOpenToClose[ch] ?? "", ch);
    } else {
      bracketPop = Base.doPushPopSymbol(bracketStack, Const.bracketCloseToOpen[ch] ?? "", ch);
    }
    lastBracketPos = pos;
  }

  const List<String> terminators = [".", "!", "?"];
  // Ending syllable -> state entered when that syllable is seen.
  final Map<String, int> heuristicStats = <String, int>{
    "다": Stats.DA,
    "요": Stats.YO,
    "죠": Stats.JYO,
    "함": Stats.HAM,
    "음": Stats.UM,
  };

  for (int i = 0; i < text.length; i++) {
    String ch = text[i];

    if (curStat == Stats.DEFAULT) {
      // A quote/bracket character is consumed here even when processing is
      // disabled, i.e. it is never treated as a terminator.
      if (Const.doubleQuotes.contains(ch)) {
        trackDoubleQuote(ch, i);
      } else if (Const.singleQuotes.contains(ch)) {
        trackSingleQuote(ch, i);
      } else if (Const.bracket.contains(ch)) {
        trackBracket(ch, i);
      } else if (terminators.contains(ch)) {
        if (noOpenSymbols() && hasFlag(Stats.SB, prev, Id.PREV)) {
          curStat = Stats.SB;
        }
      }

      if (useHeuristic) {
        final int? heuristicStat = heuristicStats[ch];
        if (heuristicStat != null && noOpenSymbols() && hasFlag(heuristicStat, prev, Id.PREV)) {
          curStat = heuristicStat;
        }
      }
    } else {
      // Outside DEFAULT only the position of quotes/brackets is recorded.
      if (Const.doubleQuotes.contains(ch)) {
        lastDoubleQuotePos = i;
      } else if (Const.singleQuotes.contains(ch)) {
        lastSingleQuotePos = i;
      } else if (Const.bracket.contains(ch)) {
        lastBracketPos = i;
      }

      // Exactly one of the following cases is applied, first match wins.
      if (ch == " " || hasFlag(Stats.COMMON, ch, Id.CONT)) {
        // Keyed by `prev`, like every other NEXT1 check in this function;
        // indexing the string-keyed table with the int flag Id.PREV could
        // never match.
        if (hasFlag(curStat, prev, Id.NEXT1)) {
          curSentence = Base.doTrimSentPushResults(curSentence, results);
          curSentence += prev;
          curStat = Stats.DEFAULT;
        }
      } else if (hasFlag(curStat, ch, Id.NEXT)) {
        if (hasFlag(curStat, prev, Id.NEXT1)) {
          curSentence += prev;
        }
        curStat = Stats.DEFAULT;
      } else if (hasFlag(curStat, ch, Id.NEXT1)) {
        if (hasFlag(curStat, prev, Id.NEXT1)) {
          curSentence = Base.doTrimSentPushResults(curSentence, results);
          curSentence += prev;
          curStat = Stats.DEFAULT;
        }
      } else if (hasFlag(curStat, ch, Id.NEXT2)) {
        if (hasFlag(curStat, prev, Id.NEXT1)) {
          curSentence += prev;
        } else {
          curSentence = Base.doTrimSentPushResults(curSentence, results);
        }
        curStat = Stats.DEFAULT;
      } else if (!Utils.intToBool(Rule.table[curStat]?[ch] ?? 0) || hasFlag(curStat, ch, Id.PREV)) {
        // No rule for `ch` in this state (or it carries the PREV flag):
        // close the pending sentence and go back to DEFAULT.
        curSentence = Base.doTrimSentPushResults(curSentence, results);
        if (hasFlag(curStat, prev, Id.NEXT1)) {
          curSentence += prev;
        }
        curStat = Stats.DEFAULT;

        // `ch` was not seen by the DEFAULT branch, so track it here.
        if (Const.bracket.contains(ch)) {
          trackBracket(ch, i);
        } else if (Const.doubleQuotes.contains(ch)) {
          trackDoubleQuote(ch, i);
        } else if (Const.singleQuotes.contains(ch)) {
          trackSingleQuote(ch, i);
        }
      }
    }

    // A NEXT1 character seen in a non-DEFAULT state is held back; the cases
    // above append it later through `prev`.
    if (curStat == Stats.DEFAULT || !hasFlag(curStat, ch, Id.NEXT1)) {
      curSentence += ch;
    }
    prev = ch;
  }

  // Flush whatever is left after the last character.
  if (curSentence.isNotEmpty) {
    curSentence = Base.doTrimSentPushResults(curSentence, results);
  }
  if (hasFlag(curStat, prev, Id.NEXT1)) {
    curSentence += prev;
    Base.doTrimSentPushResults(curSentence, results);
  }

  if (useHeuristic) {
    if (text.contains("다 ")) {
      results = postProcessing(results, Rule.postProcessingDa);
    }
    if (text.contains("요 ")) {
      results = postProcessing(results, Rule.postProcessingYo);
    }
    if (text.contains("죠 ")) {
      results = postProcessing(results, Rule.postProcessingJyo);
    }
    if (text.contains("함 ")) {
      results = postProcessing(results, Rule.postProcessingHam);
    }
    if (text.contains("음 ")) {
      results = postProcessing(results, Rule.postProcessingUm);
    }
  }

  // An unbalanced quote/bracket hands the text to realignByQuote together
  // with the position and value recorded for the last such symbol; its
  // result replaces `results`.
  if (singleQuoteStack.isNotEmpty && recoverStep < maxRecoverStep) {
    results = realignByQuote(
        text, lastSingleQuotePos, singleQuotePop, useHeuristic, useQuotesBracketsProcessing, maxRecoverStep, maxRecoverLength, recoverStep + 1);
  }
  if (doubleQuoteStack.isNotEmpty && recoverStep < maxRecoverStep) {
    results = realignByQuote(
        text, lastDoubleQuotePos, doubleQuotePop, useHeuristic, useQuotesBracketsProcessing, maxRecoverStep, maxRecoverLength, recoverStep + 1);
  }
  if (bracketStack.isNotEmpty && recoverStep < maxRecoverStep) {
    results = realignByQuote(
        text, lastBracketPos, bracketPop, useHeuristic, useQuotesBracketsProcessing, maxRecoverStep, maxRecoverLength, recoverStep + 1);
  }

  // Undo the backup substitution and drop the zero-width spaces.
  List<String> resultList = [];
  for (String s in results) {
    s = backupManager.restore(s);
    s = s.replaceAll("\u200b", "");
    resultList.add(useStrip ? s.trim() : s);
  }
  return resultList;
}