splitSentences method

List<String> splitSentences(
  String text,
  bool useHeuristic,
  bool useQuotesBracketsProcessing,
  int maxRecoverStep,
  int maxRecoverLength,
  int recoverStep,
  bool useStrip,
)

Implementation

/// Splits [text] into a list of sentences using a rule-table driven state
/// machine.
///
/// Parameters:
/// * [text] - input text; zero-width spaces (U+200B) are removed up front.
/// * [useHeuristic] - when true, the syllables "다", "요", "죠", "함" and "음"
///   may also start a sentence-boundary state, and the matching
///   `postProcessing` passes run afterwards.
/// * [useQuotesBracketsProcessing] - when true, quotes and brackets are
///   tracked on stacks and no boundary state is entered while any stack is
///   non-empty.
/// * [maxRecoverStep] - upper bound for [recoverStep]; forced to 0 when
///   `text.length > maxRecoverLength`, which disables recovery.
/// * [maxRecoverLength] - length threshold above which recovery is disabled.
/// * [recoverStep] - current recovery depth; `recoverStep + 1` is passed to
///   `realignByQuote` when a quote/bracket stack is left unbalanced.
/// * [useStrip] - when true, every returned sentence is `trim()`med.
///
/// Returns a new list holding the restored sentences.
List<String> splitSentences(
    String text, bool useHeuristic, bool useQuotesBracketsProcessing, int maxRecoverStep, int maxRecoverLength, int recoverStep, bool useStrip) {
  // True when the rule-table entry for (stat, key) has a bit of [flag] set.
  // The parentheses are required: `&` binds tighter than `??` in Dart, so the
  // unparenthesised form `x ?? 0 & flag` silently ignores the mask.
  bool hasFlag(int stat, String key, int flag) {
    return Utils.intToBool((Rule.table[stat]?[key] ?? 0) & flag);
  }

  if (text.length > maxRecoverLength) {
    maxRecoverStep = 0;
  }

  text = text.replaceAll("\u200b", "");
  BackupManager backupManager = BackupManager();

  List<String> doubleQuoteStack = [];
  List<String> singleQuoteStack = [];
  List<String> bracketStack = [];

  // True while the scan is outside every tracked quote/bracket pair.
  bool outsideGroups() {
    return doubleQuoteStack.isEmpty && singleQuoteStack.isEmpty && bracketStack.isEmpty;
  }

  // Heuristic ending syllable -> state entered when that syllable is accepted.
  Map<String, int> heuristicStats = {
    "다": Stats.DA,
    "요": Stats.YO,
    "죠": Stats.JYO,
    "함": Stats.HAM,
    "음": Stats.UM,
  };
  List<String> sentenceEnders = [".", "!", "?"];

  // Register every "ending syllable + following character" pair whose
  // following character is not in `endPoint`, keyed by the pair's hash.
  // NOTE(review): presumably backup() swaps these pairs for placeholders so
  // they cannot trigger a split, and restore() reverses it - confirm against
  // BackupManager.
  for (int i = 0; i + 1 < text.length; i++) {
    String ch = text[i];
    if (heuristicStats.containsKey(ch) && !endPoint.contains(text[i + 1])) {
      String targetToBackup = ch + text[i + 1];
      backupManager.addItem2Dict(targetToBackup, targetToBackup.hashCode.abs().toString());
    }
  }

  text = backupManager.backup(text);
  // Wrap each listed token in zero-width spaces; they are stripped again
  // before the sentences are returned.
  for (String s in needToReplaceZwsp) {
    text = text.replaceAll(s, "\u200b$s\u200b");
  }

  String prev = "";
  String curSentence = "";
  List<String> results = [];
  int curStat = Stats.DEFAULT;

  int lastSingleQuotePos = 0;
  int lastDoubleQuotePos = 0;
  int lastBracketPos = 0;

  String singleQuotePop = "'";
  String doubleQuotePop = "\"";
  String bracketPop = " ";

  for (int i = 0; i < text.length; i++) {
    String ch = text[i];

    if (curStat == Stats.DEFAULT) {
      if (Const.doubleQuotes.contains(ch)) {
        if (useQuotesBracketsProcessing) {
          if (Const.doubleQuotesOpenToClose.containsKey(ch)) {
            doubleQuotePop = Base.doPushPopSymbol(doubleQuoteStack, Const.doubleQuotesOpenToClose[ch] ?? "", ch);
          } else {
            doubleQuotePop = Base.doPushPopSymbol(doubleQuoteStack, Const.doubleQuotesCloseToOpen[ch] ?? "", ch);
          }
          lastDoubleQuotePos = i;
        }
      } else if (Const.singleQuotes.contains(ch)) {
        if (useQuotesBracketsProcessing) {
          if (Const.singleQuotesOpenToClose.containsKey(ch)) {
            singleQuotePop = Base.doPushPopSymbol(singleQuoteStack, Const.singleQuotesOpenToClose[ch] ?? "", ch);
          } else {
            singleQuotePop = Base.doPushPopSymbol(singleQuoteStack, Const.singleQuotesCloseToOpen[ch] ?? "", ch);
          }
          lastSingleQuotePos = i;
        }
      } else if (Const.bracket.contains(ch)) {
        if (useQuotesBracketsProcessing) {
          if (Const.bracketOpenToClose.containsKey(ch)) {
            bracketPop = Base.doPushPopSymbol(bracketStack, Const.bracketOpenToClose[ch] ?? "", ch);
          } else {
            bracketPop = Base.doPushPopSymbol(bracketStack, Const.bracketCloseToOpen[ch] ?? "", ch);
          }
          lastBracketPos = i;
        }
      } else if (sentenceEnders.contains(ch)) {
        if (outsideGroups() && hasFlag(Stats.SB, prev, Id.PREV)) {
          curStat = Stats.SB;
        }
      }

      if (useHeuristic) {
        // At most one ending can match a single character, so a lookup is
        // equivalent to testing each syllable in turn.
        int? endingStat = heuristicStats[ch];
        if (endingStat != null && outsideGroups() && hasFlag(endingStat, prev, Id.PREV)) {
          curStat = endingStat;
        }
      }
    } else {
      if (Const.doubleQuotes.contains(ch)) {
        lastDoubleQuotePos = i;
      } else if (Const.singleQuotes.contains(ch)) {
        lastSingleQuotePos = i;
      } else if (Const.bracket.contains(ch)) {
        lastBracketPos = i;
      }

      // Exactly one of the following branches handles the character; the
      // first matching condition wins.
      if (ch == " " || hasFlag(Stats.COMMON, ch, Id.CONT)) {
        // Keyed by `prev` like every other NEXT1 check in this chain (the
        // lookup was previously keyed by the flag constant and never matched).
        if (hasFlag(curStat, prev, Id.NEXT1)) {
          curSentence = Base.doTrimSentPushResults(curSentence, results);

          curSentence += prev;
          curStat = Stats.DEFAULT;
        }
      } else if (hasFlag(curStat, ch, Id.NEXT)) {
        if (hasFlag(curStat, prev, Id.NEXT1)) {
          curSentence += prev;
        }
        curStat = Stats.DEFAULT;
      } else if (hasFlag(curStat, ch, Id.NEXT1)) {
        if (hasFlag(curStat, prev, Id.NEXT1)) {
          curSentence = Base.doTrimSentPushResults(curSentence, results);

          curSentence += prev;
          curStat = Stats.DEFAULT;
        }
      } else if (hasFlag(curStat, ch, Id.NEXT2)) {
        if (hasFlag(curStat, prev, Id.NEXT1)) {
          curSentence += prev;
        } else {
          curSentence = Base.doTrimSentPushResults(curSentence, results);
        }
        curStat = Stats.DEFAULT;
      } else if (!Utils.intToBool(Rule.table[curStat]?[ch] ?? 0) || hasFlag(curStat, ch, Id.PREV)) {
        // No table entry for `ch` in this state, or `ch` is itself flagged
        // PREV: flush the current sentence and fall back to DEFAULT.
        curSentence = Base.doTrimSentPushResults(curSentence, results);

        if (hasFlag(curStat, prev, Id.NEXT1)) {
          curSentence += prev;
        }

        curStat = Stats.DEFAULT;

        if (Const.bracket.contains(ch)) {
          if (useQuotesBracketsProcessing) {
            if (Const.bracketOpenToClose.containsKey(ch)) {
              bracketPop = Base.doPushPopSymbol(bracketStack, Const.bracketOpenToClose[ch] ?? "", ch);
            } else {
              bracketPop = Base.doPushPopSymbol(bracketStack, Const.bracketCloseToOpen[ch] ?? "", ch);
            }
            lastBracketPos = i;
          }
        } else if (Const.doubleQuotes.contains(ch)) {
          if (useQuotesBracketsProcessing) {
            if (Const.doubleQuotesOpenToClose.containsKey(ch)) {
              doubleQuotePop = Base.doPushPopSymbol(doubleQuoteStack, Const.doubleQuotesOpenToClose[ch] ?? "", ch);
            } else {
              doubleQuotePop = Base.doPushPopSymbol(doubleQuoteStack, Const.doubleQuotesCloseToOpen[ch] ?? "", ch);
            }
            lastDoubleQuotePos = i;
          }
        } else if (Const.singleQuotes.contains(ch)) {
          if (useQuotesBracketsProcessing) {
            if (Const.singleQuotesOpenToClose.containsKey(ch)) {
              singleQuotePop = Base.doPushPopSymbol(singleQuoteStack, Const.singleQuotesOpenToClose[ch] ?? "", ch);
            } else {
              singleQuotePop = Base.doPushPopSymbol(singleQuoteStack, Const.singleQuotesCloseToOpen[ch] ?? "", ch);
            }
            lastSingleQuotePos = i;
          }
        }
      }
    }

    // Characters flagged NEXT1 in a non-default state are held back here and
    // appended later through `prev`.
    if (curStat == Stats.DEFAULT || !hasFlag(curStat, ch, Id.NEXT1)) {
      curSentence += ch;
    }

    prev = ch;
  }

  if (curSentence.isNotEmpty) {
    curSentence = Base.doTrimSentPushResults(curSentence, results);
  }
  if (hasFlag(curStat, prev, Id.NEXT1)) {
    curSentence += prev;
    Base.doTrimSentPushResults(curSentence, results);
  }

  if (useHeuristic) {
    if (text.contains("다 ")) {
      results = postProcessing(results, Rule.postProcessingDa);
    }
    if (text.contains("요 ")) {
      results = postProcessing(results, Rule.postProcessingYo);
    }
    if (text.contains("죠 ")) {
      results = postProcessing(results, Rule.postProcessingJyo);
    }
    if (text.contains("함 ")) {
      results = postProcessing(results, Rule.postProcessingHam);
    }
    if (text.contains("음 ")) {
      results = postProcessing(results, Rule.postProcessingUm);
    }
  }

  // An unbalanced quote/bracket stack triggers a re-split via realignByQuote,
  // as long as the recovery budget is not exhausted. Later checks overwrite
  // the result of earlier ones.
  if (singleQuoteStack.isNotEmpty && recoverStep < maxRecoverStep) {
    results = realignByQuote(
        text, lastSingleQuotePos, singleQuotePop, useHeuristic, useQuotesBracketsProcessing, maxRecoverStep, maxRecoverLength, recoverStep + 1);
  }
  if (doubleQuoteStack.isNotEmpty && recoverStep < maxRecoverStep) {
    results = realignByQuote(
        text, lastDoubleQuotePos, doubleQuotePop, useHeuristic, useQuotesBracketsProcessing, maxRecoverStep, maxRecoverLength, recoverStep + 1);
  }
  if (bracketStack.isNotEmpty && recoverStep < maxRecoverStep) {
    results = realignByQuote(
        text, lastBracketPos, bracketPop, useHeuristic, useQuotesBracketsProcessing, maxRecoverStep, maxRecoverLength, recoverStep + 1);
  }

  List<String> resultList = [];

  for (String s in results) {
    String restored = backupManager.restore(s);
    restored = restored.replaceAll("\u200b", "");
    resultList.add(useStrip ? restored.trim() : restored);
  }

  return resultList;
}