package kd.ai.gai.core.rag.split;

import java.util.regex.Pattern;
import kd.ai.gai.core.constant.Constant_SensitiveWords;
import kd.bos.util.StringUtils;

/* loaded from: input_file:kd/ai/gai/core/rag/split/AbstractSplitter.class */
public abstract class AbstractSplitter implements ISplit {
    public static final String PARAGREPH_SEPARATOR = "\n\n\n";
    public static final String SPLIT_SENTENCE_CHARS = "。?？!！";
    public static final String SPLIT_LOWER_SENTENCE_CHARS = "。?？!！,，;；";

    public static boolean isComplete(String str) {
        if (StringUtils.isNotEmpty(str)) {
            return str.endsWith("。") || str.endsWith("?") || str.endsWith("？") || str.endsWith("!") || str.endsWith("！");
        }
        return false;
    }

    public static String[] paragraphSimpleSplit(String str) {
        return str.split(PARAGREPH_SEPARATOR);
    }

    public static int findLowerSplitfirstIndex(String str) {
        int i = 0;
        int i2 = 0;
        while (true) {
            if (i2 >= str.length()) {
                break;
            }
            if (SPLIT_LOWER_SENTENCE_CHARS.contains(str.charAt(i2) + "")) {
                i = i2 + 1;
                break;
            }
            i2++;
        }
        return i;
    }

    public static int findLastSymbolIndex(String str) {
        int max = getMax(str.lastIndexOf(12290), str.lastIndexOf(63), str.lastIndexOf(65311), str.lastIndexOf(33), str.lastIndexOf(65281));
        if (max >= 0) {
            return max;
        }
        int max2 = getMax(str.lastIndexOf(59), str.lastIndexOf(65307));
        return max2 >= 0 ? max2 : getMax(str.lastIndexOf(44), str.lastIndexOf(65292));
    }

    public static int getMax(int... iArr) {
        int i = iArr[0];
        for (int i2 = 1; i2 < iArr.length; i2++) {
            i = Math.max(i, iArr[i2]);
        }
        return i;
    }

    public static int findLowerLastSplitMaxlenIndex(String str, int i) {
        String str2 = str;
        if (str.length() > i) {
            str2 = str.substring(0, i);
        }
        int findLastSymbolIndex = findLastSymbolIndex(str2);
        if (findLastSymbolIndex >= 0) {
            findLastSymbolIndex++;
        }
        return findLastSymbolIndex;
    }

    public static String textDataPreprocess(String str) {
        return str.trim().replaceAll(" +", " ").replaceAll("(\\r?\\n(\\s*\\r?\\n)+)", Constant_SensitiveWords.LINESEPARATOR).replaceAll("\\r\\n?", Constant_SensitiveWords.LINESEPARATOR).replaceAll("\\t+", "\t").replaceAll("\\r+", Constant_SensitiveWords.LINESEPARATOR).replaceAll("\\n+", Constant_SensitiveWords.LINESEPARATOR);
    }

    public static String chunkTextPreprocess(String str) {
        return str.trim().replaceFirst("^[\n\r\t ]+", "");
    }
}
