/*
 * Decompiled with CFR 0.152.
 */
package kd.ai.gai.core.rag.split;

import java.util.regex.Pattern;
import kd.ai.gai.core.rag.split.ISplit;
import kd.bos.util.StringUtils;

public abstract class AbstractSplitter
implements ISplit {
    public static final String PARAGREPH_SEPARATOR = "\n\n\n";
    public static final String SPLIT_SENTENCE_CHARS = "\u3002?\uff1f!\uff01";
    public static final String SPLIT_LOWER_SENTENCE_CHARS = "\u3002?\uff1f!\uff01,\uff0c;\uff1b";

    public static boolean isComplete(String str) {
        if (StringUtils.isNotEmpty((String)str)) {
            return str.endsWith("\u3002") || str.endsWith("?") || str.endsWith("\uff1f") || str.endsWith("!") || str.endsWith("\uff01");
        }
        return false;
    }

    public static String[] paragraphSimpleSplit(String input) {
        return input.split(PARAGREPH_SEPARATOR);
    }

    public static int findLowerSplitfirstIndex(String content) {
        int indexToSplitAt = 0;
        for (int i = 0; i < content.length(); ++i) {
            if (!SPLIT_LOWER_SENTENCE_CHARS.contains(content.charAt(i) + "")) continue;
            indexToSplitAt = i + 1;
            break;
        }
        return indexToSplitAt;
    }

    public static int findLastSymbolIndex(String content) {
        int superIndex = AbstractSplitter.getMax(content.lastIndexOf(12290), content.lastIndexOf(63), content.lastIndexOf(65311), content.lastIndexOf(33), content.lastIndexOf(65281));
        if (superIndex >= 0) {
            return superIndex;
        }
        int secendIndex = AbstractSplitter.getMax(content.lastIndexOf(59), content.lastIndexOf(65307));
        if (secendIndex >= 0) {
            return secendIndex;
        }
        int lowerIndex = AbstractSplitter.getMax(content.lastIndexOf(44), content.lastIndexOf(65292));
        return lowerIndex;
    }

    public static int getMax(int ... ints) {
        int bigger = ints[0];
        for (int i = 1; i < ints.length; ++i) {
            bigger = Math.max(bigger, ints[i]);
        }
        return bigger;
    }

    public static int findLowerLastSplitMaxlenIndex(String content, int maxSplitLen) {
        int indexToSplitAt;
        String findSourceContent = content;
        if (content.length() > maxSplitLen) {
            findSourceContent = content.substring(0, maxSplitLen);
        }
        if ((indexToSplitAt = AbstractSplitter.findLastSymbolIndex(findSourceContent)) >= 0) {
            ++indexToSplitAt;
        }
        return indexToSplitAt;
    }

    public static String textDataPreprocess(String input) {
        String result = input.trim().replaceAll(" +", " ").replaceAll("(\\r?\\n(\\s*\\r?\\n)+)", "\n").replaceAll("\\r\\n?", "\n").replaceAll("\\t+", "\t").replaceAll("\\r+", "\n").replaceAll("\\n+", "\n");
        return result;
    }

    public static String chunkTextPreprocess(String input) {
        String result = input.trim().replaceFirst("^[\n\r\t ]+", "");
        return result;
    }
}

