/*
 * Decompiled with CFR 0.152.
 */
package kd.bos.gptas.kmbase.newsplitter.utils;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.commons.lang.StringUtils;

/**
 * Base class for text splitters that cut a text into pieces and then merge
 * those pieces back into chunks bounded by {@link #getChunkSize()}, carrying
 * up to {@link #getChunkOverlap()} units of trailing content from one chunk
 * into the next.
 *
 * <p>Lengths are measured with a pluggable {@code lengthFunction}, which
 * defaults to {@link String#length()}.
 */
public abstract class TextSplitter {
    /** Opening marker of an embedded image reference that must not be cut in half. */
    private static final String IMG_OPEN_TAG = "<cvp.img>";
    /** Closing marker of an embedded image reference that must not be cut in half. */
    private static final String IMG_CLOSE_TAG = "</cvp.img>";

    /** Maximum measured length of a merged chunk. */
    protected int chunkSize = 4000;
    /** Maximum measured length carried over from one chunk into the next. */
    protected int chunkOverlap = 200;
    /** Measures the length of a piece of text; defaults to the character count. */
    protected Function<String, Integer> lengthFunction = String::length;
    /** Not read by this class; available to subclasses. */
    protected boolean keepSeparator;
    /** Not read by this class; available to subclasses. */
    protected boolean addStartIndex;

    /**
     * Splits the given text into chunks.
     *
     * @param text the text to split
     * @return the resulting chunks
     */
    public abstract List<String> splitText(String text);

    public int getChunkSize() {
        return this.chunkSize;
    }

    public void setChunkSize(int chunkSize) {
        this.chunkSize = chunkSize;
    }

    public int getChunkOverlap() {
        return this.chunkOverlap;
    }

    public void setChunkOverlap(int chunkOverlap) {
        this.chunkOverlap = chunkOverlap;
    }

    public Function<String, Integer> getLengthFunction() {
        return this.lengthFunction;
    }

    public void setLengthFunction(Function<String, Integer> lengthFunction) {
        this.lengthFunction = lengthFunction;
    }

    public boolean isKeepSeparator() {
        return this.keepSeparator;
    }

    public void setKeepSeparator(boolean keepSeparator) {
        this.keepSeparator = keepSeparator;
    }

    public boolean isAddStartIndex() {
        return this.addStartIndex;
    }

    public void setAddStartIndex(boolean addStartIndex) {
        this.addStartIndex = addStartIndex;
    }

    /**
     * Joins the pieces with the separator and trims the result.
     *
     * @return the joined text, or {@code null} when it is empty after trimming
     */
    private String joinDocs(List<String> docs, String separator) {
        String text = String.join(separator, docs).trim();
        return text.isEmpty() ? null : text;
    }

    /**
     * Greedily merges small pieces into chunks no longer than {@code chunkSize}
     * (a single piece that is itself longer than {@code chunkSize} is emitted
     * as-is). After a chunk is emitted, leading pieces are dropped until at
     * most {@code chunkOverlap} units remain, and the remainder seeds the next
     * chunk.
     *
     * @param splits    the pieces to merge, in order
     * @param separator the separator the pieces were split on; the
     *                  regex-escaped forms known to {@link #getSeparator(String)}
     *                  are translated to their literal form before joining
     * @return the merged chunks; blank chunks are omitted
     */
    protected List<String> mergeSplits(List<String> splits, String separator) {
        // Measure the separator that is actually inserted between pieces, not its
        // regex-escaped spelling (e.g. "\\n" is two characters but joins as one).
        String joinSeparator = TextSplitter.getSeparator(separator);
        int separatorLength = this.lengthFunction.apply(joinSeparator);
        List<String> docs = new ArrayList<>();
        List<String> currentDoc = new ArrayList<>(10);
        int total = 0;
        for (String piece : splits) {
            int length = this.lengthFunction.apply(piece);
            boolean overflows = total + length + (currentDoc.isEmpty() ? 0 : separatorLength) > this.chunkSize;
            if (overflows && !currentDoc.isEmpty()) {
                String doc = this.joinDocs(currentDoc, joinSeparator);
                if (doc != null) {
                    docs.add(doc);
                }
                // Drop leading pieces until the carry-over fits the overlap budget and
                // leaves room for the incoming piece. The emptiness guard prevents an
                // IndexOutOfBoundsException when chunkOverlap is negative.
                while (!currentDoc.isEmpty()
                        && (total > this.chunkOverlap
                            || (total + length + separatorLength > this.chunkSize && total > 0))) {
                    total -= this.lengthFunction.apply(currentDoc.get(0)) + (currentDoc.size() > 1 ? separatorLength : 0);
                    currentDoc.remove(0);
                }
            }
            currentDoc.add(piece);
            total += length + (currentDoc.size() > 1 ? separatorLength : 0);
        }
        String lastDoc = this.joinDocs(currentDoc, joinSeparator);
        if (lastDoc != null) {
            docs.add(lastDoc);
        }
        return docs;
    }

    /**
     * Splits {@code text} on the regular expression {@code separator} and
     * discards empty pieces.
     *
     * @param text          the text to split; {@code null} yields an empty list
     * @param separator     a regular expression; when {@code null} or empty the
     *                      text is returned as a single piece
     * @param keepSeparator when {@code true} the split happens just after each
     *                      match (via a look-behind), so every separator stays
     *                      attached to the end of the preceding piece
     * @return the non-empty pieces, in order
     */
    public static List<String> splitTextWithRegex(String text, String separator, boolean keepSeparator) {
        if (text == null) {
            return new ArrayList<>();
        }
        List<String> splits;
        if (separator != null && !separator.isEmpty()) {
            String pattern = keepSeparator ? "(?<=" + separator + ")" : separator;
            splits = Arrays.asList(text.split(pattern));
        } else {
            splits = Collections.singletonList(text);
        }
        return splits.stream()
                .filter(piece -> piece != null && !piece.isEmpty())
                .collect(Collectors.toList());
    }

    /**
     * Translates the regex-escaped separators this class knows about into the
     * literal text they match; any other separator is returned unchanged.
     */
    private static String getSeparator(String separator) {
        switch (separator) {
            case "\\n\\n": {
                return "\n\n";
            }
            case "\\n": {
                return "\n";
            }
            case "\\.": {
                return ".";
            }
            case "\\?": {
                return "?";
            }
            case "\\|": {
                return "|";
            }
        }
        return separator;
    }

    /**
     * Breaks {@code text} into pieces of at most {@code maxLength} characters,
     * preferring to break at whitespace. Runs of whitespace are collapsed to a
     * single space. A single word longer than {@code maxLength} is cut into
     * several pieces, and a piece may exceed {@code maxLength} when that is
     * needed to keep a {@code <cvp.img>...</cvp.img>} marker intact.
     *
     * @param text      the text to truncate
     * @param maxLength the maximum number of characters per piece
     * @return the pieces in order; empty when {@code text} is {@code null} or
     *         {@code maxLength} is not positive
     */
    public static List<String> truncateText(String text, int maxLength) {
        List<String> truncatedTexts = new ArrayList<>();
        if (text == null || maxLength <= 0) {
            return truncatedTexts;
        }
        String[] words = text.split("\\s+");
        StringBuilder currentText = new StringBuilder();
        for (String word : words) {
            if (currentText.length() + word.length() + 1 > maxLength) {
                if (currentText.length() > 0) {
                    truncatedTexts.add(currentText.toString().trim());
                    currentText = new StringBuilder();
                }
                // Hard-cut an over-long word; safeSubString always returns at least
                // maxLength (> 0), so each iteration makes progress.
                while (word.length() > maxLength) {
                    int end = TextSplitter.safeSubString(word, maxLength);
                    truncatedTexts.add(word.substring(0, end));
                    word = word.substring(end);
                }
            }
            if (currentText.length() > 0) {
                currentText.append(" ");
            }
            currentText.append(word);
        }
        if (currentText.length() > 0) {
            truncatedTexts.add(currentText.toString().trim());
        }
        return truncatedTexts;
    }

    /**
     * Returns a cut position at or after {@code end} that does not fall inside
     * a {@code <cvp.img>...</cvp.img>} marker. When the cut would land inside a
     * marker, the position moves to just after the closing tag, also consuming
     * one directly following {@code ')'}.
     *
     * @param text the text about to be cut
     * @param end  the preferred cut position
     * @return the adjusted cut position, never smaller than {@code end}
     */
    private static int safeSubString(String text, int end) {
        int startTagIndex = text.lastIndexOf(IMG_OPEN_TAG, end);
        if (startTagIndex == -1) {
            return end;
        }
        int endTagIndex = text.lastIndexOf(IMG_CLOSE_TAG, end);
        if (endTagIndex == -1 || endTagIndex < startTagIndex) {
            // The last opening tag is still unclosed at 'end'; look ahead for its close.
            endTagIndex = text.indexOf(IMG_CLOSE_TAG, end);
            if (endTagIndex == -1) {
                return end;
            }
        }
        int cut = endTagIndex + IMG_CLOSE_TAG.length();
        // Bounds check: the closing tag may be the very last thing in the text.
        if (cut < text.length() && text.charAt(cut) == ')') {
            cut++;
        }
        return Math.max(cut, end);
    }

    /**
     * Returns the trailing part of {@code text} to carry over as overlap.
     *
     * <p>The last {@code chunkOverlap} characters are taken. If that tail
     * contains {@code separator}, only the part after its first occurrence is
     * returned, unless that part is blank, in which case the whole tail is
     * returned.
     *
     * @param text      the text to take the overlap from
     * @param separator the literal separator to align the overlap on; when
     *                  {@code null} or empty the tail is returned unaligned
     * @return the overlap text; {@code text} itself when it is not longer than
     *         {@code chunkOverlap}; {@code ""} when {@code text} is {@code null}
     */
    public String getChunkOverText(String text, String separator) {
        if (text == null) {
            return "";
        }
        // A negative overlap is treated as no overlap instead of failing in substring.
        int overlap = Math.max(this.chunkOverlap, 0);
        if (text.length() <= overlap) {
            return text;
        }
        String tail = text.substring(text.length() - overlap);
        if (separator == null || separator.isEmpty()) {
            return tail;
        }
        int separatorIndex = tail.indexOf(separator);
        if (separatorIndex != -1) {
            // Skip the whole separator, not just its first character.
            String afterSeparator = tail.substring(separatorIndex + separator.length());
            if (!afterSeparator.trim().isEmpty()) {
                return afterSeparator;
            }
        }
        return tail;
    }
}

