/*
 * Decompiled with CFR 0.152.
 */
package kd.bos.gptas.km.splitter.utils;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.lang.StringUtils;

/**
 * Base class for text splitters: holds the chunking configuration and provides
 * helpers to cut a text on a separator and to merge the resulting pieces into
 * chunks bounded by {@link #chunkSize}, with roughly {@link #chunkOverlap} of
 * trailing content repeated at the start of the next chunk.
 *
 * <p>Subclasses implement {@link #splitText(String)}.
 */
public abstract class TextSplitter {
    /** Upper bound for the measured length of a merged chunk (default 4000). */
    protected int chunkSize = 4000;
    /** Measured length that may be carried over from one chunk into the next (default 200). */
    protected int chunkOverlap = 200;
    /** Measures a string; defaults to {@link String#length()}. */
    protected Function<String, Integer> lengthFunction = String::length;
    /** Configuration flag; not read by this base class (presumably consumed by subclasses). */
    protected boolean keepSeparator;
    /** Configuration flag; not read by this base class (presumably consumed by subclasses). */
    protected boolean addStartIndex;

    /**
     * Splits the given text into chunks.
     *
     * @param text the text to split
     * @return the resulting chunks
     */
    public abstract List<String> splitText(String text);

    public int getChunkSize() {
        return this.chunkSize;
    }

    public void setChunkSize(int chunkSize) {
        this.chunkSize = chunkSize;
    }

    public int getChunkOverlap() {
        return this.chunkOverlap;
    }

    public void setChunkOverlap(int chunkOverlap) {
        this.chunkOverlap = chunkOverlap;
    }

    public Function<String, Integer> getLengthFunction() {
        return this.lengthFunction;
    }

    public void setLengthFunction(Function<String, Integer> lengthFunction) {
        this.lengthFunction = lengthFunction;
    }

    public boolean isKeepSeparator() {
        return this.keepSeparator;
    }

    public void setKeepSeparator(boolean keepSeparator) {
        this.keepSeparator = keepSeparator;
    }

    public boolean isAddStartIndex() {
        return this.addStartIndex;
    }

    public void setAddStartIndex(boolean addStartIndex) {
        this.addStartIndex = addStartIndex;
    }

    /**
     * Joins the pieces with the separator and trims the result.
     *
     * @return the joined text, or {@code null} when it is empty after trimming
     */
    private String joinDocs(List<String> docs, String separator) {
        String text = String.join(separator, docs).trim();
        return text.isEmpty() ? null : text;
    }

    /** Null-safe "has at least one character" check. */
    private static boolean isNotEmpty(String value) {
        return value != null && !value.isEmpty();
    }

    /**
     * Greedily merges consecutive splits into chunks joined by {@code separator}.
     *
     * <p>Pieces are buffered until adding the next one would push the measured
     * length past {@link #chunkSize}; the buffer is then emitted as a chunk and
     * its oldest pieces are dropped until what remains fits within
     * {@link #chunkOverlap} and leaves room for the next piece. A single piece
     * longer than {@code chunkSize} is emitted as-is. Chunks that are blank
     * after trimming are skipped.
     *
     * @param splits    the pieces to merge, in order
     * @param separator the string placed between pieces of the same chunk
     * @return the merged chunks
     */
    protected List<String> mergeSplits(List<String> splits, String separator) {
        int separatorLength = this.lengthFunction.apply(separator);
        List<String> docs = new ArrayList<>();
        List<String> currentDoc = new ArrayList<>(10);
        // Measured length of currentDoc once joined: piece lengths plus one separator between neighbours.
        int total = 0;
        for (String d : splits) {
            int length = this.lengthFunction.apply(d);
            if (!currentDoc.isEmpty() && total + length + separatorLength > this.chunkSize) {
                String doc = this.joinDocs(currentDoc, separator);
                if (doc != null) {
                    docs.add(doc);
                }
                // Drop leading pieces until only the allowed overlap remains and the next piece fits.
                // The non-empty guard prevents get(0) on an empty buffer (e.g. negative chunkOverlap).
                while (!currentDoc.isEmpty()
                        && (total > this.chunkOverlap
                            || (total + length + separatorLength > this.chunkSize && total > 0))) {
                    total -= this.lengthFunction.apply(currentDoc.get(0))
                            + (currentDoc.size() > 1 ? separatorLength : 0);
                    currentDoc.remove(0);
                }
            }
            currentDoc.add(d);
            total += length + (currentDoc.size() > 1 ? separatorLength : 0);
        }
        String doc = this.joinDocs(currentDoc, separator);
        if (doc != null) {
            docs.add(doc);
        }
        return docs;
    }

    /**
     * Splits {@code text} on {@code separator} and drops empty pieces.
     *
     * <p>When {@code keepSeparator} is {@code true}, each separator occurrence is
     * kept at the end of the piece that precedes it. When {@code separator} is
     * {@code null} or empty, the whole text is returned as a single piece.
     *
     * <p>NOTE(review): with {@code keepSeparator == true} the separator is matched
     * literally (via {@link #splitWithSeparator}), whereas with {@code false} it is
     * passed to {@link String#split(String)} and therefore interpreted as a regular
     * expression. Separators containing regex metacharacters behave differently on
     * the two paths; confirm against callers before unifying.
     *
     * @param text          the text to split; {@code null} yields an empty list
     * @param separator     the separator (see note above regarding interpretation)
     * @param keepSeparator whether separators are retained in the output pieces
     * @return the non-empty pieces, in order
     */
    public static List<String> splitTextWithRegex(String text, String separator, boolean keepSeparator) {
        if (text == null) {
            return new ArrayList<>();
        }
        List<String> splits;
        if (!isNotEmpty(separator)) {
            splits = Collections.singletonList(text);
        } else if (keepSeparator) {
            // parts alternates text, separator, text, separator, ...
            String[] parts = TextSplitter.splitWithSeparator(text, separator);
            splits = new ArrayList<>(parts.length / 2 + 1);
            for (int i = 0; i + 1 < parts.length; i += 2) {
                splits.add(parts[i] + parts[i + 1]);
            }
            // An odd count means there is trailing text after the last separator.
            if (parts.length % 2 == 1) {
                splits.add(parts[parts.length - 1]);
            }
        } else {
            splits = Arrays.asList(text.split(separator));
        }
        return splits.stream().filter(TextSplitter::isNotEmpty).collect(Collectors.toList());
    }

    /**
     * Splits {@code text} on literal occurrences of {@code separator}, keeping the
     * separators as their own array elements.
     *
     * <p>The result alternates between the text preceding a separator (possibly
     * empty) and the separator itself; any non-empty text after the last
     * separator is appended as a final element.
     *
     * @param text      the text to split
     * @param separator the separator, matched literally
     * @return the alternating text/separator pieces
     */
    public static String[] splitWithSeparator(String text, String separator) {
        List<String> splits = new ArrayList<>();
        Matcher matcher = Pattern.compile(Pattern.quote(separator)).matcher(text);
        int prevEnd = 0;
        while (matcher.find()) {
            splits.add(text.substring(prevEnd, matcher.start()));
            splits.add(matcher.group());
            prevEnd = matcher.end();
        }
        if (prevEnd < text.length()) {
            splits.add(text.substring(prevEnd));
        }
        return splits.toArray(new String[0]);
    }
}

