package kd.bos.gptas.km.splitter.utils;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.lang.StringUtils;

/* loaded from: input_file:kd/bos/gptas/km/splitter/utils/TextSplitter.class */
/**
 * Base class for splitters that cut a text into chunks bounded by a configurable size,
 * with a configurable overlap between consecutive chunks.
 *
 * <p>Subclasses implement {@link #splitText(String)}; this class supplies the shared
 * configuration plus helpers for cutting a text on a separator and for merging the
 * resulting pieces back into size-bounded chunks.
 *
 * <p>Sizes are measured with {@link #getLengthFunction()}, which defaults to
 * {@link String#length()}. The setters perform no validation.
 */
public abstract class TextSplitter {
    /** Upper bound for the measured size of a merged chunk (default 4000). */
    protected int chunkSize = 4000;

    /** Measured size that consecutive merged chunks may share (default 200). */
    protected int chunkOverlap = 200;

    /** Measures a piece of text; defaults to the number of UTF-16 code units. */
    protected Function<String, Integer> lengthFunction = String::length;

    /** Configuration flag exposed to subclasses; not read by any method of this class. */
    protected boolean keepSeparator;

    /** Configuration flag exposed to subclasses; not read by any method of this class. */
    protected boolean addStartIndex;

    /**
     * Splits the given text into chunks.
     *
     * @param text the text to split
     * @return the resulting chunks
     */
    public abstract List<String> splitText(String text);

    /** @return the maximum measured size of a merged chunk */
    public int getChunkSize() {
        return this.chunkSize;
    }

    /** @param chunkSize the maximum measured size of a merged chunk */
    public void setChunkSize(int chunkSize) {
        this.chunkSize = chunkSize;
    }

    /** @return the measured size consecutive chunks may share */
    public int getChunkOverlap() {
        return this.chunkOverlap;
    }

    /** @param chunkOverlap the measured size consecutive chunks may share */
    public void setChunkOverlap(int chunkOverlap) {
        this.chunkOverlap = chunkOverlap;
    }

    /** @return the function used to measure text */
    public Function<String, Integer> getLengthFunction() {
        return this.lengthFunction;
    }

    /** @param lengthFunction the function used to measure text */
    public void setLengthFunction(Function<String, Integer> lengthFunction) {
        this.lengthFunction = lengthFunction;
    }

    /** @return the keep-separator flag */
    public boolean isKeepSeparator() {
        return this.keepSeparator;
    }

    /** @param keepSeparator the keep-separator flag */
    public void setKeepSeparator(boolean keepSeparator) {
        this.keepSeparator = keepSeparator;
    }

    /** @return the add-start-index flag */
    public boolean isAddStartIndex() {
        return this.addStartIndex;
    }

    /** @param addStartIndex the add-start-index flag */
    public void setAddStartIndex(boolean addStartIndex) {
        this.addStartIndex = addStartIndex;
    }

    /**
     * Joins the pieces with the separator and trims the result.
     *
     * @return the trimmed text, or {@code null} when nothing but whitespace remains
     */
    private String joinDocs(List<String> docs, String separator) {
        String joined = String.join(separator, docs).trim();
        return joined.isEmpty() ? null : joined;
    }

    /**
     * Greedily merges small pieces into chunks whose measured size stays within
     * {@link #chunkSize} where possible, carrying trailing pieces over into the next
     * chunk so that consecutive chunks overlap by up to {@link #chunkOverlap}.
     *
     * <p>A single piece that is larger than {@code chunkSize} is not cut further; it ends
     * up in a chunk that exceeds the limit. Every emitted chunk is trimmed, and chunks
     * that are blank after trimming are dropped.
     *
     * @param splits    the pieces to merge, in order
     * @param separator the text inserted between pieces inside a chunk
     * @return the merged chunks, in order
     */
    public List<String> mergeSplits(List<String> splits, String separator) {
        int separatorLength = this.lengthFunction.apply(separator);
        List<String> chunks = new ArrayList<>();
        // Pieces of the chunk currently being assembled.
        List<String> window = new ArrayList<>(10);
        // Measured size of the window, including the separators between its pieces.
        int total = 0;
        for (String split : splits) {
            int splitLength = this.lengthFunction.apply(split);
            boolean overflows =
                    total + splitLength + (window.isEmpty() ? 0 : separatorLength) > this.chunkSize;
            if (overflows && !window.isEmpty()) {
                String chunk = joinDocs(window, separator);
                if (chunk != null) {
                    chunks.add(chunk);
                }
                // Drop pieces from the front until what is left fits in the overlap budget
                // and leaves room for the incoming piece. The emptiness guard keeps the
                // loop from indexing into an empty window (e.g. with a negative overlap).
                while (!window.isEmpty()
                        && (total > this.chunkOverlap
                                || (total + splitLength + separatorLength > this.chunkSize
                                        && total > 0))) {
                    total -= this.lengthFunction.apply(window.get(0))
                            + (window.size() > 1 ? separatorLength : 0);
                    window.remove(0);
                }
            }
            window.add(split);
            total += splitLength + (window.size() > 1 ? separatorLength : 0);
        }
        String lastChunk = joinDocs(window, separator);
        if (lastChunk != null) {
            chunks.add(lastChunk);
        }
        return chunks;
    }

    /**
     * Cuts a text on a separator and returns the non-empty pieces.
     *
     * <ul>
     *   <li>A {@code null} or empty text yields an empty list.</li>
     *   <li>A {@code null} or empty separator yields the whole text as a single piece.</li>
     *   <li>With {@code keepSeparator}, each separator occurrence stays attached to the end
     *       of the piece that precedes it.</li>
     *   <li>Without {@code keepSeparator}, the separators are discarded.</li>
     * </ul>
     *
     * <p>NOTE(review): the two modes interpret the separator differently. With
     * {@code keepSeparator} it is matched literally (via {@link #splitWithSeparator}),
     * without it the separator is handed to {@link String#split(String)} and therefore
     * treated as a regular expression. Confirm against the callers which is intended.
     *
     * @param text          the text to cut
     * @param separator     the separator (see the note above on literal vs. regex)
     * @param keepSeparator whether separators are kept in the output
     * @return a list of the non-empty pieces, in order
     */
    public static List<String> splitTextWithRegex(String text, String separator, boolean keepSeparator) {
        if (text == null || text.isEmpty()) {
            return new ArrayList<>();
        }
        List<String> pieces;
        if (separator == null || separator.isEmpty()) {
            pieces = Collections.singletonList(text);
        } else if (keepSeparator) {
            // parts alternates: text, separator, text, separator, ..., [trailing text]
            String[] parts = splitWithSeparator(text, separator);
            pieces = new ArrayList<>(parts.length / 2 + 1);
            for (int i = 0; i + 1 < parts.length; i += 2) {
                pieces.add(parts[i] + parts[i + 1]);
            }
            if (parts.length % 2 == 1) {
                pieces.add(parts[parts.length - 1]);
            }
        } else {
            pieces = Arrays.asList(text.split(separator));
        }
        return pieces.stream()
                .filter(piece -> piece != null && !piece.isEmpty())
                .collect(Collectors.toList());
    }

    /**
     * Cuts a text on every literal occurrence of the separator, keeping the separators.
     *
     * <p>The result alternates between the text before a separator (possibly empty) and
     * the separator itself; text remaining after the last separator is appended as a final
     * element when it is not empty. For example {@code ("a,b", ",")} gives
     * {@code ["a", ",", "b"]}.
     *
     * @param text      the text to cut; must not be {@code null}
     * @param separator the literal separator; must not be {@code null}
     * @return the alternating text / separator elements
     */
    public static String[] splitWithSeparator(String text, String separator) {
        List<String> parts = new ArrayList<>();
        Matcher matcher = Pattern.compile(Pattern.quote(separator)).matcher(text);
        int cursor = 0;
        while (matcher.find()) {
            parts.add(text.substring(cursor, matcher.start()));
            parts.add(matcher.group());
            cursor = matcher.end();
        }
        if (cursor < text.length()) {
            parts.add(text.substring(cursor));
        }
        return parts.toArray(new String[0]);
    }
}
