package kd.ai.gai.core.rag.split;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import kd.ai.gai.core.domain.dto.Chunk;
import kd.ai.gai.core.rag.Sentence;
import kd.ai.gai.core.rag.SplitConfig;
import kd.ai.gai.core.service.ChunkService;
import kd.bos.logging.Log;
import kd.bos.logging.LogFactory;
import kd.bos.util.StringUtils;

/* loaded from: input_file:kd/ai/gai/core/rag/split/SentenceSplitter.class */
/**
 * Splitter that cuts text into {@link Sentence}s at the characters listed in
 * {@link AbstractSplitter#SPLIT_SENTENCE_CHARS} and then packs those sentences into
 * {@link Chunk}s, using {@link SplitConfig#getMaxChunkLen()} as the length limit.
 *
 * <p>{@link #merge} and {@link #mergeToDB} share a single implementation
 * ({@code collectChunks}) so the two paths cannot drift apart.
 */
public class SentenceSplitter extends AbstractSplitter {
    private static final Log LOGGER = LogFactory.getLog(SentenceSplitter.class);

    /**
     * Splits {@code str} into sentences.
     *
     * <p>The text is first divided by {@code paragraphSimpleSplit}; each non-empty paragraph is
     * passed through {@code textDataPreprocess} and cut after every character contained in
     * {@code SPLIT_SENTENCE_CHARS}. Text left over after the last split character becomes a
     * sentence of its own. When the most recently produced sentence is not complete, it is
     * joined with the next paragraph and the joined text is split again.
     *
     * @param i   value passed as the first constructor argument of every {@link Sentence}
     *            (presumably the page number - confirm against {@code Sentence})
     * @param str text to split
     * @return the sentences in document order; empty when nothing could be extracted
     */
    @Override // kd.ai.gai.core.rag.split.ISplit
    public ArrayList<Sentence> split(int i, String str) {
        ArrayList<Sentence> sentences = new ArrayList<>();
        List<String> paragraphs = Arrays.asList(paragraphSimpleSplit(str));
        for (String paragraph : paragraphs) {
            // Nothing to add; also keeps a null paragraph from being concatenated as "null" below.
            if (!StringUtils.isNotEmpty(paragraph)) {
                continue;
            }
            String pending = paragraph;
            if (!sentences.isEmpty()) {
                int lastIndex = sentences.size() - 1;
                Sentence last = sentences.get(lastIndex);
                if (!last.isComplete()) {
                    // The fragment is re-emitted as part of the joined text, so it has to be
                    // taken out of the result first; otherwise its content appears twice.
                    sentences.remove(lastIndex);
                    pending = last.getContent() + paragraph;
                }
            }
            String prepared = textDataPreprocess(pending);
            int length = prepared.length();
            int start = 0;
            for (int pos = 0; pos < length; pos++) {
                if (AbstractSplitter.SPLIT_SENTENCE_CHARS.contains(String.valueOf(prepared.charAt(pos)))) {
                    int end = pos + 1; // the split character stays with its sentence
                    appendSentence(sentences, i, prepared.substring(start, end));
                    start = end;
                }
            }
            if (start < length) {
                // Trailing text without a split character.
                appendSentence(sentences, i, prepared.substring(start));
            }
        }
        return sentences;
    }

    /**
     * Packs {@code list} into chunks and returns them.
     *
     * @param splitConfig supplies repository id, file id, maximum chunk length and the order
     *                    number of the first chunk; only read when {@code list} is non-empty
     * @param list        sentences to merge; may be {@code null} or empty
     * @return the chunks in order; empty when {@code list} is {@code null} or empty
     */
    @Override // kd.ai.gai.core.rag.split.ISplit
    public LinkedList<Chunk> merge(SplitConfig splitConfig, List<Sentence> list) {
        LinkedList<Chunk> chunks = new LinkedList<>();
        if (list != null && !list.isEmpty()) {
            collectChunks(splitConfig, list, chunks, false);
        }
        return chunks;
    }

    /**
     * Packs {@code list} into chunks exactly like {@link #merge} and hands them to
     * {@link ChunkService}: {@code batchConditionInsert} is called after every sentence and
     * {@code insert} once at the end.
     *
     * @param splitConfig supplies repository id, file id, maximum chunk length and the order
     *                    number of the first chunk; only read when {@code list} is non-empty
     * @param list        sentences to merge; may be {@code null} or empty, in which case
     *                    {@code ChunkService} is not called at all
     * @return always {@code true}
     */
    @Override // kd.ai.gai.core.rag.split.ISplit
    public boolean mergeToDB(SplitConfig splitConfig, List<Sentence> list) {
        if (list == null || list.isEmpty()) {
            return true;
        }
        LinkedList<Chunk> chunks = new LinkedList<>();
        collectChunks(splitConfig, list, chunks, true);
        ChunkService.insert(chunks);
        return true;
    }

    /** Adds {@code content} to {@code target} as a sentence, flagged via {@code isComplete}. */
    private void appendSentence(List<Sentence> target, int page, String content) {
        target.add(new Sentence(page, content, isComplete(content)));
    }

    /**
     * Returns the position at which over-long text is cut: the index reported by
     * {@code findLowerLastSplitMaxlenIndex}, or {@code maxChunkLen} when that index is negative.
     */
    private int resolveCutIndex(String text, int maxChunkLen) {
        int index = findLowerLastSplitMaxlenIndex(text, maxChunkLen);
        // NOTE(review): an index of 0 (or maxChunkLen <= 0) would make callers cut off an empty
        // prefix and never shrink the text - confirm the helper cannot return 0.
        return index < 0 ? maxChunkLen : index;
    }

    /**
     * Shared merge loop: appends sentence contents to a buffer and emits a chunk whenever the
     * buffer reaches or exceeds {@code maxChunkLen}. Chunk order numbers start at
     * {@code splitConfig.getStartChunkOrder()} and restart at 1 when the page changes.
     *
     * @param splitConfig       merge settings
     * @param list              non-empty list of sentences
     * @param chunks            receives the produced chunks
     * @param flushWhileMerging when {@code true}, {@code ChunkService.batchConditionInsert(chunks)}
     *                          is called after each sentence
     */
    private void collectChunks(SplitConfig splitConfig, List<Sentence> list, LinkedList<Chunk> chunks,
            boolean flushWhileMerging) {
        long repositoryId = splitConfig.getRepositoryId();
        long fileId = splitConfig.getFileId();
        int maxChunkLen = splitConfig.getMaxChunkLen();
        int page = list.get(0).getPage();
        int order = splitConfig.getStartChunkOrder();
        String buffer = "";
        boolean bufferComplete = false;
        int count = list.size();

        for (int idx = 0; idx < count; idx++) {
            Sentence sentence = list.get(idx);
            String content = sentence.getContent();
            int sentencePage = sentence.getPage();
            boolean sentenceComplete = sentence.isComplete();
            String candidate = buffer + content;

            if (candidate.length() == maxChunkLen) {
                // Exact fit: emit buffer plus sentence as one chunk.
                chunks.add(new Chunk(repositoryId, fileId, page, order, chunkTextPreprocess(candidate),
                        sentenceComplete));
                buffer = "";
                if (idx < count - 1) {
                    // Peek at the NEXT sentence without consuming it. The DB variant used to
                    // post-increment the loop index here, which read the current page instead
                    // and silently dropped the following sentence.
                    int nextPage = list.get(idx + 1).getPage();
                    if (page != nextPage) {
                        page = nextPage;
                        order = 1;
                    } else {
                        order++;
                    }
                }
            } else if (candidate.length() > maxChunkLen) {
                String emitted;
                if (buffer.length() > maxChunkLen || !bufferComplete) {
                    // Buffer is itself too long or ends mid-sentence: cut the joined text.
                    int cut = resolveCutIndex(candidate, maxChunkLen);
                    emitted = candidate.substring(0, cut);
                    buffer = candidate.substring(cut);
                } else {
                    // Buffer ends on a complete sentence: emit it and start over with this one.
                    emitted = buffer;
                    buffer = content;
                }
                String chunkText = chunkTextPreprocess(emitted);
                chunks.add(new Chunk(repositoryId, fileId, page, order, chunkText, isComplete(chunkText)));
                if (page != sentencePage) {
                    order = 1;
                    page = sentencePage;
                } else {
                    order++;
                }
            } else {
                // Still room: keep accumulating.
                buffer = candidate;
                bufferComplete = sentenceComplete;
            }

            if (flushWhileMerging) {
                // NOTE(review): presumably persists and removes chunks once a batch threshold is
                // reached - confirm; if it leaves them in the list, the final insert in
                // mergeToDB would write them a second time.
                ChunkService.batchConditionInsert(chunks);
            }
        }

        if (StringUtils.isNotEmpty(buffer)) {
            // Drain what is left, cutting it down while it is still too long.
            while (buffer.length() > maxChunkLen) {
                int cut = resolveCutIndex(buffer, maxChunkLen);
                String head = buffer.substring(0, cut);
                buffer = buffer.substring(cut);
                String chunkText = chunkTextPreprocess(head);
                chunks.add(new Chunk(repositoryId, fileId, page, order, chunkText, isComplete(chunkText)));
                order++;
            }
            // NOTE(review): bufferComplete is only refreshed in the "still room" branch, so after
            // an overflow split it may describe an earlier sentence - confirm this is intended.
            chunks.add(new Chunk(repositoryId, fileId, page, order, chunkTextPreprocess(buffer), bufferComplete));
        }
    }
}
