/*
 * Decompiled with CFR 0.152.
 */
package kd.ai.gai.core.rag.split;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import kd.ai.gai.core.domain.dto.Chunk;
import kd.ai.gai.core.rag.MergeConfig;
import kd.ai.gai.core.rag.RepoChunkOperateConfig;
import kd.ai.gai.core.rag.Sentence;
import kd.ai.gai.core.rag.split.AbstractSplitter;
import kd.ai.gai.core.service.ChunkService;
import kd.bos.logging.Log;
import kd.bos.logging.LogFactory;
import kd.bos.util.StringUtils;

/**
 * Splitter that cuts page text into sentences at sentence-ending punctuation
 * and then merges those sentences into chunks whose text length is bounded by
 * {@link MergeConfig#getMaxChunkLen()}.
 *
 * <p>Text pre-processing, completeness detection and split-point lookup are
 * delegated to helpers inherited from {@link AbstractSplitter}; their exact
 * rules are not visible in this file.
 */
public class SentenceSplitter extends AbstractSplitter {
    private static final Log LOGGER = LogFactory.getLog(SentenceSplitter.class);

    /** Characters that terminate a sentence: ideographic full stop, '?', fullwidth '?', '!', fullwidth '!'. */
    private static final String SENTENCE_TERMINATORS = "\u3002?\uff1f!\uff01";

    /**
     * Splits {@code content} into sentences.
     *
     * <p>The content is first broken into paragraphs. Text left over after the
     * last terminator of a paragraph is carried forward: when it is not complete
     * (per {@code isComplete}) it is prepended to the next paragraph, otherwise
     * it is emitted as a sentence of its own so that it is never lost.
     *
     * @param page page number recorded on every produced sentence
     * @param content raw text of the page
     * @param repoChunkOperateConfig not used by this implementation
     * @return the sentences in document order
     */
    @Override
    public ArrayList<Sentence> split(int page, String content, RepoChunkOperateConfig repoChunkOperateConfig) {
        ArrayList<Sentence> sentences = new ArrayList<Sentence>();
        List<String> paragraphs = Arrays.asList(paragraphSimpleSplit(content));
        String carry = "";
        for (String paragraph : paragraphs) {
            if (StringUtils.isNotEmpty(carry)) {
                if (isComplete(carry)) {
                    // A complete leftover must not be glued to the next paragraph, but it
                    // must not be dropped either: emit it before the carry is overwritten.
                    sentences.add(new Sentence(page, carry, true));
                    carry = "";
                } else {
                    paragraph = carry + paragraph;
                }
            }
            if (!StringUtils.isNotEmpty(paragraph)) {
                continue;
            }
            paragraph = textDataPreprocess(paragraph);
            int sentenceStart = 0;
            int paragraphLen = paragraph.length();
            for (int j = 0; j < paragraphLen; ++j) {
                if (SENTENCE_TERMINATORS.indexOf(paragraph.charAt(j)) < 0) {
                    continue;
                }
                int sentenceEnd = j + 1;
                String sentenceContent = paragraph.substring(sentenceStart, sentenceEnd);
                sentences.add(new Sentence(page, sentenceContent, isComplete(sentenceContent)));
                // Anything carried into this paragraph is now part of an emitted sentence.
                carry = "";
                sentenceStart = sentenceEnd;
            }
            if (sentenceStart < paragraphLen) {
                carry = paragraph.substring(sentenceStart);
            }
        }
        if (StringUtils.isNotEmpty(carry)) {
            sentences.add(new Sentence(page, carry, isComplete(carry)));
        }
        return sentences;
    }

    /**
     * Merges sentences into chunks no longer than the configured maximum length.
     *
     * @param mergeConfig supplies repository id, file id, maximum chunk length and the first chunk order
     * @param sentences sentences in document order; {@code null} or empty yields an empty list
     * @return the chunks in document order
     */
    @Override
    public LinkedList<Chunk> merge(MergeConfig mergeConfig, List<Sentence> sentences) {
        return mergeSentencesIntoChunks(mergeConfig, sentences, false);
    }

    /**
     * Merges sentences into chunks exactly like {@link #merge} and persists them.
     *
     * <p>{@code ChunkService.batchConditionInsert} is called with the accumulated
     * chunk list after every sentence and {@code ChunkService.insert} once at the
     * end. Nothing is persisted when {@code sentences} is {@code null} or empty.
     *
     * @param mergeConfig supplies repository id, file id, maximum chunk length and the first chunk order
     * @param sentences sentences in document order
     * @return always {@code true}
     */
    @Override
    public boolean mergeToDB(MergeConfig mergeConfig, List<Sentence> sentences) {
        if (sentences == null || sentences.isEmpty()) {
            return true;
        }
        LinkedList<Chunk> chunks = mergeSentencesIntoChunks(mergeConfig, sentences, true);
        ChunkService.insert(chunks);
        return true;
    }

    /**
     * Shared merge loop behind {@link #merge} and {@link #mergeToDB}.
     *
     * <p>Sentences are appended to a pending buffer. When the buffer reaches the
     * maximum length exactly it is emitted; when it would exceed it, either the
     * complete pending text is emitted on its own or the merged text is cut at a
     * split point. The chunk order restarts at 1 whenever the page changes.
     *
     * @param persistIncrementally when {@code true}, {@code ChunkService.batchConditionInsert}
     *        is invoked with the accumulated list after each sentence
     */
    private static LinkedList<Chunk> mergeSentencesIntoChunks(MergeConfig mergeConfig, List<Sentence> sentences, boolean persistIncrementally) {
        LinkedList<Chunk> chunks = new LinkedList<Chunk>();
        if (sentences == null || sentences.isEmpty()) {
            return chunks;
        }
        long repositoryId = mergeConfig.getRepositoryId();
        long fileId = mergeConfig.getFileId();
        int maxChunkLen = mergeConfig.getMaxChunkLen();
        int currentPage = sentences.get(0).getPage();
        int chunkOrder = mergeConfig.getStartChunkOrder();
        String pending = "";
        boolean pendingComplete = false;
        for (int i = 0; i < sentences.size(); ++i) {
            Sentence sentence = sentences.get(i);
            String sentenceContent = sentence.getContent();
            int sentencePage = sentence.getPage();
            boolean complete = sentence.isComplete();
            String merged = pending + sentenceContent;
            if (merged.length() == maxChunkLen) {
                String exactContent = chunkTextPreprocess(merged);
                chunks.add(new Chunk(repositoryId, fileId, currentPage, chunkOrder, exactContent, complete));
                pending = "";
                pendingComplete = false;
                if (i < sentences.size() - 1) {
                    // Peek at the next sentence without consuming it; advancing the loop
                    // index here would silently drop that sentence from the output.
                    int nextSentencePage = sentences.get(i + 1).getPage();
                    if (currentPage != nextSentencePage) {
                        currentPage = nextSentencePage;
                        chunkOrder = 1;
                    } else {
                        ++chunkOrder;
                    }
                }
            } else if (merged.length() > maxChunkLen) {
                String emitted;
                if (!pending.isEmpty() && pendingComplete && pending.length() <= maxChunkLen) {
                    // The pending text is a complete unit that fits: emit it as is and
                    // start the next chunk with the current sentence.
                    emitted = pending;
                    pending = sentenceContent;
                } else {
                    int splitLen = resolveSplitLength(merged, maxChunkLen);
                    emitted = merged.substring(0, splitLen);
                    pending = merged.substring(splitLen);
                }
                // Whatever remains pending now ends with the current sentence.
                pendingComplete = complete;
                emitted = chunkTextPreprocess(emitted);
                chunks.add(new Chunk(repositoryId, fileId, currentPage, chunkOrder, emitted, isComplete(emitted)));
                if (currentPage != sentencePage) {
                    chunkOrder = 1;
                    currentPage = sentencePage;
                } else {
                    ++chunkOrder;
                }
            } else {
                pending = merged;
                pendingComplete = complete;
            }
            if (persistIncrementally) {
                ChunkService.batchConditionInsert(chunks);
            }
        }
        if (StringUtils.isNotEmpty(pending)) {
            // The remainder may still exceed the limit (e.g. one very long sentence).
            while (pending.length() > maxChunkLen) {
                int splitLen = resolveSplitLength(pending, maxChunkLen);
                String emitted = pending.substring(0, splitLen);
                pending = pending.substring(splitLen);
                emitted = chunkTextPreprocess(emitted);
                chunks.add(new Chunk(repositoryId, fileId, currentPage, chunkOrder, emitted, isComplete(emitted)));
                ++chunkOrder;
            }
            pending = chunkTextPreprocess(pending);
            chunks.add(new Chunk(repositoryId, fileId, currentPage, chunkOrder, pending, pendingComplete));
        }
        return chunks;
    }

    /**
     * Returns the length of the prefix to cut from {@code content}.
     *
     * <p>Uses the index reported by {@code findLowerLastSplitMaxlenIndex}; a
     * non-positive index would produce an empty chunk and never shrink the
     * remaining text, so it is treated as "no split point" and the text is cut
     * at {@code maxChunkLen}.
     */
    private static int resolveSplitLength(String content, int maxChunkLen) {
        int splitIndex = findLowerLastSplitMaxlenIndex(content, maxChunkLen);
        return splitIndex <= 0 ? maxChunkLen : splitIndex;
    }
}

