/*
 * Decompiled with CFR 0.152.
 */
package kd.ai.gai.mservice;

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Objects;
import kd.ai.gai.core.domain.dto.Chunk;
import kd.ai.gai.core.domain.dto.PdfOcrData;
import kd.ai.gai.core.domain.vo.RepoOcrAsyncVO;
import kd.ai.gai.core.engine.json.JsonUtil;
import kd.ai.gai.core.enuz.LLM;
import kd.ai.gai.core.enuz.repo.ChunkRule;
import kd.ai.gai.core.enuz.repo.SpliteType;
import kd.ai.gai.core.rag.Sentence;
import kd.ai.gai.core.rag.SplitConfig;
import kd.ai.gai.core.rag.service.RepoDispatchService;
import kd.ai.gai.core.rag.split.AbstractSplitter;
import kd.ai.gai.core.rag.split.SplitFactory;
import kd.ai.gai.core.service.ChunkService;
import kd.ai.gai.core.service.RepoService;
import kd.ai.gai.mservice.GaiRagCallBackService;
import kd.bos.context.RequestContext;
import kd.bos.logging.Log;
import kd.bos.logging.LogFactory;
import kd.bos.util.StringUtils;

/**
 * Callback that turns a finished PDF OCR result into knowledge-base chunks.
 *
 * <p>The OCR payload is processed page by page: each page's text is split into sentences, the
 * sentences are merged into chunks bounded by the repository's chunk rule, the chunks are handed
 * to {@link ChunkService}, and finally an embedding task is queued through
 * {@link RepoDispatchService}.
 */
public class GaiRagCallBackServiceImpl
implements GaiRagCallBackService {
    private static final Log logger = LogFactory.getLog(GaiRagCallBackServiceImpl.class);

    /** Sentence-level splitter, used both to cut page text and to merge sentences into chunks. */
    private final AbstractSplitter splitter = SplitFactory.getSplit(SpliteType.SENTENCE);

    /**
     * Handles the OCR notification for one file.
     *
     * @param orcData JSON text that is deserialized into {@link PdfOcrData}; its business task id
     *                is used as the file id
     * @throws NullPointerException if the payload deserializes to {@code null}, carries no file id,
     *                              or no repository info / repository id is available for the file
     */
    public void ragOcrRagNotify(String orcData) {
        logger.info("\u3010\u77e5\u8bc6\u5e93-\u6587\u6863\u5904\u7406\u3011GaiRagCallBackService({}) ragOcrRagNotify:{}", RequestContext.get().getTraceId(), orcData);

        // Fail fast with a descriptive message instead of an anonymous NPE further down.
        PdfOcrData pdfOcrData = Objects.requireNonNull(
                (PdfOcrData)JsonUtil.fromJson(orcData, PdfOcrData.class),
                "OCR callback payload could not be parsed into PdfOcrData");
        long fileId = Objects.requireNonNull(pdfOcrData.getBizTaskId(), "OCR callback payload has no bizTaskId (file id)");
        RepoOcrAsyncVO repoInfo = Objects.requireNonNull(
                RepoService.getRepoInfoByFileId(fileId),
                "no repository info found for file " + fileId);
        long repoId = Objects.requireNonNull(repoInfo.getRepoId(), "repository info for file " + fileId + " has no repoId");
        LLM llm = repoInfo.getLlm();
        int maxChunkLen = ChunkRule.parse(llm, repoInfo.getChunkStrategy()).getMaxSize();

        // Chunks accumulated so far; the tail element may be reopened by the next page.
        LinkedList<Chunk> pdfAllChunks = new LinkedList<>();
        List<PdfOcrData.PageInfo> pdfContents = pdfOcrData.getPdfContents();
        if (pdfContents != null) {
            for (PdfOcrData.PageInfo pdfContent : pdfContents) {
                int pageNum = pdfContent.getPageNum();
                String pageContent = pdfContent.getPageDocument();
                if (!StringUtils.isNotEmpty(pageContent)) {
                    logger.info("\u3010\u77e5\u8bc6\u5e93-\u6587\u6863\u5904\u7406\u3011file:{} ,\u7b2c{}\u9875\u975e\u53ef\u89e3\u6790\u5185\u5bb9\uff0cOCR\u7ed3\u679c\u4e3a\u7a7a", fileId, pageNum);
                    continue;
                }

                List<Sentence> sentences = this.splitter.split(pageNum, pageContent);

                // NOTE(review): when the previous chunk is not reopened, the order restarts at 1
                // for this page — confirm whether chunk order is meant to be per page or per file.
                int startChunkOrder = 1;
                if (!pdfAllChunks.isEmpty() && !sentences.isEmpty()) {
                    Chunk previousChunk = pdfAllChunks.getLast();
                    if (shouldMergeIntoPreviousChunk(previousChunk, sentences.get(0), maxChunkLen)) {
                        // Reopen the previous page's last chunk: feed its text back in as the
                        // first sentence so it is re-merged with this page, keeping its order.
                        startChunkOrder = previousChunk.getOrder();
                        sentences.add(0, new Sentence(previousChunk.getPage(), previousChunk.getChunk(), previousChunk.isChunkComplete()));
                        pdfAllChunks.removeLast();
                        // NOTE(review): the condition under which this call persists the list is
                        // not visible here — verify against ChunkService.
                        ChunkService.batchConditionInsert(pdfAllChunks);
                    }
                }
                if (sentences.isEmpty()) {
                    continue;
                }

                SplitConfig splitConfig = new SplitConfig(repoId, fileId, startChunkOrder, maxChunkLen);
                List<Chunk> currentPageChunks = this.splitter.merge(splitConfig, sentences);
                pdfAllChunks.addAll(currentPageChunks);
            }
        }

        ChunkService.batchInsert(pdfAllChunks);
        logger.info("file:{}\u6587\u4ef6chunk\u5904\u7406\u5b8c\u6210,\u5171{}chunk", fileId, pdfAllChunks.size());
        RepoDispatchService.addEmbeddingTask(repoId, llm, fileId);
    }

    /**
     * Decides whether the last chunk of the previous page should be reopened and merged with the
     * current page's sentences.
     *
     * <p>The chunk is kept as-is only when it is complete and appending the first sentence would
     * exceed the maximum chunk length; in every other case it is merged.
     *
     * @param previousChunk last chunk produced so far
     * @param firstSentence first sentence of the current page
     * @param maxChunkLen   maximum allowed chunk length in characters
     * @return {@code true} if the previous chunk should be merged with the current page
     */
    private static boolean shouldMergeIntoPreviousChunk(Chunk previousChunk, Sentence firstSentence, int maxChunkLen) {
        if (!previousChunk.isChunkComplete()) {
            return true;
        }
        return (previousChunk.getChunk() + firstSentence.getContent()).length() <= maxChunkLen;
    }
}

