package kd.ai.gai.mservice;

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import kd.ai.gai.core.domain.dto.Chunk;
import kd.ai.gai.core.domain.dto.PdfOcrData;
import kd.ai.gai.core.domain.vo.RepoOcrAsyncVO;
import kd.ai.gai.core.engine.json.JsonUtil;
import kd.ai.gai.core.enuz.LLM;
import kd.ai.gai.core.enuz.repo.ChunkRule;
import kd.ai.gai.core.enuz.repo.SpliteType;
import kd.ai.gai.core.rag.Sentence;
import kd.ai.gai.core.rag.SplitConfig;
import kd.ai.gai.core.rag.service.RepoDispatchService;
import kd.ai.gai.core.rag.split.AbstractSplitter;
import kd.ai.gai.core.rag.split.SplitFactory;
import kd.ai.gai.core.service.ChunkService;
import kd.ai.gai.core.service.RepoService;
import kd.bos.context.RequestContext;
import kd.bos.logging.Log;
import kd.bos.logging.LogFactory;
import kd.bos.util.StringUtils;

/* loaded from: input_file:kd/ai/gai/mservice/GaiRagCallBackServiceImpl.class */
/**
 * Callback endpoint invoked with the JSON result of a PDF OCR job.
 *
 * <p>The OCR text of every page is split into sentences, the sentences are merged
 * into chunks bounded by the repository's chunk rule, the chunks are handed to
 * {@link ChunkService}, and finally an embedding task is registered through
 * {@link RepoDispatchService}.
 */
public class GaiRagCallBackServiceImpl implements GaiRagCallBackService {
    private static final Log logger = LogFactory.getLog(GaiRagCallBackServiceImpl.class);
    private AbstractSplitter splitter = SplitFactory.getSplit(SpliteType.SENTENCE);

    /**
     * Processes one OCR notification.
     *
     * @param str JSON payload that is deserialized into a {@link PdfOcrData}
     */
    public void ragOcrRagNotify(String str) {
        logger.info("【知识库-文档处理】GaiRagCallBackService({}) ragOcrRagNotify:{}", RequestContext.get().getTraceId(), str);

        PdfOcrData ocrData = (PdfOcrData) JsonUtil.fromJson(str, PdfOcrData.class);
        Long bizTaskId = ocrData.getBizTaskId();

        // The business task id is used as the file id to look up the owning repository.
        RepoOcrAsyncVO repoInfo = RepoService.getRepoInfoByFileId(bizTaskId.longValue());
        Long repoId = repoInfo.getRepoId();
        LLM llm = repoInfo.getLlm();
        int maxSize = ChunkRule.parse(llm, repoInfo.getChunkStrategy()).getMaxSize();

        LinkedList<Chunk> chunks = new LinkedList<>();
        List<PdfOcrData.PageInfo> pages = ocrData.getPdfContents();
        if (pages != null) {
            for (PdfOcrData.PageInfo page : pages) {
                int pageNum = page.getPageNum();
                String pageText = page.getPageDocument();
                if (!StringUtils.isNotEmpty(pageText)) {
                    logger.info("【知识库-文档处理】file:{} ,第{}页非可解析内容，OCR结果为空", bizTaskId, pageNum);
                    continue;
                }

                ArrayList<Sentence> sentences = this.splitter.split(pageNum, pageText);

                // NOTE(review): when the previous chunk is not carried over, numbering for this
                // page starts again at 1 — confirm that this restart is intended.
                int startOrder = 1;
                if (!chunks.isEmpty() && !sentences.isEmpty()) {
                    Chunk tail = chunks.getLast();
                    if (canCarryOver(tail, sentences, maxSize)) {
                        // Re-open the last chunk: feed it back in as the first sentence of this
                        // page so the merge can extend it, and reuse its order number.
                        startOrder = tail.getOrder();
                        sentences.add(0, new Sentence(tail.getPage(), tail.getChunk(), tail.isChunkComplete()));
                        chunks.removeLast();
                        // Called with the chunks collected so far, minus the re-opened tail
                        // (what batchConditionInsert does with them is not visible here).
                        ChunkService.batchConditionInsert(chunks);
                    }
                }

                if (sentences.isEmpty()) {
                    continue;
                }
                SplitConfig config = new SplitConfig(repoId.longValue(), bizTaskId.longValue(), startOrder, maxSize);
                chunks.addAll(this.splitter.merge(config, sentences));
            }
        }

        ChunkService.batchInsert(chunks);
        logger.info("file:{}文件chunk处理完成,共{}chunk", bizTaskId, chunks.size());
        RepoDispatchService.addEmbeddingTask(repoId.longValue(), llm, bizTaskId.longValue());
    }

    /**
     * Decides whether the last collected chunk should be merged with the next page.
     *
     * @return {@code true} when the chunk is flagged incomplete, or when its text
     *         concatenated with the first sentence is at most {@code maxSize} characters
     */
    private static boolean canCarryOver(Chunk tail, ArrayList<Sentence> sentences, int maxSize) {
        if (!tail.isChunkComplete()) {
            return true;
        }
        String joined = tail.getChunk() + sentences.get(0).getContent();
        return joined.length() <= maxSize;
    }
}
