/*
 * Decompiled with CFR 0.152.
 */
package kd.ai.gai.core.rag.parser;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import kd.ai.gai.core.code.GaiErrorCode;
import kd.ai.gai.core.code.GaiExceptionUtil;
import kd.ai.gai.core.domain.dto.Chunk;
import kd.ai.gai.core.rag.chunk.ChunkInput;
import kd.ai.gai.core.rag.parser.AbstractParser;
import kd.ai.gai.core.rag.split.SentenceSplitter;
import kd.ai.gai.core.service.ChunkService;
import kd.bos.db.DB;
import kd.bos.db.DBRoute;
import kd.bos.exception.KDBizException;
import kd.bos.fileservice.FileServiceFactory;
import kd.bos.util.StringUtils;
import org.apache.commons.io.IOUtils;

/**
 * Parser for documents stored as a single JSON attachment.
 *
 * <p>The JSON object is read for the keys {@code title}, {@code classify},
 * {@code content}, {@code table_list} and {@code url}. The text content is cut
 * into fixed-size pieces, each table entry becomes one or more chunks, every
 * chunk is prefixed with {@code classify + " " + title}, and the resulting
 * chunks are persisted through {@link ChunkService#batchInsert}.
 */
public class JsonCommunityBaseParser extends AbstractParser {

    /**
     * Reads the JSON attachment referenced by {@code file}, splits it into chunks,
     * stores the chunks and updates the title/url of the document record.
     *
     * @param file the chunking request; supplies the attachment path, the chunk
     *             configuration (maximum chunk length) and the file id
     * @return always {@code true} when the method completes normally
     * @throws IOException              if the attachment cannot be read
     * @throws KDBizException           ({@code REPO_FILE_SOURCE_ERR}) if the file does not
     *                                  parse to a JSON object, has no {@code url}, or has
     *                                  neither text content nor tables
     * @throws IllegalArgumentException if the configured chunk length is not positive
     */
    @Override
    public boolean opreate(ChunkInput file) throws IOException {
        String text;
        try (InputStream in = FileServiceFactory.getAttachmentFileService().getInputStream(file.getFilePath())) {
            text = IOUtils.toString(in, "UTF-8");
        }

        JSONObject chunkJson = JSON.parseObject(text);
        if (chunkJson == null) {
            // The parser may hand back null (for example for an empty file); report it
            // as a bad source file instead of failing with a NullPointerException below.
            throw new KDBizException(GaiExceptionUtil.buildErr(GaiErrorCode.REPO_FILE_SOURCE_ERR), new Object[0]);
        }

        String title = chunkJson.getString("title");
        // NOTE(review): a missing "classify" or "title" ends up as the literal text
        // "null" in the prefix because of string concatenation - confirm this is intended.
        String pre = chunkJson.getString("classify") + " " + title;
        String content = chunkJson.getString("content");
        JSONArray tableList = chunkJson.getJSONArray("table_list");
        String url = chunkJson.getString("url");

        // "table_list" may be absent, in which case the array reference is null.
        boolean noTables = tableList == null || tableList.isEmpty();
        if (StringUtils.isEmpty(url) || (StringUtils.isEmpty(content) && noTables)) {
            throw new KDBizException(GaiExceptionUtil.buildErr(GaiErrorCode.REPO_FILE_SOURCE_ERR), new Object[0]);
        }

        int maxLength = file.getChunkConfig().getLength();
        if (maxLength <= 0) {
            // A zero length would never advance the split loop below; a negative one
            // would make substring() fail with an unhelpful index error.
            throw new IllegalArgumentException("chunk length must be positive: " + maxLength);
        }

        List<Chunk> result = new ArrayList<>(10);
        int order = 0;

        // Text content: fixed-size windows of at most maxLength characters.
        // A document may consist of tables only, so content can legitimately be null.
        String body = content == null ? "" : SentenceSplitter.textDataPreprocess(content);
        int length = body.length();
        int startIndex = 0;
        while (startIndex < length) {
            int endIndex = Math.min(startIndex + maxLength, length);
            String subtext = SentenceSplitter.chunkTextPreprocess(body.substring(startIndex, endIndex));
            result.add(newChunk(file, ++order, pre + subtext));
            startIndex = endIndex;
        }

        // Tables: one chunk per entry, or several line-aligned chunks when the
        // entry's text is longer than maxLength.
        int tableCount = noTables ? 0 : tableList.size();
        for (int j = 0; j < tableCount; ++j) {
            String mdStr = String.valueOf(tableList.get(j));
            if (mdStr.length() > maxLength) {
                for (String md : splitMarkdown(pre, mdStr, maxLength)) {
                    result.add(newChunk(file, ++order, md));
                }
            } else {
                result.add(newChunk(file, ++order, pre + mdStr));
            }
        }

        ChunkService.batchInsert(result);
        DB.update(DBRoute.of("aidb"),
                "update t_gai_repo_doc_manage set ftitle=?,furl=? where fentryid=?",
                new Object[]{title, url, file.getFileId()});
        return true;
    }

    /**
     * Creates one chunk for {@code file} with the given 1-based order and text.
     * The second constructor argument is the same literal {@code 1} for every chunk.
     */
    private static Chunk newChunk(ChunkInput file, int order, String text) {
        Chunk chunk = new Chunk(file, 1);
        chunk.setOrder(order);
        chunk.setChunk(text);
        return chunk;
    }

    /**
     * Splits {@code text} on line breaks into blocks that each start with {@code pre}.
     *
     * <p>Lines are appended (each followed by {@code '\n'}) to the current block until
     * adding the next line would exceed {@code chunkLength}; the block is then emitted
     * and a new one is started. A single line is never cut, so a block holding one
     * over-long line can exceed {@code chunkLength}.
     *
     * @param pre         prefix placed at the start of every block
     * @param text        text to split
     * @param chunkLength target maximum block length, prefix included
     * @return the blocks in input order
     */
    private static List<String> splitMarkdown(String pre, String text, int chunkLength) {
        String[] lines = text.split("\n");
        List<String> blocks = new ArrayList<>();
        StringBuilder block = new StringBuilder(pre);
        for (String line : lines) {
            // Flush only when the block already carries at least one line; otherwise an
            // over-long first line (or an over-long prefix) would emit a prefix-only block.
            boolean hasLines = block.length() > pre.length();
            if (hasLines && block.length() + line.length() > chunkLength) {
                blocks.add(block.toString());
                block.setLength(0);
                block.append(pre);
            }
            block.append(line).append('\n');
        }
        // Skip a trailing block that holds nothing beyond the prefix and a single newline.
        if (block.length() - (pre.length() + 1) > 0) {
            blocks.add(block.toString());
        }
        return blocks;
    }
}

