package kd.bos.gptas.km.knowledage;

import com.alibaba.fastjson.JSONObject;
import com.alibaba.nacos.common.utils.ThreadUtils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import kd.bos.context.RequestContext;
import kd.bos.data.BusinessDataReader;
import kd.bos.dataentity.OperateOption;
import kd.bos.dataentity.entity.DynamicObject;
import kd.bos.dataentity.entity.DynamicObjectCollection;
import kd.bos.dataentity.metadata.dynamicobject.DynamicObjectType;
import kd.bos.dataentity.resource.ResManager;
import kd.bos.dataentity.utils.StringUtils;
import kd.bos.entity.EntityMetadataCache;
import kd.bos.gptas.km.File.FileInfoHandle;
import kd.bos.gptas.km.article.Child;
import kd.bos.gptas.km.article.Classifies;
import kd.bos.gptas.km.article.Content;
import kd.bos.gptas.km.article.SearchResult;
import kd.bos.gptas.km.splitter.TextSplitter;
import kd.bos.gptas.km.splitter.TextSplitterProvider;
import kd.bos.gptas.milvus.Chunk;
import kd.bos.gptas.milvus.MilvusDao;
import kd.bos.orm.query.QFilter;
import kd.bos.servicehelper.operation.OperationServiceHelper;
import kd.bos.threads.ThreadPools;
import kd.bos.util.HttpClientUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:kd/bos/gptas/km/knowledage/ArticleHandler.class */
/**
 * Static helpers that import knowledge articles into the {@code knl_corpus} entity.
 *
 * <p>The article list is read page by page from {@code api.kingdee.com}, each article page is
 * fetched from {@code vip.kingdee.com}, its text is extracted and split into segments, the
 * records are saved through {@code OperationServiceHelper}, and the saved segments are finally
 * pushed to Milvus through {@code MilvusDao}.
 */
public class ArticleHandler {
    /** Logger shared by all static methods of this class. */
    private static final Logger log = LoggerFactory.getLogger(ArticleHandler.class);
    /** Third argument of every {@code ResManager.loadKDString} call in this class (presumably the resource project name - TODO confirm). */
    private static final String BOS_DEVPORTAL_GPTAS = "bos-devportal-gptas";
    /** {@code String.format} template of an article page URL; {@code %d} is the numeric article id. */
    private static final String ARTICLE_URL = "https://vip.kingdee.com/knowledge/%d";

    /**
     * Schedules one background import task per domain.
     *
     * <p>Nothing is scheduled when {@code FileInfoHandle.getAssistantInfo(map)} returns
     * {@code false} (a warning is logged) or when the {@code "productline"} entry of
     * {@code map} is blank. Otherwise that entry is split on commas and, for every domain in
     * {@code list}, a task is submitted to the {@code "knowledage-import"} thread pool that
     * downloads the articles of each series code in turn and logs how many were handled.
     *
     * @param list domains to import
     * @param map  import settings; this method reads {@code "productline"} and passes the map on
     *             to the download and save steps
     */
    public static void importArticle(List<String> list, Map<String, Object> map) {
        boolean assistantConfigured = FileInfoHandle.getAssistantInfo(map);
        if (!assistantConfigured) {
            log.warn(ResManager.loadKDString("知识库索引或者问答产品字段为空，请先设置应用助手的知识库索引或者问答产品字段。", "ArticleHandler_3", BOS_DEVPORTAL_GPTAS, new Object[0]));
            return;
        }
        CloudAPIHelper apiHelper = new CloudAPIHelper();
        String productLines = (String) map.get("productline");
        if (!StringUtils.isNotBlank(productLines)) {
            // No series codes configured: nothing to download.
            return;
        }
        String[] seriesCodes = productLines.split(",");
        for (String domain : list) {
            // One task per domain; the series codes of a domain are processed sequentially.
            ThreadPools.executeOnce("knowledage-import", () -> {
                for (String seriesCode : seriesCodes) {
                    int fetched = downloadArticles(domain, apiHelper, map, seriesCode);
                    log.info(domain + " 已获取" + fetched + "篇文章");
                }
            });
        }
    }

    /**
     * Downloads the article list of one domain / series code page by page and saves every page.
     *
     * <p>Paging stops when a page is empty, when the page number reported by the service is
     * lower than the local page counter, when the service flags the page as the last one, or
     * after three consecutive failed iterations. A failed iteration is logged together with its
     * stack trace and retried after a one-second pause, so a persistent error can no longer keep
     * the loop spinning forever.
     *
     * @param str            domain sent as the {@code domain} query parameter
     * @param cloudAPIHelper helper that builds the request headers
     * @param map            import settings handed to {@code saveContentByList}
     * @param str2           series code sent as the {@code series_code} query parameter
     * @return the sum of the values returned by {@code saveContentByList} for the pages processed
     */
    private static int downloadArticles(String str, CloudAPIHelper cloudAPIHelper, Map<String, Object> map, String str2) {
        final int maxConsecutiveFailures = 3;
        int page = 1;
        int handled = 0;
        int failures = 0;
        while (true) {
            // NOTE(review): the query asks for page + 1 while the header helper and the
            // getNumber() comparison use page - confirm the intended page numbering.
            String query = "category_code=developer01&series_code=" + str2 + "&page=" + (page + 1) + "&pageSize=50&domain=" + str;
            try {
                String body = HttpClientUtils.get("https://api.kingdee.com/knowledge/productKnowledge?" + query, cloudAPIHelper.getGetRequestHeader(str2, page, str, "/knowledge/productKnowledge", query), (Map) null);
                SearchResult searchResult = (SearchResult) JSONObject.parseObject(body, SearchResult.class);
                // A missing body or a missing content list is treated like an empty page.
                List<Content> content = searchResult == null ? null : searchResult.getContent();
                if (content == null || content.isEmpty()) {
                    log.info("获取文章数量为0,结束.");
                    break;
                }
                if (searchResult.getTotalPages().intValue() >= 50) {
                    log.warn("取数超过50页,请重新设置过滤条件" + query);
                }
                if (searchResult.getNumber().intValue() < page) {
                    log.info("无法获取新的页码.已获取" + page + " 页数据");
                    break;
                }
                handled += saveContentByList(content, map);
                if (searchResult.isLast()) {
                    log.info("获取结束.已获取" + page + " 页数据");
                    break;
                }
                page++;
                failures = 0;
            } catch (Exception e) {
                failures++;
                log.error("downloadArticles error, page=" + (page + 1) + ", attempt=" + failures + "/" + maxConsecutiveFailures, e);
                if (failures >= maxConsecutiveFailures) {
                    // Give up instead of retrying the same page without end.
                    break;
                }
                ThreadUtils.sleep(1000L);
            }
        }
        return handled;
    }

    /**
     * Removes characters that are not allowed in a file name.
     *
     * <p>Every occurrence of {@code / \ : * ? " < > |}, carriage return and line feed is
     * dropped; all other characters are kept in their original order.
     *
     * @param str text to clean; must not be {@code null}
     * @return {@code str} without the characters listed above
     */
    private static String normalizeFileName(String str) {
        // One character class covers both the reserved characters and the line breaks.
        final String forbiddenChars = "[/\\\\:*?\"<>|\\r\\n]";
        return str.replaceAll(forbiddenChars, "");
    }

    /**
     * Saves one page of search results, starting with a retry counter of zero.
     *
     * @param list articles of one result page
     * @param map  import settings passed through unchanged
     * @return the value returned by the three-argument overload
     */
    private static int saveContentByList(List<Content> list, Map<String, Object> map) {
        final int initialRetryCount = 0;
        return saveContentByList(list, initialRetryCount, map);
    }

    /**
     * Fetches the page of every article in {@code list}, converts supported articles into
     * {@code knl_corpus} records and saves them.
     *
     * <p>Articles without a title, with an entity type other than {@code "Knowledge"}, or with an
     * unsupported content type are counted as handled but not saved. An article whose page cannot
     * be downloaded is logged and not counted. A page without any {@code data-n-head} element is
     * treated as a temporary lock: the same article is fetched again after a growing pause. Once
     * the retry counter exceeds 5 the rest of the list is abandoned; records collected so far are
     * still saved. Retrying only the affected article (instead of re-running the whole list)
     * keeps already collected records from being saved twice.
     *
     * @param list articles of one result page
     * @param i    number of lock retries already spent; it also lengthens the pause (5 + i seconds)
     * @param map  import settings passed on to the save and vector-upload steps
     * @return number of articles handled (saved or deliberately skipped)
     */
    private static int saveContentByList(List<Content> list, int i, Map<String, Object> map) {
        int handled = 0;
        int lockRetries = i;
        List<DynamicObject> pending = new ArrayList<>();
        for (Content article : list) {
            if (article.getTitle() == null) {
                handled++;
                continue;
            }
            if (!"Knowledge".equals(article.getEntity_type())) {
                log.warn("位置文章类型" + article.getEntity_type());
                handled++;
                continue;
            }
            String url = String.format(ARTICLE_URL, Long.valueOf(article.getId()));
            Document page = null;
            boolean lockedOut = false;
            while (page == null) {
                String html = fetchArticleHtml(url);
                if (html == null) {
                    log.error("获取文章详情为空。");
                    break;
                }
                Document parsed = Jsoup.parse(html);
                if (!parsed.getElementsByAttribute("data-n-head").isEmpty()) {
                    page = parsed;
                    break;
                }
                // No data-n-head element: the site answered with its lock page.
                if (lockRetries > 5) {
                    log.error("程序被锁定,超过10次，终止程序，下次重新启动。");
                    lockedOut = true;
                    break;
                }
                int waitSeconds = 5 + lockRetries;
                log.info("程序被锁定,获取文章" + article.getId() + article.getTitle() + "等待" + waitSeconds + "秒重试.....");
                ThreadUtils.sleep(waitSeconds * 1000L);
                lockRetries++;
            }
            if (lockedOut) {
                // Stop processing this page of results; what is pending is still flushed below.
                break;
            }
            if (page == null) {
                continue;
            }
            if (!queueArticle(article, page, map, pending)) {
                log.warn("文章不对" + article.getId() + article.getTitle() + "是视频，忽略。");
            }
            handled++;
        }
        if (handled != list.size()) {
            log.info("文章数量不对:" + list.size() + " " + handled);
        }
        if (!pending.isEmpty()) {
            insert2VectorLib(map, saveKnowledage(pending));
        }
        return handled;
    }

    /** Downloads an article page; a {@code null} first answer is retried once after one second. */
    private static String fetchArticleHtml(String url) {
        String html = HttpClientUtils.get(url, (Map) null, 5000, 8000);
        if (html == null) {
            ThreadUtils.sleep(1000L);
            html = HttpClientUtils.get(url, (Map) null, (Map) null);
        }
        return html;
    }

    /** Builds the HTML for an "Article" or "Answer" page and queues it; returns {@code false} when nothing was queued. */
    private static boolean queueArticle(Content article, Document page, Map<String, Object> map, List<DynamicObject> pending) {
        String contentType = article.getKnowledgeContentType();
        // NOTE(review): the meta name is looked up through ResManager, so it may depend on the
        // current language - confirm that matching a localized name is intended.
        if ("Article".equals(contentType)) {
            String detail = getDataAttributeValue(page, ResManager.loadKDString("文章详情", "ArticleHandler_0", BOS_DEVPORTAL_GPTAS, new Object[0]));
            if (detail == null) {
                return false;
            }
            saveToFile(article, detail, map, pending);
            return true;
        }
        if ("Answer".equals(contentType)) {
            String questionLabel = ResManager.loadKDString("问题详情", "ArticleHandler_1", BOS_DEVPORTAL_GPTAS, new Object[0]);
            String answerLabel = ResManager.loadKDString("回答", "ArticleHandler_2", BOS_DEVPORTAL_GPTAS, new Object[0]);
            String question = getDataAttributeValue(page, ResManager.loadKDString("文章详情", "ArticleHandler_0", BOS_DEVPORTAL_GPTAS, new Object[0]));
            // Argument order follows the template: question label, question text, answer label.
            String header = String.format("<div><p>%1$s:</p>%2$s\r\n<p>%3$s:</p>", questionLabel, question == null ? "" : question, answerLabel);
            Elements answers = page.getElementsByTag("main");
            saveToFile(article, header + answers.outerHtml() + "</div>", map, pending);
            return true;
        }
        return false;
    }

    /**
     * Looks up the {@code content} attribute of the first element that carries a
     * {@code data-n-head} attribute and whose {@code name} attribute equals {@code str}.
     *
     * @param document parsed article page
     * @param str      expected value of the {@code name} attribute
     * @return the {@code content} attribute of the first match, or {@code null} when no element matches
     */
    private static String getDataAttributeValue(Document document, String str) {
        for (Element candidate : document.getElementsByAttribute("data-n-head")) {
            String name = candidate.attributes().get("name");
            if (name.equals(str)) {
                return candidate.attributes().get("content");
            }
        }
        return null;
    }

    /**
     * Converts one article into a record and appends it to the pending batch.
     *
     * <p>Nothing is appended when {@code createKnlInstance} returns {@code null}. As soon as the
     * batch holds 50 or more records it is saved and the saved ids are handed to
     * {@code insert2VectorLib}.
     *
     * @param content article metadata
     * @param str     article HTML
     * @param map     import settings
     * @param list    pending batch; appended to, and emptied by {@code saveKnowledage} when flushed
     */
    private static void saveToFile(Content content, String str, Map<String, Object> map, List<DynamicObject> list) {
        DynamicObject record = createKnlInstance(content, str, map);
        if (record != null) {
            list.add(record);
            boolean batchFull = list.size() >= 50;
            if (batchFull) {
                List<Object> savedIds = saveKnowledage(list);
                insert2VectorLib(map, savedIds);
            }
        }
    }

    /**
     * Builds (or refreshes) the {@code knl_corpus} record of one article.
     *
     * <p>Existing records are looked up by {@code number} = article id. The method returns
     * {@code null}, meaning "nothing to save", when the first stored record is at least as new
     * as the article, when more than one record shares the number, or when the text cannot be
     * extracted from the HTML. Text extraction runs before any field is written, so a failed
     * extraction leaves a record loaded from the cache untouched; the failure is logged.
     *
     * @param content article metadata
     * @param str     article HTML
     * @param map     import settings; {@code "type"} is read here and by the segment splitter
     * @return the populated record, or {@code null} when the article must be skipped
     */
    private static DynamicObject createKnlInstance(Content content, String str, Map<String, Object> map) {
        long id = content.getId();
        long updatedAt = content.getKnowledgeUpdatedAt();
        Map<?, ?> cached = BusinessDataReader.loadFromCache("knl_corpus", new QFilter[]{new QFilter("number", "=", String.valueOf(id))});
        DynamicObject record;
        if (cached.isEmpty()) {
            record = (DynamicObject) EntityMetadataCache.getDataEntityType("knl_corpus").createInstance();
        } else {
            DynamicObject existing = (DynamicObject) cached.values().iterator().next();
            if (updatedAt <= existing.getLong("updatetime")) {
                // Stored copy is already up to date.
                return null;
            }
            if (cached.size() != 1) {
                log.warn("存在多条相同编码的知识");
                return null;
            }
            record = existing;
        }
        String text;
        try {
            text = new HtmlHandler().extract(str);
        } catch (IOException e) {
            log.error("Failed to extract text of article " + id, e);
            return null;
        }
        record.set("number", Long.valueOf(id));
        record.set("updatetime", Long.valueOf(updatedAt));
        record.set("datasource_id", Long.valueOf(getDataSourceId(content)));
        record.set("filepath", String.format(ARTICLE_URL, Long.valueOf(id)));
        record.set("name", getFileName(content));
        record.set("status", "B");
        record.set("enable", "1");
        record.set("uploadstatus", "I");
        record.set("creator", Long.valueOf(Long.parseLong(RequestContext.get().getUserId())));
        record.set("group_id", getGroup(content, (String) map.get("type")));
        record.set("inputcontent_tag", text);
        record.set("segmententity", getEntryDynamicObjectCollection(text, record, map));
        return record;
    }

    /**
     * Derives a record name from an article: its title, or its id when the title is
     * {@code null}, cleaned by {@code normalizeFileName}.
     *
     * @param content article metadata
     * @return the cleaned name
     */
    private static String getFileName(Content content) {
        String title = content.getTitle();
        String rawName = title != null ? title : String.valueOf(content.getId());
        return normalizeFileName(rawName);
    }

    /**
     * Maps the knowledge content type of an article to a data source id.
     *
     * @param content article metadata
     * @return 1000 for {@code "Article"}, 1001 for {@code "Answer"}, otherwise 0 (an info line is logged)
     */
    private static long getDataSourceId(Content content) {
        String contentType = content.getKnowledgeContentType();
        if ("Article".equals(contentType)) {
            return 1000L;
        }
        if ("Answer".equals(contentType)) {
            return 1001L;
        }
        log.info("类型不对");
        return 0L;
    }

    /**
     * Resolves the {@code knl_group} key for an article.
     *
     * <p>Groups are looked up by {@code longnumber} (the code built from the article's
     * classification) and {@code type}. When several groups match, the key visited last wins;
     * when none matches, the fixed id {@code 1865176043442328718L} is returned (presumably a
     * default group - TODO confirm).
     *
     * @param content article metadata
     * @param str     value matched against the group's {@code type} field
     * @return the key of a matching group or the fallback id
     */
    private static Object getGroup(Content content, String str) {
        Object groupId = Long.valueOf(1865176043442328718L);
        String longNumber = getCode(content.getClassifies());
        QFilter[] filters = {new QFilter("longnumber", "=", longNumber), new QFilter("type", "=", str)};
        Map<?, ?> matches = BusinessDataReader.loadFromCache("knl_group", filters);
        for (Object key : matches.keySet()) {
            // Keep overwriting so that the last key in iteration order is returned.
            groupId = key;
        }
        return groupId;
    }

    /**
     * Builds the group code of an article from the last entry of its classification list.
     *
     * <p>The code of the entry's child is used when the child's type is {@code "domain"}; when
     * that child has a child of its own with type {@code "module"}, {@code "."} plus the module
     * code is appended. Type comparison ignores case. A {@code null} or empty list, a
     * {@code null} last entry, or a missing child yields an empty string.
     *
     * @param list classification entries of the article; may be {@code null}
     * @return the assembled code, possibly empty, never {@code null}
     */
    private static String getCode(List<Classifies> list) {
        if (list == null || list.isEmpty()) {
            return "";
        }
        Classifies last = list.get(list.size() - 1);
        Child domain = last == null ? null : last.getChild();
        if (domain == null) {
            return "";
        }
        StringBuilder code = new StringBuilder();
        if ("domain".equalsIgnoreCase(domain.getType())) {
            code.append(domain.getCode());
        }
        Child module = domain.getChild();
        if (module != null && "module".equalsIgnoreCase(module.getType())) {
            code.append(".").append(module.getCode());
        }
        return code.toString();
    }

    /**
     * Splits the extracted text into segments and wraps each one in a {@code segmententity} row.
     *
     * @param str           extracted article text
     * @param dynamicObject parent record; only its {@code segmententity} entry type is read
     * @param map           import settings; {@code "type"} selects the text splitter
     * @return a new collection with one row per segment
     */
    private static DynamicObjectCollection getEntryDynamicObjectCollection(String str, DynamicObject dynamicObject, Map<String, Object> map) {
        String splitterType = (String) map.get("type");
        TextSplitter splitter = new TextSplitter(TextSplitterProvider.getRecursiveCharacterTextSplitter(splitterType));
        List<Object> segments = splitter.split(str, 0);
        DynamicObjectCollection rows = new DynamicObjectCollection();
        DynamicObjectType rowType = (DynamicObjectType) dynamicObject.getDynamicObjectType().getAllEntities().get("segmententity");
        for (Object segment : segments) {
            DynamicObject row = new DynamicObject(rowType);
            row.set("segment_tag", segment);
            // NOTE(review): a char literal is stored here, whereas createKnlInstance stores the
            // string "1" in "enable" - confirm the field accepts a Character.
            row.set("segenable", '1');
            rows.add(row);
        }
        return rows;
    }

    /**
     * Runs the {@code "save"} operation on {@code knl_corpus} for the whole batch, logs how many
     * records were submitted and how many succeeded, and then empties the batch.
     *
     * @param list records to save; cleared before returning
     * @return primary keys of the records that were saved successfully
     */
    private static List<Object> saveKnowledage(List<DynamicObject> list) {
        DynamicObject[] batch = list.toArray(new DynamicObject[0]);
        List<Object> savedIds = OperationServiceHelper.executeOperate("save", "knl_corpus", batch, OperateOption.create()).getSuccessPkIds();
        Integer submitted = Integer.valueOf(list.size());
        Integer succeeded = Integer.valueOf(savedIds.size());
        log.info(String.format("本次操作共%d条数据，成功%d条", submitted, succeeded));
        list.clear();
        return savedIds;
    }

    /**
     * Uploads the saved records to Milvus when an index method is configured.
     *
     * @param map  import settings; a blank {@code "indexmethod"} entry disables the upload
     * @param list primary keys of the saved {@code knl_corpus} records
     */
    private static void insert2VectorLib(Map<String, Object> map, List<Object> list) {
        String indexMethod = (String) map.get("indexmethod");
        if (!StringUtils.isNotBlank(indexMethod)) {
            return;
        }
        uploadToMilvus(list, indexMethod, map);
    }

    /**
     * Loads the saved {@code knl_corpus} records and inserts their segments into Milvus, one
     * batch per record.
     *
     * <p>Each segment row becomes a {@code Chunk} carrying the segment id, the record id, the
     * assistant id taken from {@code map} and the segment text. Records without segments are
     * skipped.
     *
     * @param list primary keys of the records to upload
     * @param str  index method used to create the {@code MilvusDao}
     * @param map  import settings; {@code "assistant_id"} (a {@code Long}) and {@code "type"} are read
     */
    private static void uploadToMilvus(List<Object> list, String str, Map<String, Object> map) {
        long repositoryId = ((Long) map.get("assistant_id")).longValue();
        String type = (String) map.get("type");
        MilvusDao dao = MilvusDao.create(str);
        Object[] ids = list.toArray();
        for (DynamicObject corpus : BusinessDataReader.loadFromCache(ids, EntityMetadataCache.getDataEntityType("knl_corpus")).values()) {
            ArrayList<Chunk> chunks = new ArrayList<>();
            for (Object row : corpus.getDynamicObjectCollection("segmententity")) {
                DynamicObject segment = (DynamicObject) row;
                Chunk chunk = new Chunk();
                chunk.setId(segment.getLong("id"));
                chunk.setKNLId(Long.valueOf(corpus.getLong("id")));
                chunk.setRepositoryId(repositoryId);
                chunk.setChunk(segment.getString("segment_tag"));
                chunks.add(chunk);
            }
            if (chunks.isEmpty()) {
                continue;
            }
            FileInfoHandle.batchInsertMilvus(dao, type, chunks);
        }
    }
}
