/*
 * Decompiled with CFR 0.152.
 */
package kd.bos.gptas.kmbase.parser;

import java.io.IOException;
import java.io.InputStream;
import kd.bos.gptas.kmbase.parser.AbstractParser;
import kd.bos.gptas.kmbase.parser.DocumentFormatUtils;
import kd.bos.logging.Log;
import kd.bos.logging.LogFactory;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;
import org.apache.pdfbox.util.Matrix;

/**
 * Parser that extracts plain text from a PDF document using PDFBox.
 *
 * <p>Glyphs whose text matrix has a non-zero shear component are kept out of
 * the extracted content; they are collected separately and only reported in a
 * warning log entry, where they are described as watermark content.
 */
public class PDFParser
extends AbstractParser {
    private static final Log logger = LogFactory.getLog(PDFParser.class);

    /**
     * Extracts the text of the PDF read from {@code in}.
     *
     * <p>The loaded {@link PDDocument} is closed before this method returns,
     * whether extraction succeeds or fails. This method does not explicitly
     * close {@code in}.
     *
     * @param in stream positioned at the beginning of a PDF document
     * @return the extracted text with NUL characters removed, after being
     *         passed through {@code DocumentFormatUtils.documentClear}
     * @throws IOException if the document cannot be loaded or its text cannot
     *                     be extracted
     */
    @Override
    public String parseText(InputStream in) throws IOException {
        // Collects the glyphs that are excluded from the extracted content.
        final StringBuilder skipped = new StringBuilder();
        String content;
        // try-with-resources: the document holds buffers that must be
        // released even when text extraction throws.
        try (PDDocument document = PDDocument.load(in)) {
            PDFTextStripper stripper = new PDFTextStripper(){

                @Override
                protected void processTextPosition(TextPosition text) {
                    Matrix matrix = text.getTextMatrix();
                    if (matrix.getShearX() == 0.0f && matrix.getShearY() == 0.0f) {
                        super.processTextPosition(text);
                    } else {
                        // Sheared glyph: do not forward it to the stripper,
                        // just remember it for the warning below.
                        skipped.append(text.toString());
                    }
                }
            };
            stripper.setSortByPosition(true);
            content = stripper.getText(document);
        }
        // Strip NUL characters from the extracted text.
        content = content.replace("\u0000", "");
        if (skipped.length() > 0) {
            // Message text (Chinese): "Watermark content exists, ignored: {}".
            logger.warn("\u5b58\u5728\u6c34\u5370\u5185\u5bb9,\u5df2\u5ffd\u7565\uff1a{}", (Object)skipped.toString());
        }
        return DocumentFormatUtils.documentClear(content);
    }
}

