/*
 * Decompiled with CFR 0.152.
 */
package dev.langchain4j.data.document.parser;

import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.DocumentParser;
import dev.langchain4j.data.document.DocumentType;
import dev.langchain4j.data.document.Metadata;
import dev.langchain4j.internal.ValidationUtils;
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.extractor.POITextExtractor;

/**
 * {@link DocumentParser} that obtains a document's text through Apache POI's
 * {@link ExtractorFactory} and tags the resulting {@link Document} with a
 * caller-supplied {@link DocumentType}.
 *
 * <p>Judging by the class name it is intended for Microsoft Office files; the exact
 * set of supported formats is whatever {@code ExtractorFactory.createExtractor}
 * accepts for the given stream.
 */
public class MsOfficeDocumentParser
implements DocumentParser {
    /** Value stored under the {@code "document_type"} metadata key of every parsed document. */
    private final DocumentType documentType;

    /**
     * Creates a parser that labels its output with the given document type.
     *
     * @param documentType the type to record in each parsed document's metadata;
     *                     validated via {@code ValidationUtils.ensureNotNull}
     */
    public MsOfficeDocumentParser(DocumentType documentType) {
        this.documentType = ValidationUtils.ensureNotNull(documentType, "documentType");
    }

    /**
     * Extracts the text content of the supplied stream and wraps it in a {@link Document}.
     *
     * <p>The POI extractor created for the stream is closed before this method returns,
     * whether extraction succeeds or fails.
     *
     * @param inputStream the stream to read the document from
     * @return a document holding the extracted text and a {@code "document_type"} metadata entry
     * @throws RuntimeException wrapping any {@link IOException} raised while creating,
     *                          reading from, or closing the extractor
     */
    public Document parse(InputStream inputStream) {
        try (POITextExtractor poiExtractor = ExtractorFactory.createExtractor(inputStream)) {
            // Pull the text first, then build the metadata, mirroring the original evaluation order.
            String extractedText = poiExtractor.getText();
            Metadata metadata = new Metadata().add("document_type", this.documentType);
            return new Document(extractedText, metadata);
        } catch (IOException ioFailure) {
            // Surface I/O problems as unchecked, keeping the original exception as the cause.
            throw new RuntimeException(ioFailure);
        }
    }
}

