package dev.langchain4j.data.document.parser.apache.tika;

import dev.langchain4j.data.document.BlankDocumentException;
import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.DocumentParser;
import dev.langchain4j.internal.Utils;
import java.io.InputStream;
import java.util.HashMap;
import java.util.function.Supplier;
import org.apache.tika.exception.ZeroByteFileException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.ContentHandler;

/* loaded from: input_file:lib/langchain4j-document-parser-apache-tika-1.1.0-beta7.jar:dev/langchain4j/data/document/parser/apache/tika/ApacheTikaDocumentParser.class */
/**
 * {@link DocumentParser} implementation that delegates text extraction to Apache Tika.
 *
 * <p>Each call to {@link #parse(InputStream)} asks the configured suppliers for a Tika
 * {@link Parser}, {@link ContentHandler}, {@link Metadata} and {@link ParseContext}, runs the
 * parser over the stream and turns the content handler's string form into a {@link Document}.
 * Any supplier that is not provided falls back to the matching {@code DEFAULT_*_SUPPLIER}.
 */
public class ApacheTikaDocumentParser implements DocumentParser {

    /** Write limit handed to {@link BodyContentHandler} by the default content handler supplier. */
    private static final int NO_WRITE_LIMIT = -1;

    /** Creates a new {@link AutoDetectParser} on every call. */
    public static final Supplier<Parser> DEFAULT_PARSER_SUPPLIER = AutoDetectParser::new;

    /** Creates a new, empty Tika {@link Metadata} on every call. */
    public static final Supplier<Metadata> DEFAULT_METADATA_SUPPLIER = Metadata::new;

    /** Creates a new {@link ParseContext} on every call. */
    public static final Supplier<ParseContext> DEFAULT_PARSE_CONTEXT_SUPPLIER = ParseContext::new;

    /** Creates a new {@link BodyContentHandler} built with {@code NO_WRITE_LIMIT} on every call. */
    public static final Supplier<ContentHandler> DEFAULT_CONTENT_HANDLER_SUPPLIER =
            () -> new BodyContentHandler(NO_WRITE_LIMIT);

    private final Supplier<Parser> parserSupplier;
    private final Supplier<ContentHandler> contentHandlerSupplier;
    private final Supplier<Metadata> metadataSupplier;
    private final Supplier<ParseContext> parseContextSupplier;
    private final boolean includeMetadata;

    /** Creates a parser that uses all default suppliers and does not attach metadata. */
    public ApacheTikaDocumentParser() {
        this(false);
    }

    /**
     * Creates a parser that uses all default suppliers.
     *
     * @param z whether the metadata collected by Tika is attached to the returned document
     */
    public ApacheTikaDocumentParser(boolean z) {
        this(null, null, null, null, z);
    }

    /**
     * Creates a parser from fixed Tika component instances; metadata is not attached.
     *
     * <p>A non-null argument is handed out again on every {@link #parse(InputStream)} call, so
     * the same instance is shared by all invocations. A {@code null} argument is replaced by a
     * new default instance on each call.
     *
     * @param parser         Tika parser to reuse, or {@code null} for the default
     * @param contentHandler content handler to reuse, or {@code null} for the default
     * @param metadata       Tika metadata to reuse, or {@code null} for the default
     * @param parseContext   parse context to reuse, or {@code null} for the default
     * @deprecated use
     *     {@link #ApacheTikaDocumentParser(Supplier, Supplier, Supplier, Supplier, boolean)}
     *     instead.
     */
    @Deprecated(forRemoval = true)
    public ApacheTikaDocumentParser(Parser parser, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) {
        this(
                () -> Utils.getOrDefault(parser, DEFAULT_PARSER_SUPPLIER),
                () -> Utils.getOrDefault(contentHandler, DEFAULT_CONTENT_HANDLER_SUPPLIER),
                () -> Utils.getOrDefault(metadata, DEFAULT_METADATA_SUPPLIER),
                () -> Utils.getOrDefault(parseContext, DEFAULT_PARSE_CONTEXT_SUPPLIER),
                false);
    }

    /**
     * Creates a parser from component suppliers; metadata is not attached.
     *
     * @param supplier  supplier of the Tika parser, or {@code null} for the default
     * @param supplier2 supplier of the content handler, or {@code null} for the default
     * @param supplier3 supplier of the Tika metadata, or {@code null} for the default
     * @param supplier4 supplier of the parse context, or {@code null} for the default
     * @deprecated use
     *     {@link #ApacheTikaDocumentParser(Supplier, Supplier, Supplier, Supplier, boolean)}
     *     instead.
     */
    @Deprecated(forRemoval = true)
    public ApacheTikaDocumentParser(Supplier<Parser> supplier, Supplier<ContentHandler> supplier2, Supplier<Metadata> supplier3, Supplier<ParseContext> supplier4) {
        this(supplier, supplier2, supplier3, supplier4, false);
    }

    /**
     * Creates a parser from component suppliers.
     *
     * @param supplier  supplier of the Tika parser, or {@code null} for
     *                  {@link #DEFAULT_PARSER_SUPPLIER}
     * @param supplier2 supplier of the content handler, or {@code null} for
     *                  {@link #DEFAULT_CONTENT_HANDLER_SUPPLIER}
     * @param supplier3 supplier of the Tika metadata, or {@code null} for
     *                  {@link #DEFAULT_METADATA_SUPPLIER}
     * @param supplier4 supplier of the parse context, or {@code null} for
     *                  {@link #DEFAULT_PARSE_CONTEXT_SUPPLIER}
     * @param z         whether the metadata collected by Tika is attached to the returned document
     */
    public ApacheTikaDocumentParser(Supplier<Parser> supplier, Supplier<ContentHandler> supplier2, Supplier<Metadata> supplier3, Supplier<ParseContext> supplier4, boolean z) {
        // The fallback is itself a supplier, hence the extra lambda around each default.
        this.parserSupplier = Utils.getOrDefault(supplier, () -> DEFAULT_PARSER_SUPPLIER);
        this.contentHandlerSupplier = Utils.getOrDefault(supplier2, () -> DEFAULT_CONTENT_HANDLER_SUPPLIER);
        this.metadataSupplier = Utils.getOrDefault(supplier3, () -> DEFAULT_METADATA_SUPPLIER);
        this.parseContextSupplier = Utils.getOrDefault(supplier4, () -> DEFAULT_PARSE_CONTEXT_SUPPLIER);
        this.includeMetadata = z;
    }

    /**
     * Parses the given stream with Tika and returns its text as a {@link Document}.
     *
     * <p>The stream is not closed by this method.
     *
     * @param inputStream the content to parse
     * @return a document holding the extracted text, plus the converted Tika metadata when this
     *     parser was created with metadata inclusion enabled
     * @throws BlankDocumentException if the extracted text is null or blank, or Tika raises a
     *     {@link ZeroByteFileException}
     * @throws RuntimeException wrapping any other exception raised while parsing
     */
    @Override
    public Document parse(InputStream inputStream) {
        try {
            Parser parser = this.parserSupplier.get();
            ContentHandler contentHandler = this.contentHandlerSupplier.get();
            Metadata metadata = this.metadataSupplier.get();
            ParseContext parseContext = this.parseContextSupplier.get();

            parser.parse(inputStream, contentHandler, metadata, parseContext);

            // The handler's string form is used as the document text.
            String text = contentHandler.toString();
            if (Utils.isNullOrBlank(text)) {
                throw new BlankDocumentException();
            }

            return this.includeMetadata
                    ? Document.from(text, convert(metadata))
                    : Document.from(text);
        } catch (BlankDocumentException e) {
            // Rethrown as-is so the generic handler below does not wrap it.
            throw e;
        } catch (ZeroByteFileException e) {
            // An empty input is reported the same way as a document without text.
            throw new BlankDocumentException();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Copies Tika metadata into a langchain4j metadata object.
     *
     * <p>Every name becomes one entry; the values of a name are joined with {@code ";"}.
     *
     * @param metadata the Tika metadata filled during parsing
     * @return langchain4j metadata with one string value per Tika metadata name
     */
    private dev.langchain4j.data.document.Metadata convert(Metadata metadata) {
        HashMap<String, String> joinedValues = new HashMap<>();
        for (String name : metadata.names()) {
            joinedValues.put(name, String.join(";", metadata.getValues(name)));
        }
        return new dev.langchain4j.data.document.Metadata(joinedValues);
    }
}
