package opennlp.tools.formats.masc;

import java.io.IOException;
import java.util.List;
import opennlp.tools.tokenize.TokenSample;
import opennlp.tools.util.FilterObjectStream;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Span;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:lib/opennlp-tools-2.5.4.jar:opennlp/tools/formats/masc/MascTokenSampleStream.class */
/**
 * Converts a stream of {@link MascDocument}s into a stream of {@link TokenSample}s,
 * producing one sample per sentence.
 *
 * <p>Only documents for which {@link MascDocument#hasPennTags()} returns {@code true}
 * are used. Sentences whose token text is empty, or which contain a zero-length token
 * span, are logged with a warning and skipped.
 */
public class MascTokenSampleStream extends FilterObjectStream<MascDocument, TokenSample> {
    private static final Logger logger = LoggerFactory.getLogger(MascTokenSampleStream.class);

    /** Document currently being read sentence by sentence; {@code null} once exhausted. */
    private MascDocument buffer;

    /**
     * Creates the stream and positions it on the first document that has Penn tags.
     *
     * @param objectStream the underlying stream of documents
     * @throws IOException if reading fails or no document with Penn tags exists
     */
    public MascTokenSampleStream(ObjectStream<MascDocument> objectStream) throws IOException {
        super(objectStream);
        try {
            this.buffer = nextPennDocument();
        } catch (Exception e) {
            // Keep the original exception as the cause so the root failure is not lost.
            throw new IOException("None of the documents has Penn tokenization" + e.getMessage(), e);
        }
        if (this.buffer == null) {
            // The underlying stream ended before any usable document was found.
            throw new IOException("None of the documents has Penn tokenization");
        }
    }

    /**
     * Returns the next sentence as a token sample, skipping invalid sentences.
     *
     * @return the next sample, or {@code null} when no more sentences are available
     * @throws IOException if the underlying data cannot be read
     */
    @Override
    public TokenSample read() throws IOException {
        try {
            while (true) {
                MascSentence sentence = nextSentence();
                if (sentence == null) {
                    return null;
                }
                String tokenText = sentence.getTokenText();
                List<Span> tokensSpans = sentence.getTokensSpans();
                // Validity is evaluated afresh for every sentence, so one bad sentence
                // does not cause all following sentences to be discarded.
                if (isUsable(tokenText, tokensSpans)) {
                    return new TokenSample(tokenText, tokensSpans.toArray(new Span[0]));
                }
            }
        } catch (IOException e) {
            throw new IOException("Could not get a sample of tokens from the data.", e);
        }
    }

    /**
     * Resets the underlying stream and positions this stream on the first document
     * with Penn tags, mirroring what the constructor does.
     *
     * @throws IOException if the underlying stream cannot be reset or read
     * @throws UnsupportedOperationException if the underlying stream does not support reset
     */
    @Override
    public void reset() throws IOException, UnsupportedOperationException {
        this.samples.reset();
        this.buffer = nextPennDocument();
    }

    /** Reads documents from the underlying stream until one has Penn tags; null at end of stream. */
    private MascDocument nextPennDocument() throws IOException {
        MascDocument document = this.samples.read();
        while (document != null && !document.hasPennTags()) {
            document = this.samples.read();
        }
        return document;
    }

    /** Returns the next sentence, advancing to following documents as needed; null when exhausted. */
    private MascSentence nextSentence() throws IOException {
        if (this.buffer == null) {
            // Already exhausted (or reset onto an empty stream): stay at end of stream.
            return null;
        }
        MascSentence sentence = this.buffer.read();
        while (sentence == null) {
            this.buffer = nextPennDocument();
            if (this.buffer == null) {
                return null;
            }
            sentence = this.buffer.read();
        }
        return sentence;
    }

    /** Checks that the sentence text is non-empty and no token span is empty, warning otherwise. */
    private static boolean isUsable(String tokenText, List<Span> tokensSpans) {
        boolean usable = true;
        if (tokenText.length() == 0) {
            logger.warn("Zero sentence found. There is a sentence without any tokens. sentence: {}, spans: {}", tokenText, tokensSpans);
            usable = false;
        }
        for (Span span : tokensSpans) {
            if (span.getEnd() - span.getStart() == 0) {
                logger.warn("Zero token found. There is a token without any quarks. sentence: {}, spans: {}", tokenText, tokensSpans);
                usable = false;
            }
        }
        return usable;
    }
}
