/*
 * Decompiled with CFR 0.152.
 */
package org.apache.stanbol.commons.opennlp;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.chunker.ChunkerModel;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.sentdetect.SentenceDetector;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;
import opennlp.tools.tokenize.SimpleTokenizer;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.util.Sequence;
import opennlp.tools.util.Span;
import org.apache.stanbol.commons.opennlp.KeywordTokenizer;
import org.apache.stanbol.commons.opennlp.OpenNLP;
import org.apache.stanbol.commons.opennlp.PosTypeChunker;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class TextAnalyzer {
    /** Logger shared by all instances of this class. */
    private static final Logger log = LoggerFactory.getLogger(TextAnalyzer.class);
    /** Provider of the models and tokenizers; the constructor rejects {@code null}. */
    private final OpenNLP openNLP;
    /** Configuration of this analyzer; a default instance is created when none is passed to the constructor. */
    private final TextAnalyzerConfig config;
    /** Lazily created by {@link #getPosTagger()}. */
    private POSTaggerME posTagger;
    /** Set once no POS model could be obtained, so that {@link #getPosTagger()} does not retry. */
    private boolean posTaggerNotAvailable;
    /** Lazily created by {@link #getSentenceDetector()}. */
    private SentenceDetector sentenceDetector;
    /** Set once no sentence model could be obtained, so that {@link #getSentenceDetector()} does not retry. */
    private boolean sentenceDetectorNotAvailable;
    /** Lazily created by {@link #getChunker()}. */
    private ChunkerME chunker;
    /** Set once no chunker model could be obtained, so that {@link #getChunker()} does not retry. */
    private boolean chunkerNotAvailable;
    /** Lazily created by {@link #getPosTypeChunker()}. */
    private PosTypeChunker posTypeChunker;
    /** Set once {@code PosTypeChunker.getInstance(..)} returned {@code null}, so that the lookup is not repeated. */
    private boolean posTypeChunkerNotAvailable;
    /** Lazily selected by {@link #getTokenizer()}. */
    private Tokenizer tokenizer;
    /** Language passed to the constructor; used for all model lookups. */
    private final String language;

    /**
     * Creates an analyzer for the given language that uses a default
     * {@link TextAnalyzerConfig}.
     *
     * @param openNLP the component used to look up models and tokenizers
     * @param language the language used for all model lookups
     * @throws IllegalArgumentException if {@code openNLP} is {@code null}
     */
    public TextAnalyzer(OpenNLP openNLP, String language) {
        this(openNLP, language, null);
    }

    /**
     * Creates an analyzer for the given language and configuration.
     *
     * @param openNLP the component used to look up models and tokenizers
     * @param language the language used for all model lookups
     * @param config the configuration, or {@code null} to use a new default
     *        {@link TextAnalyzerConfig}
     * @throws IllegalArgumentException if {@code openNLP} is {@code null}
     */
    public TextAnalyzer(OpenNLP openNLP, String language, TextAnalyzerConfig config) {
        if (null == openNLP) {
            throw new IllegalArgumentException("The OpenNLP component MUST NOT be NULL");
        }
        this.openNLP = openNLP;
        this.language = language;
        if (config != null) {
            this.config = config;
        } else {
            this.config = new TextAnalyzerConfig();
        }
    }

    /**
     * Lazily creates the POS tagger for the language of this analyzer.
     * A failed lookup (no model, or an {@link IOException} while loading it)
     * is remembered so that later calls do not try again.
     *
     * @return the POS tagger, or {@code null} if POS tagging is disabled in
     *         the configuration or no model could be obtained
     */
    protected final POSTaggerME getPosTagger() {
        if (!config.enablePosTagger) {
            return null;
        }
        if (posTagger != null || posTaggerNotAvailable) {
            // Either already created or a previous attempt failed.
            return posTagger;
        }
        try {
            POSModel model = openNLP.getPartOfSpeachModel(language);
            if (model == null) {
                log.debug("No POS Model for language '{}'", language);
                posTaggerNotAvailable = true;
            } else {
                posTagger = new POSTaggerME(model);
            }
        } catch (IOException ioe) {
            log.info("Unable to load POS Model for language '" + language + "'", ioe);
            posTaggerNotAvailable = true;
        }
        return posTagger;
    }

    /**
     * Lazily selects the tokenizer used by this analyzer. A forced simple
     * tokenizer takes precedence over a forced keyword tokenizer; otherwise the
     * tokenizer provided by the OpenNLP component for the language is used,
     * falling back to {@link SimpleTokenizer} when none is returned.
     *
     * @return the tokenizer selected for this analyzer
     */
    public final Tokenizer getTokenizer() {
        if (tokenizer != null) {
            return tokenizer;
        }
        Tokenizer selected;
        if (config.forceSimpleTokenizer) {
            selected = SimpleTokenizer.INSTANCE;
        } else if (config.forceKeywordTokenizer) {
            selected = KeywordTokenizer.INSTANCE;
        } else {
            selected = openNLP.getTokenizer(language);
            if (selected == null) {
                log.debug("No Tokenizer for Language '{}': fall back to SimpleTokenizer!", language);
                selected = SimpleTokenizer.INSTANCE;
            }
        }
        tokenizer = selected;
        return selected;
    }

    /**
     * Lazily creates the model based chunker for the language of this analyzer.
     * A failed lookup (no model, or an {@link IOException} while loading it)
     * is remembered so that later calls do not try again.
     *
     * @return the chunker, or {@code null} if chunking is disabled, the
     *         {@link PosTypeChunker} is forced, or no model could be obtained
     */
    protected final ChunkerME getChunker() {
        if (!config.enableChunker) {
            return null;
        }
        if (config.forcePosTypeChunker) {
            return null;
        }
        if (chunker != null || chunkerNotAvailable) {
            // Either already created or a previous attempt failed.
            return chunker;
        }
        try {
            ChunkerModel model = openNLP.getChunkerModel(language);
            if (model == null) {
                log.debug("No Chunker Model for language {}", language);
                chunkerNotAvailable = true;
            } else {
                chunker = new ChunkerME(model);
            }
        } catch (IOException ioe) {
            log.info("Unable to load Chunker Model for language " + language, ioe);
            chunkerNotAvailable = true;
        }
        return chunker;
    }

    /**
     * Lazily obtains the {@link PosTypeChunker} for the language of this
     * analyzer, using the minimum POS tag probability of the configuration.
     * If {@code PosTypeChunker.getInstance(..)} returns {@code null} this is
     * remembered so that the lookup is not repeated.
     *
     * <p>The chunker is only provided when chunking, POS tagging and the
     * POS type chunker itself are all enabled in the configuration. The last
     * condition makes {@link TextAnalyzerConfig#enablePosTypeChunker(boolean)}
     * effective; with the default configuration all three flags are set.
     *
     * @return the POS type chunker, or {@code null} if it is disabled by the
     *         configuration or not available for the language
     */
    protected final PosTypeChunker getPosTypeChunker() {
        if (!config.enableChunker || !config.enablePosTagger || !config.enablePosTypeChunker) {
            return null;
        }
        if (posTypeChunker == null && !posTypeChunkerNotAvailable) {
            posTypeChunker = PosTypeChunker.getInstance(language, config.minPosTagProbability);
            posTypeChunkerNotAvailable = posTypeChunker == null;
        }
        return posTypeChunker;
    }

    /**
     * Lazily creates the sentence detector for the language of this analyzer.
     * A failed lookup (no model, or an {@link IOException} while loading it)
     * is remembered so that later calls do not try again.
     *
     * @return the sentence detector, or {@code null} if sentence detection is
     *         disabled in the configuration or no model could be obtained
     */
    protected final SentenceDetector getSentenceDetector() {
        if (!config.enableSentenceDetector) {
            return null;
        }
        if (sentenceDetector != null || sentenceDetectorNotAvailable) {
            // Either already created or a previous attempt failed.
            return sentenceDetector;
        }
        try {
            SentenceModel model = openNLP.getSentenceModel(language);
            if (model == null) {
                log.debug("No Sentence Detection Model for language '{}'", language);
                sentenceDetectorNotAvailable = true;
            } else {
                sentenceDetector = new SentenceDetectorME(model);
            }
        } catch (IOException ioe) {
            log.info("Unable to load Sentence Detection Model for language '" + language + "'", ioe);
            sentenceDetectorNotAvailable = true;
        }
        return sentenceDetector;
    }

    /**
     * @return the OpenNLP component passed to the constructor (never {@code null})
     */
    public final OpenNLP getOpenNLP() {
        return openNLP;
    }

    /**
     * @return the configuration used by this analyzer (never {@code null})
     */
    public final TextAnalyzerConfig getConfig() {
        return config;
    }

    /**
     * @return the language passed to the constructor
     */
    public final String getLanguage() {
        return language;
    }

    /**
     * Analyses the passed text as a single sentence (no sentence detection).
     *
     * @param sentence the sentence to analyse
     * @return the analysis of the sentence, using the language of this analyzer
     * @throws IllegalArgumentException if the sentence or the language of this
     *         analyzer is {@code null} or empty
     */
    public AnalysedText analyseSentence(String sentence) {
        AnalysedText analysed = new AnalysedText(sentence, language);
        return analysed;
    }

    /**
     * Splits the text into sentences and analyses them one by one as the
     * returned iterator is advanced.
     *
     * @param text the text to analyse; {@code null} or an empty text results
     *        in an iterator without elements
     * @return an iterator over the analysed sentences of the text
     */
    public Iterator<AnalysedText> analyse(String text) {
        Iterator<AnalysedText> sentences = new TextAnalysisIterator(text, language);
        return sentences;
    }

    public class AnalysedText {
        /** The analysed sentence; the constructor rejects {@code null} and empty values. */
        protected final String sentence;
        /** Unmodifiable list of the tokens of the sentence. */
        protected final List<Token> tokens;
        /** Unmodifiable list of the chunks, or {@code null} if no chunks were computed. */
        protected final List<Chunk> chunks;
        /** Offset passed to the constructor; never negative. */
        private final int offset;
        /** Language passed to the constructor; the constructor rejects {@code null} and empty values. */
        protected String language;

        /**
         * Analyses a sentence with offset {@code 0}.
         *
         * @param sentence the sentence; must not be {@code null} or empty
         * @param language the language; must not be {@code null} or empty
         * @throws IllegalArgumentException if an argument is invalid
         */
        private AnalysedText(String sentence, String language) {
            this(sentence, language, 0);
        }

        /**
         * Tokenizes the sentence and, if the enclosing analyzer provides the
         * components, adds POS tags and chunks. Chunks are only computed when a
         * POS tagger is available; the model based chunker is preferred over
         * the {@link PosTypeChunker}. If neither is available
         * {@link #getChunks()} returns {@code null}.
         *
         * @param sentence the sentence; must not be {@code null} or empty
         * @param language the language; must not be {@code null} or empty
         * @param offset the offset reported by {@link #getOffset()}; must not
         *        be negative
         * @throws IllegalArgumentException if an argument is invalid
         */
        private AnalysedText(String sentence, String language, int offset) {
            if (sentence == null || sentence.isEmpty()) {
                throw new IllegalArgumentException("The parsed Sentence MUST NOT be NULL nor empty!");
            }
            this.sentence = sentence;
            if (language == null || language.isEmpty()) {
                throw new IllegalArgumentException("The parsed language MUST NOT be NULL nor empty");
            }
            this.language = language;
            if (offset < 0) {
                throw new IllegalArgumentException("The parsed offset MUST NOT be a negative number (offset=" + offset + ")");
            }
            this.offset = offset;

            Span[] tokenSpans = TextAnalyzer.this.getTokenizer().tokenizePos(sentence);
            POSTaggerME tagger = TextAnalyzer.this.getPosTagger();
            ChunkerME chunker = TextAnalyzer.this.getChunker();
            PosTypeChunker posTypeChunker = TextAnalyzer.this.getPosTypeChunker();

            String[] tokenTexts = new String[tokenSpans.length];
            for (int ti = 0; ti < tokenSpans.length; ++ti) {
                tokenTexts[ti] = tokenSpans[ti].getCoveredText(sentence).toString();
            }

            // All four stay null when no POS tagger is available.
            String[][] posTags = null;
            double[][] posProbs = null;
            Span[] chunkSpans = null;
            double[] chunkProps = null;
            if (tagger != null) {
                posTags = new String[tokenTexts.length][];
                posProbs = new double[tokenTexts.length][];
                Sequence[] posSequences = tagger.topKSequences(tokenTexts);
                // Scratch buffers reused for every token.
                String[] actPos = new String[posSequences.length];
                double[] actProp = new double[posSequences.length];
                for (int i = 0; i < tokenSpans.length; ++i) {
                    // Collect the tags of the sequences in order and stop as soon as a
                    // later sequence repeats the tag of the first sequence for this token.
                    boolean done = false;
                    int j = 0;
                    while (j < posSequences.length && !done) {
                        String p = (String) posSequences[j].getOutcomes().get(i);
                        done = j > 0 && p.equals(actPos[0]);
                        if (!done) {
                            actPos[j] = p;
                            actProp[j] = posSequences[j].getProbs()[i];
                            ++j;
                        }
                    }
                    posTags[i] = Arrays.copyOf(actPos, j);
                    posProbs[i] = Arrays.copyOf(actProp, j);
                }
                if (chunker != null) {
                    // The model based chunker works on the tags of the first sequence only.
                    String[] pos = posSequences[0].getOutcomes().toArray(new String[tokenTexts.length]);
                    chunkSpans = chunker.chunkAsSpans(tokenTexts, pos);
                    chunkProps = chunker.probs();
                } else if (posTypeChunker != null) {
                    chunkSpans = posTypeChunker.chunkAsSpans(tokenTexts, posTags, posProbs);
                    // No probabilities are obtained from this chunker: use 1.0 for every chunk.
                    chunkProps = new double[chunkSpans.length];
                    Arrays.fill(chunkProps, 1.0);
                }
            }

            List<Token> tokenList = new ArrayList<Token>(tokenSpans.length);
            for (int i = 0; i < tokenSpans.length; ++i) {
                tokenList.add(new Token(tokenSpans[i], tokenTexts[i],
                        posTags == null ? null : posTags[i],
                        posProbs == null ? null : posProbs[i]));
            }
            // Must be assigned before any Chunk is created: the Chunk constructor reads it.
            this.tokens = Collections.unmodifiableList(tokenList);

            if (chunkSpans != null) {
                List<Chunk> chunkList = new ArrayList<Chunk>(chunkSpans.length);
                for (int i = 0; i < chunkSpans.length; ++i) {
                    chunkList.add(new Chunk(chunkSpans[i], chunkProps[i]));
                }
                this.chunks = Collections.unmodifiableList(chunkList);
            } else {
                this.chunks = null;
            }
        }

        /**
         * @return the unmodifiable list of the tokens of this sentence
         */
        public List<Token> getTokens() {
            return tokens;
        }

        /**
         * @return the unmodifiable list of the chunks of this sentence, or
         *         {@code null} if no chunks were computed
         */
        public List<Chunk> getChunks() {
            return chunks;
        }

        /**
         * @return the text of the analysed sentence
         */
        public String getText() {
            return sentence;
        }

        /**
         * @return the language this sentence was analysed with
         */
        public String getLanguage() {
            return language;
        }

        /**
         * @return the (non-negative) offset this sentence was created with
         */
        public int getOffset() {
            return offset;
        }

        /**
         * A chunk of the enclosing sentence, described by a span over token
         * indexes and a probability.
         *
         * <p>NOTE(review): the constructor uses {@code chunkSpan.getEnd()} as the
         * index of the last token (inclusive), while {@link #getTokens()} and
         * {@link #getSize()} treat the same value as exclusive. Confirm which
         * convention the chunkers follow before relying on either.
         */
        public class Chunk {
            /** Character span of this chunk within the sentence. */
            protected final Span span;
            /** Span over token indexes as passed to the constructor. */
            protected final Span chunkSpan;
            /** Probability passed to the constructor. */
            protected final double probability;
            /** Cache for {@link #getText()}; do not access directly. */
            private String __text;
            /** Cache for {@link #getTokens()}; do not access directly. */
            private List<Token> __chunkTokens;

            /**
             * @param chunkSpan span over the token indexes of the enclosing sentence
             * @param probability the probability of the chunk
             */
            private Chunk(Span chunkSpan, double probability) {
                this.chunkSpan = chunkSpan;
                Token first = AnalysedText.this.tokens.get(chunkSpan.getStart());
                Token last = AnalysedText.this.tokens.get(chunkSpan.getEnd());
                this.span = new Span(first.getStart(), last.getEnd());
                this.probability = probability;
            }

            /**
             * @return the view on the tokens of the sentence from
             *         {@link #getStart()} up to, but excluding, {@link #getEnd()}
             */
            public List<Token> getTokens() {
                List<Token> cached = __chunkTokens;
                if (cached == null) {
                    cached = AnalysedText.this.tokens.subList(chunkSpan.getStart(), chunkSpan.getEnd());
                    __chunkTokens = cached;
                }
                return cached;
            }

            /** @return the start of the token index span */
            public int getStart() {
                return chunkSpan.getStart();
            }

            /** @return the end of the token index span */
            public int getEnd() {
                return chunkSpan.getEnd();
            }

            /** @return the length of the token index span */
            public int getSize() {
                return chunkSpan.length();
            }

            /** @return the probability of this chunk */
            public double getProbability() {
                return probability;
            }

            /** @return the text of the sentence covered by the character span of this chunk */
            public String getText() {
                String cached = __text;
                if (cached == null) {
                    cached = span.getCoveredText(AnalysedText.this.sentence).toString();
                    __text = cached;
                }
                return cached;
            }

            /** @return same as {@link #getText()} */
            @Override
            public String toString() {
                return getText();
            }
        }

        /**
         * A token of the enclosing sentence with its optional POS tags and the
         * probabilities of those tags.
         */
        public class Token {
            /** Character span of this token within the sentence. */
            protected final Span span;
            /** Text of the token. */
            protected String token;
            /** POS tags of the token, or {@code null} if none are available. */
            protected final String[] posTags;
            /** Probabilities of {@link #posTags} (same length), or {@code null} if no tags are available. */
            protected final double[] posProbabilities;
            /** Whether the token text contains at least one letter or digit. */
            protected final boolean hasAlphaNumeric;

            /**
             * Creates a token with exactly one POS tag.
             */
            private Token(Span span, String token, String pos, double posProbability) {
                this(span, token, new String[]{pos}, new double[]{posProbability});
            }

            /**
             * @param span the character span of the token
             * @param token the text of the token
             * @param posTags the POS tags; {@code null} or an empty array means "no tags"
             * @param posProbabilities the probabilities of the tags; must have
             *        the same length as {@code posTags} when tags are present
             * @throws IllegalStateException if the two arrays differ in length
             */
            private Token(Span span, String token, String[] posTags, double[] posProbabilities) {
                this.span = span;
                this.token = token;
                if (posTags == null || posTags.length < 1) {
                    this.posTags = null;
                    this.posProbabilities = null;
                } else {
                    if (posTags.length != posProbabilities.length) {
                        throw new IllegalStateException("POS Tag array and POS probability array MUST BE of the same size!");
                    }
                    this.posTags = posTags;
                    this.posProbabilities = posProbabilities;
                }
                // Advance to the first letter or digit; reaching the end means there is none.
                int idx = 0;
                while (idx < token.length() && !Character.isLetterOrDigit(token.charAt(idx))) {
                    idx++;
                }
                this.hasAlphaNumeric = idx < token.length();
            }

            /** @return the start of the character span of this token */
            public int getStart() {
                return span.getStart();
            }

            /** @return the end of the character span of this token */
            public int getEnd() {
                return span.getEnd();
            }

            /** @return the first POS tag, or {@code null} if no tags are available */
            public String getPosTag() {
                if (posTags == null) {
                    return null;
                }
                return posTags[0];
            }

            /** @return all POS tags, or {@code null} if no tags are available */
            public String[] getPosTags() {
                return posTags;
            }

            /** @return the probability of the first POS tag, or {@code -1} if no tags are available */
            public double getPosProbability() {
                if (posProbabilities == null) {
                    return -1.0;
                }
                return posProbabilities[0];
            }

            /** @return the probabilities of all POS tags, or {@code null} if no tags are available */
            public double[] getPosProbabilities() {
                return posProbabilities;
            }

            /** @return the text of this token; derived from the span and the sentence if not yet set */
            public String getText() {
                if (token == null) {
                    token = span.getCoveredText(AnalysedText.this.sentence).toString();
                }
                return token;
            }

            /** @return whether the token text contains at least one letter or digit */
            public boolean hasAplhaNumericChar() {
                return hasAlphaNumeric;
            }

            /**
             * @return the text, followed by {@code '_'} and the POS tag (or the
             *         array of tags if there are several) when tags are available
             */
            @Override
            public String toString() {
                String text = getText();
                if (posTags == null) {
                    return text;
                }
                String tagPart = posTags.length == 1 ? posTags[0] : Arrays.toString(posTags);
                return text + '_' + tagPart;
            }
        }
    }

    /**
     * Iterator that analyses the sentences of a text one by one. The sentence
     * spans are determined once in the constructor; each sentence is analysed
     * only when {@link #next()} is called.
     */
    private final class TextAnalysisIterator
    implements Iterator<AnalysedText> {
        /** The text the sentence spans refer to. */
        private final String text;
        /** Spans of the sentences within {@link #text}; empty for a {@code null} or empty text. */
        private final Span[] sentenceSpans;
        /** Index of the span returned by the next call to {@link #next()}. */
        private int current = 0;
        /** Language passed on to every created {@link AnalysedText}. */
        private final String language;

        /**
         * @param text the text to iterate over; {@code null} or empty results
         *        in an iterator without elements
         * @param language the language used for the analysis of the sentences
         */
        private TextAnalysisIterator(String text, String language) {
            this.text = text;
            this.language = language;
            if (text == null || text.isEmpty()) {
                this.sentenceSpans = new Span[0];
            } else {
                SentenceDetector sd = TextAnalyzer.this.getSentenceDetector();
                // Without a sentence detector the whole text is treated as a single sentence.
                this.sentenceSpans = sd != null
                        ? sd.sentPosDetect(text)
                        : new Span[]{new Span(0, text.length())};
            }
        }

        @Override
        public boolean hasNext() {
            return this.sentenceSpans.length > this.current;
        }

        /**
         * Analyses and returns the next sentence. The start of the sentence
         * span is used as offset of the returned {@link AnalysedText}.
         *
         * @throws NoSuchElementException if there are no more sentences
         */
        @Override
        public AnalysedText next() {
            if (!hasNext()) {
                // Required by the Iterator contract.
                throw new NoSuchElementException("No more sentences available (sentences: "
                        + this.sentenceSpans.length + ")");
            }
            Span sentenceSpan = this.sentenceSpans[this.current];
            String sentence = sentenceSpan.getCoveredText(this.text).toString();
            ++this.current;
            return new AnalysedText(sentence, this.language, sentenceSpan.getStart());
        }

        /**
         * Not supported.
         *
         * @throws UnsupportedOperationException always
         */
        @Override
        public void remove() {
            throw new UnsupportedOperationException("Removal of Sentences of the prsed Text is not supported!");
        }
    }

    /**
     * Mutable configuration of a text analyzer: which tokenizer is forced,
     * which processing steps are enabled and the minimum POS tag probability.
     */
    public static final class TextAnalyzerConfig {
        /** Force the simple tokenizer; mutually exclusive with {@link #forceKeywordTokenizer}. */
        protected boolean forceSimpleTokenizer = false;
        /** Force the keyword tokenizer; mutually exclusive with {@link #forceSimpleTokenizer}. */
        protected boolean forceKeywordTokenizer = false;
        /** Whether POS tagging is enabled (default: {@code true}). */
        protected boolean enablePosTagger = true;
        /** Whether chunking is enabled (default: {@code true}). */
        protected boolean enableChunker = true;
        /** Whether sentence detection is enabled (default: {@code true}). */
        protected boolean enableSentenceDetector = true;
        /** Whether the POS type chunker is enabled (default: {@code true}). */
        protected boolean enablePosTypeChunker = true;
        /** Whether the POS type chunker is forced (default: {@code true}); implies {@link #enablePosTypeChunker}. */
        protected boolean forcePosTypeChunker = true;
        /** Minimum POS tag probability (default: {@code 0.75}); never greater than 1. */
        private double minPosTagProbability = 0.75;

        /** @return whether the simple tokenizer is forced */
        public final boolean isSimpleTokenizerForced() {
            return this.forceSimpleTokenizer;
        }

        /**
         * Sets whether the simple tokenizer is forced. Forcing it clears a
         * forced keyword tokenizer.
         *
         * @param useSimpleTokenizer the new state
         */
        public final void forceSimpleTokenizer(boolean useSimpleTokenizer) {
            this.forceSimpleTokenizer = useSimpleTokenizer;
            if (useSimpleTokenizer) {
                this.forceKeywordTokenizer = false;
            }
        }

        /** @return whether the keyword tokenizer is forced */
        public final boolean isKeywordTokenizerForced() {
            return this.forceKeywordTokenizer;
        }

        /**
         * Sets whether the keyword tokenizer is forced. Forcing it clears a
         * forced simple tokenizer.
         *
         * @param useKeywordTokenizer the new state
         */
        public final void forceKeywordTokenizer(boolean useKeywordTokenizer) {
            this.forceKeywordTokenizer = useKeywordTokenizer;
            if (useKeywordTokenizer) {
                this.forceSimpleTokenizer = false;
            }
        }

        /** @return whether POS tagging is enabled */
        public final boolean isPosTaggerEnable() {
            return this.enablePosTagger;
        }

        /** @param enablePosTagger whether POS tagging is enabled */
        public final void enablePosTagger(boolean enablePosTagger) {
            this.enablePosTagger = enablePosTagger;
        }

        /** @return whether chunking is enabled */
        public final boolean isChunkerEnabled() {
            return this.enableChunker;
        }

        /** @param enableChunker whether chunking is enabled */
        public final void enableChunker(boolean enableChunker) {
            this.enableChunker = enableChunker;
        }

        /** @return whether sentence detection is enabled */
        public final boolean isSentenceDetectorEnabled() {
            return this.enableSentenceDetector;
        }

        /** @param enableSentenceDetector whether sentence detection is enabled */
        public final void enableSentenceDetector(boolean enableSentenceDetector) {
            this.enableSentenceDetector = enableSentenceDetector;
        }

        /** @return whether the POS type chunker is enabled */
        public final boolean isPosTypeChunkerEnabled() {
            return this.enablePosTypeChunker;
        }

        /**
         * Sets whether the POS type chunker is enabled. Disabling it also
         * clears the forced state, because a disabled chunker cannot be forced.
         *
         * @param enablePosTypeChunker the new state
         */
        public final void enablePosTypeChunker(boolean enablePosTypeChunker) {
            this.enablePosTypeChunker = enablePosTypeChunker;
            if (!enablePosTypeChunker) {
                this.forcePosTypeChunker(enablePosTypeChunker);
            }
        }

        /** @return whether the POS type chunker is forced */
        public final boolean isPosTypeChunkerForced() {
            return this.forcePosTypeChunker;
        }

        /**
         * Sets whether the POS type chunker is forced. Forcing it also
         * enables it.
         *
         * @param forcePosTypeChunker the new state
         */
        public final void forcePosTypeChunker(boolean forcePosTypeChunker) {
            this.forcePosTypeChunker = forcePosTypeChunker;
            if (forcePosTypeChunker) {
                this.enablePosTypeChunker(true);
            }
        }

        /** @return the minimum POS tag probability */
        public final double getMinPosTypeProbability() {
            return this.minPosTagProbability;
        }

        /**
         * Sets the minimum POS tag probability. The current value is kept when
         * the argument is rejected.
         *
         * @param probability the new minimum probability; must be {@code <= 1}
         * @throws IllegalArgumentException if {@code probability} is greater than 1
         */
        public final void setMinPosTagProbability(double probability) {
            if (probability > 1.0) {
                // Report the rejected argument, not the value currently configured.
                throw new IllegalArgumentException("The minimum POS tag probability MUST be set to a value <= 1 (parsed:" + probability + ")");
            }
            this.minPosTagProbability = probability;
        }
    }
}

