/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.formats.brat;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import opennlp.tools.formats.brat.BratAnnotation;
import opennlp.tools.formats.brat.BratDocument;
import opennlp.tools.formats.brat.SpanAnnotation;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.sentdetect.SentenceDetector;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.util.Span;

/**
 * Converts a {@link BratDocument} into a list of {@link NameSample}s, one per sentence.
 *
 * <p>The document text is split with the configured {@link SentenceDetector}, each
 * sentence is tokenized with the configured {@link Tokenizer}, and every
 * {@link SpanAnnotation} contained in a sentence is mapped from character offsets
 * to token indexes. Annotations that cannot be mapped are reported on
 * {@code System.err} and left out of the result.
 */
public class BratDocumentParser {
    private SentenceDetector sentDetector;
    private Tokenizer tokenizer;

    /**
     * Creates a parser that uses the given components.
     *
     * @param sentenceDetector used to find sentence spans in the document text
     * @param tokenizer used to find token spans in each sentence text
     */
    public BratDocumentParser(SentenceDetector sentenceDetector, Tokenizer tokenizer) {
        this.sentDetector = sentenceDetector;
        this.tokenizer = tokenizer;
    }

    /**
     * Builds one {@link NameSample} per (possibly merged) sentence of the document.
     *
     * @param sample the brat document to convert
     * @return the samples in sentence order; the last constructor argument passed to
     *         {@link NameSample} is {@code true} only for the first sample
     *         (presumably the "clear adaptive data" flag - confirm against NameSample)
     */
    public List<NameSample> parse(BratDocument sample) {
        // Ids of all span annotations; each id is removed once its annotation is found
        // inside a sentence, so whatever remains at the end was never matched.
        HashSet<String> unmatchedEntityIds = new HashSet<String>();
        // Character offset -> span annotation covering that offset. When annotations
        // overlap, the one iterated last wins for the shared offsets.
        HashMap<Integer, Span> spanByOffset = new HashMap<Integer, Span>();
        for (BratAnnotation annotation : sample.getAnnotations()) {
            if (annotation instanceof SpanAnnotation) {
                unmatchedEntityIds.add(annotation.getId());
                Span covered = ((SpanAnnotation) annotation).getSpan();
                int offset = covered.getStart();
                while (offset < covered.getEnd()) {
                    spanByOffset.put(offset, covered);
                    offset++;
                }
            }
        }

        // A detected sentence that begins inside an annotation which started earlier
        // is glued onto the previous sentence instead of being kept on its own.
        List<Span> sentences = new ArrayList<Span>();
        for (Span detected : this.sentDetector.sentPosDetect(sample.getText())) {
            Span straddling = spanByOffset.get(detected.getStart());
            boolean splitsEntity = !sentences.isEmpty()
                    && straddling != null
                    && straddling.getStart() < detected.getStart();
            if (!splitsEntity) {
                sentences.add(detected);
            } else {
                Span previous = sentences.remove(sentences.size() - 1);
                sentences.add(new Span(previous.getStart(), detected.getEnd()));
                System.out.println("Correcting sentence segmentation in document " + sample.getId());
            }
        }

        List<NameSample> samples = new ArrayList<NameSample>(sentences.size());
        for (Span sentence : sentences) {
            String sentenceText = sentence.getCoveredText(sample.getText()).toString();
            Span[] tokens = this.tokenizer.tokenizePos(sentenceText);
            HashMap<Integer, Integer> boundaryToToken = indexTokenBoundaries(sentence.getStart(), tokens);

            List<Span> names = new ArrayList<Span>();
            for (BratAnnotation annotation : sample.getAnnotations()) {
                if (annotation instanceof SpanAnnotation) {
                    SpanAnnotation entity = (SpanAnnotation) annotation;
                    Span entitySpan = entity.getSpan();
                    if (sentence.contains(entitySpan)) {
                        unmatchedEntityIds.remove(annotation.getId());
                        entitySpan = entitySpan.trim(sample.getText());
                        // Begin offsets are looked up negated, end offsets as-is.
                        Integer firstToken = boundaryToToken.get(-entitySpan.getStart());
                        Integer tokenAfterLast = boundaryToToken.get(entitySpan.getEnd());
                        if (firstToken == null || tokenAfterLast == null) {
                            System.err.println("Dropped entity " + entity.getId() + " (" + entitySpan.getCoveredText(sample.getText()) + ") " + " in document " + sample.getId() + ", it is not matching tokenization!");
                        } else {
                            names.add(new Span(firstToken.intValue(), tokenAfterLast.intValue(), entity.getType()));
                        }
                    }
                }
            }

            boolean isFirstSample = samples.isEmpty();
            Span[] nameSpans = names.toArray(new Span[names.size()]);
            samples.add(new NameSample(sample.getId(), Span.spansToStrings(tokens, sentenceText), nameSpans, null, isFirstSample));
        }

        // Anything still in the set was not contained in any sentence.
        for (String leftoverId : unmatchedEntityIds) {
            System.err.println("Dropped entity " + leftoverId + " in document " + sample.getId() + ", is not matching sentence segmentation!");
        }
        return samples;
    }

    /**
     * Maps document-level token boundaries to sentence-local token indexes.
     *
     * <p>For token {@code i}, its begin offset is stored under the negated document
     * offset and maps to {@code i}; its end offset is stored under the positive
     * document offset and maps to {@code i + 1}. Negating the begin keys lets begin
     * and end boundaries share a single map. Note that a begin at document offset 0
     * negates to 0.
     *
     * @param sentenceOffset start offset of the sentence within the document text
     * @param tokens token spans relative to the sentence text
     * @return the boundary lookup table for this sentence
     */
    private static HashMap<Integer, Integer> indexTokenBoundaries(int sentenceOffset, Span[] tokens) {
        HashMap<Integer, Integer> boundaries = new HashMap<Integer, Integer>();
        int tokenIndex = 0;
        for (Span token : tokens) {
            boundaries.put(-(sentenceOffset + token.getStart()), tokenIndex);
            boundaries.put(sentenceOffset + token.getEnd(), tokenIndex + 1);
            tokenIndex++;
        }
        return boundaries;
    }
}

