/*
 * Decompiled with CFR 0.152.
 */
package org.apache.stanbol.commons.opennlp;

import java.util.ArrayList;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.util.Span;
import opennlp.tools.util.StringUtil;

/**
 * Whitespace-driven {@link Tokenizer} with limited support for punctuation
 * at the end of words.
 *
 * <p>The input is split on characters for which
 * {@link StringUtil#isWhitespace(char)} returns {@code true}. If a
 * whitespace-delimited token is longer than one character and ends with one
 * of {@code . , ! ? ; :}, that final character is emitted as a separate
 * token directly after the word it was attached to.
 *
 * <p>The class has no instance fields and a private constructor; use
 * {@link #INSTANCE}.
 */
public class KeywordTokenizer
implements Tokenizer {
    /** The only instance of this tokenizer (the constructor is private). */
    public static final KeywordTokenizer INSTANCE = new KeywordTokenizer();

    private KeywordTokenizer() {
    }

    /**
     * Tokenizes the text and returns the covered text of every token.
     *
     * @param s the text to tokenize
     * @return the token strings, in the order produced by {@link #tokenizePos(String)}
     */
    public String[] tokenize(String s) {
        return Span.spansToStrings(this.tokenizePos(s), s);
    }

    /**
     * Tokenizes the text and returns the character offsets of every token.
     *
     * @param s the text to tokenize
     * @return one {@link Span} per token, created as {@code new Span(start, end)}
     *         with {@code end} being the index just past the token's last
     *         character; empty if {@code s} contains no non-whitespace character
     */
    public Span[] tokenizePos(String s) {
        ArrayList<Span> tokens = new ArrayList<Span>();
        int length = s.length();
        // Index of the first character of the token currently being read,
        // or -1 while between tokens.
        int start = -1;
        // Iterate one position past the end and treat it as a blank so that a
        // token touching the end of the input is closed by the same code path.
        for (int ci = 0; ci <= length; ++ci) {
            char c = ci < length ? s.charAt(ci) : ' ';
            if (!StringUtil.isWhitespace(c)) {
                if (start < 0) {
                    start = ci; // first character of a new token
                }
                continue;
            }
            if (start < 0) {
                continue; // consecutive whitespace, no open token
            }
            // Whitespace closes the open token [start, ci). Because a token is
            // open, start <= ci - 1, so ci - 1 is a valid index into s.
            int last = ci - 1;
            if (start < last && isTrailingPunctuation(s.charAt(last))) {
                // Multi-character token ending in punctuation: emit the word
                // and the punctuation mark as two adjacent tokens.
                tokens.add(new Span(start, last));
                tokens.add(new Span(last, ci));
            } else {
                tokens.add(new Span(start, ci));
            }
            start = -1;
        }
        return tokens.toArray(new Span[tokens.size()]);
    }

    /** Tells whether {@code c} is one of the punctuation marks split off the end of a word. */
    private static boolean isTrailingPunctuation(char c) {
        return c == '.' || c == ',' || c == '!' || c == '?' || c == ';' || c == ':';
    }
}

