/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.utils.regex;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.lucene.analysis.Analyzer;
import org.apache.mahout.common.ClassUtils;
import org.apache.mahout.utils.regex.AnalyzerTransformer;
import org.apache.mahout.utils.regex.IdentityFormatter;
import org.apache.mahout.utils.regex.IdentityTransformer;
import org.apache.mahout.utils.regex.RegexFormatter;
import org.apache.mahout.utils.regex.RegexTransformer;
import org.apache.mahout.utils.regex.RegexUtils;

public class RegexMapper
extends Mapper<LongWritable, Text, LongWritable, Text> {
    public static final String REGEX = "regex";
    public static final String GROUP_MATCHERS = "regex.groups";
    public static final String TRANSFORMER_CLASS = "transformer.class";
    public static final String FORMATTER_CLASS = "formatter.class";
    private Pattern regex;
    private List<Integer> groupsToKeep;
    private RegexTransformer transformer = RegexUtils.IDENTITY_TRANSFORMER;
    private RegexFormatter formatter = RegexUtils.IDENTITY_FORMATTER;
    public static final String ANALYZER_NAME = "analyzerName";

    protected void setup(Mapper.Context context) throws IOException, InterruptedException {
        this.groupsToKeep = new ArrayList<Integer>();
        Configuration config = context.getConfiguration();
        String regexStr = config.get(REGEX);
        this.regex = Pattern.compile(regexStr);
        String[] groups = config.getStrings(GROUP_MATCHERS);
        if (groups != null) {
            for (String group : groups) {
                this.groupsToKeep.add(Integer.parseInt(group));
            }
        }
        this.transformer = (RegexTransformer)ClassUtils.instantiateAs((String)config.get(TRANSFORMER_CLASS, IdentityTransformer.class.getName()), RegexTransformer.class);
        String analyzerName = config.get(ANALYZER_NAME);
        if (analyzerName != null && this.transformer instanceof AnalyzerTransformer) {
            Analyzer analyzer = (Analyzer)ClassUtils.instantiateAs((String)analyzerName, Analyzer.class);
            ((AnalyzerTransformer)this.transformer).setAnalyzer(analyzer);
        }
        this.formatter = (RegexFormatter)ClassUtils.instantiateAs((String)config.get(FORMATTER_CLASS, IdentityFormatter.class.getName()), RegexFormatter.class);
    }

    protected void map(LongWritable key, Text text, Mapper.Context context) throws IOException, InterruptedException {
        String result = RegexUtils.extract(text.toString(), this.regex, this.groupsToKeep, " ", this.transformer);
        if (result != null && !result.isEmpty()) {
            String format = this.formatter.format(result);
            context.write((Object)key, (Object)new Text(format));
        }
    }
}

