/*
 * Decompiled with CFR 0.152.
 */
package org.apache.spark.ml.feature;

import org.apache.spark.annotation.Experimental;
import org.apache.spark.ml.UnaryTransformer;
import org.apache.spark.ml.param.BooleanParam;
import org.apache.spark.ml.param.IntParam;
import org.apache.spark.ml.param.Param;
import org.apache.spark.ml.param.ParamPair;
import org.apache.spark.ml.param.ParamValidators$;
import org.apache.spark.ml.util.Identifiable;
import org.apache.spark.ml.util.Identifiable$;
import org.apache.spark.sql.types.ArrayType;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.StringType$;
import scala.Function0;
import scala.Function1;
import scala.Predef$;
import scala.Serializable;
import scala.StringContext;
import scala.collection.Seq;
import scala.collection.immutable.StringOps;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxesRunTime;
import scala.util.matching.Regex;

@Experimental
@ScalaSignature(bytes="\u0006\u0001\u0005ea\u0001B\u0001\u0003\u00015\u0011aBU3hKb$vn[3oSj,'O\u0003\u0002\u0004\t\u00059a-Z1ukJ,'BA\u0003\u0007\u0003\tiGN\u0003\u0002\b\u0011\u0005)1\u000f]1sW*\u0011\u0011BC\u0001\u0007CB\f7\r[3\u000b\u0003-\t1a\u001c:h\u0007\u0001\u0019\"\u0001\u0001\b\u0011\u000b=\u0001\"\u0003\b\u0015\u000e\u0003\u0011I!!\u0005\u0003\u0003!Us\u0017M]=Ue\u0006t7OZ8s[\u0016\u0014\bCA\n\u001a\u001d\t!r#D\u0001\u0016\u0015\u00051\u0012!B:dC2\f\u0017B\u0001\r\u0016\u0003\u0019\u0001&/\u001a3fM&\u0011!d\u0007\u0002\u0007'R\u0014\u0018N\\4\u000b\u0005a)\u0002cA\u000f&%9\u0011ad\t\b\u0003?\tj\u0011\u0001\t\u0006\u0003C1\ta\u0001\u0010:p_Rt\u0014\"\u0001\f\n\u0005\u0011*\u0012a\u00029bG.\fw-Z\u0005\u0003M\u001d\u00121aU3r\u0015\t!S\u0003\u0005\u0002*\u00015\t!\u0001\u0003\u0005,\u0001\t\u0015\r\u0011\"\u0011-\u0003\r)\u0018\u000eZ\u000b\u0002%!Aa\u0006\u0001B\u0001B\u0003%!#\u0001\u0003vS\u0012\u0004\u0003\"\u0002\u0019\u0001\t\u0003\t\u0014A\u0002\u001fj]&$h\b\u0006\u0002)e!)1f\fa\u0001%!)\u0001\u0007\u0001C\u0001iQ\t\u0001\u0006C\u00047\u0001\t\u0007I\u0011A\u001c\u0002\u001d5Lg\u000eV8lK:dUM\\4uQV\t\u0001\b\u0005\u0002:y5\t!H\u0003\u0002<\t\u0005)\u0001/\u0019:b[&\u0011QH\u000f\u0002\t\u0013:$\b+\u0019:b[\"1q\b\u0001Q\u0001\na\nq\"\\5o)>\\WM\u001c'f]\u001e$\b\u000e\t\u0005\u0006\u0003\u0002!\tAQ\u0001\u0012g\u0016$X*\u001b8U_.,g\u000eT3oORDGCA\"E\u001b\u0005\u0001\u0001\"B#A\u0001\u00041\u0015!\u0002<bYV,\u0007C\u0001\u000bH\u0013\tAUCA\u0002J]RDQA\u0013\u0001\u0005\u0002-\u000b\u0011cZ3u\u001b&tGk\\6f]2+gn\u001a;i+\u00051\u0005bB'\u0001\u0005\u0004%\tAT\u0001\u0005O\u0006\u00048/F\u0001P!\tI\u0004+\u0003\u0002Ru\ta!i\\8mK\u0006t\u0007+\u0019:b[\"11\u000b\u0001Q\u0001\n=\u000bQaZ1qg\u0002BQ!\u0016\u0001\u0005\u0002Y\u000bqa]3u\u000f\u0006\u00048\u000f\u0006\u0002D/\")Q\t\u0016a\u00011B\u0011A#W\u0005\u00035V\u0011qAQ8pY\u0016\fg\u000eC\u0003]\u0001\u0011\u0005Q,A\u0004hKR<\u0015\r]:\u0016\u0003aCqa\u0018\u0001C\u0002\u0013\u0005\u0001-A\u0004qCR$XM\u001d8\u0016\u0003\u0005\u00042!\u000f2\u0013\u0013\t\u0019'HA\u0003QCJ\fW\u000e\u0003\u0004f\u0001\u0001\u0006I!Y\u0001\ta\u0006$H/\u001a:oA!)q\r\u0001C\u0001Q\u0006Q1/\u001a;QCR$XM\u001d8\u0015\u0005\rK\u0007\"B#g\u0001\u0004\u0011\u0002\"B6\u0001\t\u0003a\u0013AC4fiB\u000bG\u000f^3s]\")Q\u000e\u0001C)]\u0006\u00192M]3bi\u0016$&/\u00198tM>\u0014XNR;oGV\tq\u000e\u0005\u0003\u0015aJa\u0012BA9\u0016\u0005%1UO\\2uS>t\u0017\u0007C\u0003t\u0001\u0011EC/A\twC2LG-\u0019;f\u0013:\u0004X\u000f\u001e+za\u0016$\"!\u001e=\u0011\u0005Q1\u0018BA<\u0016\u0005\u0011)f.\u001b;\t\u000be\u0014\b\u0019\u0001>\u0002\u0013%t\u0007/\u001e;UsB,\u0007cA>\u0002\u00025\tAP\u0003\u0002~}\u0006)A/\u001f9fg*\u0011qPB\u0001\u0004gFd\u0017bAA\u0002y\nAA)\u0019;b)f\u0004X\rC\u0004\u0002\b\u0001!\t&!\u0003\u0002\u001d=,H\u000f];u\t\u0006$\u0018\rV=qKV\t!\u0010K\u0002\u0001\u0003\u001b\u0001B!a\u0004\u0002\u00165\u0011\u0011\u0011\u0003\u0006\u0004\u0003'1\u0011AC1o]>$\u0018\r^5p]&!\u0011qCA\t\u00051)\u0005\u0010]3sS6,g\u000e^1m\u0001")
public class RegexTokenizer
extends UnaryTransformer<String, Seq<String>, RegexTokenizer> {
    private final String uid;
    private final IntParam minTokenLength;
    private final BooleanParam gaps;
    private final Param<String> pattern;

    @Override
    public String uid() {
        return this.uid;
    }

    public IntParam minTokenLength() {
        return this.minTokenLength;
    }

    public RegexTokenizer setMinTokenLength(int value) {
        return (RegexTokenizer)this.set(this.minTokenLength(), BoxesRunTime.boxToInteger((int)value));
    }

    public int getMinTokenLength() {
        return BoxesRunTime.unboxToInt((Object)this.$(this.minTokenLength()));
    }

    public BooleanParam gaps() {
        return this.gaps;
    }

    public RegexTokenizer setGaps(boolean value) {
        return (RegexTokenizer)this.set(this.gaps(), BoxesRunTime.boxToBoolean((boolean)value));
    }

    public boolean getGaps() {
        return BoxesRunTime.unboxToBoolean((Object)this.$(this.gaps()));
    }

    public Param<String> pattern() {
        return this.pattern;
    }

    public RegexTokenizer setPattern(String value) {
        return (RegexTokenizer)this.set(this.pattern(), value);
    }

    public String getPattern() {
        return this.$(this.pattern());
    }

    @Override
    public Function1<String, Seq<String>> createTransformFunc() {
        return new Serializable(this){
            public static final long serialVersionUID = 0L;
            private final /* synthetic */ RegexTokenizer $outer;

            public final Seq<String> apply(String str) {
                Regex re = new StringOps(Predef$.MODULE$.augmentString(this.$outer.$(this.$outer.pattern()))).r();
                Seq tokens = BoxesRunTime.unboxToBoolean((Object)this.$outer.$(this.$outer.gaps())) ? Predef$.MODULE$.refArrayOps((Object[])re.split((CharSequence)str)).toSeq() : re.findAllIn((CharSequence)str).toSeq();
                int minLength = BoxesRunTime.unboxToInt((Object)this.$outer.$(this.$outer.minTokenLength()));
                return (Seq)tokens.filter((Function1)new Serializable(this, minLength){
                    public static final long serialVersionUID = 0L;
                    private final int minLength$1;

                    public final boolean apply(String x$2) {
                        return x$2.length() >= this.minLength$1;
                    }
                    {
                        this.minLength$1 = minLength$1;
                    }
                });
            }
            {
                if ($outer == null) {
                    throw new NullPointerException();
                }
                this.$outer = $outer;
            }
        };
    }

    @Override
    public void validateInputType(DataType inputType) {
        DataType dataType = inputType;
        StringType$ stringType$ = StringType$.MODULE$;
        Predef$.MODULE$.require(!(dataType != null ? !dataType.equals(stringType$) : stringType$ != null), (Function0)new Serializable(this, inputType){
            public static final long serialVersionUID = 0L;
            private final DataType inputType$2;

            public final String apply() {
                return new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Input type must be string type but got ", "."})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{this.inputType$2}));
            }
            {
                this.inputType$2 = inputType$2;
            }
        });
    }

    @Override
    public DataType outputDataType() {
        return new ArrayType((DataType)StringType$.MODULE$, false);
    }

    public RegexTokenizer(String uid) {
        this.uid = uid;
        this.minTokenLength = new IntParam((Identifiable)this, "minTokenLength", "minimum token length (>= 0)", (Function1<Object, Object>)ParamValidators$.MODULE$.gtEq(0.0));
        this.gaps = new BooleanParam(this, "gaps", "Set regex to match gaps or tokens");
        this.pattern = new Param(this, "pattern", "regex pattern used for tokenizing");
        this.setDefault((Seq<ParamPair<?>>)Predef$.MODULE$.wrapRefArray((Object[])new ParamPair[]{this.minTokenLength().$minus$greater(BoxesRunTime.boxToInteger((int)1)), this.gaps().$minus$greater(BoxesRunTime.boxToBoolean((boolean)true)), this.pattern().$minus$greater("\\s+")}));
    }

    public RegexTokenizer() {
        this(Identifiable$.MODULE$.randomUID("regexTok"));
    }
}

