/*
 * Decompiled with CFR 0.152.
 */
package org.apache.spark.ml.feature;

import java.io.IOException;
import java.io.Serializable;
import org.apache.spark.ml.Estimator;
import org.apache.spark.ml.feature.CountVectorizer$;
import org.apache.spark.ml.feature.CountVectorizerModel;
import org.apache.spark.ml.feature.CountVectorizerParams;
import org.apache.spark.ml.param.BooleanParam;
import org.apache.spark.ml.param.DoubleParam;
import org.apache.spark.ml.param.IntParam;
import org.apache.spark.ml.param.Param;
import org.apache.spark.ml.param.ParamMap;
import org.apache.spark.ml.param.shared.HasInputCol;
import org.apache.spark.ml.param.shared.HasOutputCol;
import org.apache.spark.ml.util.DefaultParamsWritable;
import org.apache.spark.ml.util.Identifiable$;
import org.apache.spark.ml.util.MLReader;
import org.apache.spark.ml.util.MLWritable;
import org.apache.spark.ml.util.MLWriter;
import org.apache.spark.rdd.RDD;
import org.apache.spark.rdd.RDD$;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.storage.StorageLevel;
import org.apache.spark.storage.StorageLevel$;
import org.apache.spark.util.collection.OpenHashMap;
import scala.Array$;
import scala.Function0;
import scala.Function1;
import scala.Function2;
import scala.MatchError;
import scala.None$;
import scala.Predef$;
import scala.Some;
import scala.Tuple2;
import scala.collection.Iterable;
import scala.collection.Iterable$;
import scala.collection.Seq;
import scala.collection.mutable.ArrayOps;
import scala.math.Ordering;
import scala.package$;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.java8.JFunction0;
import scala.runtime.java8.JFunction1;

@ScalaSignature(bytes="\u0006\u0001\u0005ue\u0001\u0002\u000b\u0016\u0001\u0001B\u0001B\r\u0001\u0003\u0006\u0004%\te\r\u0005\t\u0015\u0002\u0011\t\u0011)A\u0005i!)A\n\u0001C\u0001\u001b\")A\n\u0001C\u0001%\")A\u000b\u0001C\u0001+\")!\f\u0001C\u00017\")a\f\u0001C\u0001?\")a\r\u0001C\u0001O\")Q\u000e\u0001C\u0001]\")1\u000f\u0001C\u0001i\")q\u000f\u0001C\u0001q\"9\u0011\u0011\u0001\u0001\u0005B\u0005\r\u0001bBA\u0018\u0001\u0011\u0005\u0013\u0011\u0007\u0005\b\u0003\u000b\u0002A\u0011IA$\u000f\u001d\ti&\u0006E\u0001\u0003?2a\u0001F\u000b\t\u0002\u0005\u0005\u0004B\u0002'\u0011\t\u0003\t)\bC\u0004\u0002xA!\t%!\u001f\t\u0013\u0005\u0015\u0005#!A\u0005\n\u0005\u001d%aD\"pk:$h+Z2u_JL'0\u001a:\u000b\u0005Y9\u0012a\u00024fCR,(/\u001a\u0006\u00031e\t!!\u001c7\u000b\u0005iY\u0012!B:qCJ\\'B\u0001\u000f\u001e\u0003\u0019\t\u0007/Y2iK*\ta$A\u0002pe\u001e\u001c\u0001a\u0005\u0003\u0001C%b\u0003c\u0001\u0012$K5\tq#\u0003\u0002%/\tIQi\u001d;j[\u0006$xN\u001d\t\u0003M\u001dj\u0011!F\u0005\u0003QU\u0011AcQ8v]R4Vm\u0019;pe&TXM]'pI\u0016d\u0007C\u0001\u0014+\u0013\tYSCA\u000bD_VtGOV3di>\u0014\u0018N_3s!\u0006\u0014\u0018-\\:\u0011\u00055\u0002T\"\u0001\u0018\u000b\u0005=:\u0012\u0001B;uS2L!!\r\u0018\u0003+\u0011+g-Y;miB\u000b'/Y7t/JLG/\u00192mK\u0006\u0019Q/\u001b3\u0016\u0003Q\u0002\"!\u000e \u000f\u0005Yb\u0004CA\u001c;\u001b\u0005A$BA\u001d 
\u0003\u0019a$o\\8u})\t1(A\u0003tG\u0006d\u0017-\u0003\u0002>u\u00051\u0001K]3eK\u001aL!a\u0010!\u0003\rM#(/\u001b8h\u0015\ti$\bK\u0002\u0002\u0005\"\u0003\"a\u0011$\u000e\u0003\u0011S!!R\r\u0002\u0015\u0005tgn\u001c;bi&|g.\u0003\u0002H\t\n)1+\u001b8dK\u0006\n\u0011*A\u00032]Ur\u0003'\u0001\u0003vS\u0012\u0004\u0003f\u0001\u0002C\u0011\u00061A(\u001b8jiz\"\"AT(\u0011\u0005\u0019\u0002\u0001\"\u0002\u001a\u0004\u0001\u0004!\u0004fA(C\u0011\"\u001a1A\u0011%\u0015\u00039C3\u0001\u0002\"I\u0003-\u0019X\r^%oaV$8i\u001c7\u0015\u0005Y;V\"\u0001\u0001\t\u000ba+\u0001\u0019\u0001\u001b\u0002\u000bY\fG.^3)\u0007\u0015\u0011\u0005*\u0001\u0007tKR|U\u000f\u001e9vi\u000e{G\u000e\u0006\u0002W9\")\u0001L\u0002a\u0001i!\u001aaA\u0011%\u0002\u0019M,GOV8dC\n\u001c\u0016N_3\u0015\u0005Y\u0003\u0007\"\u0002-\b\u0001\u0004\t\u0007C\u00012d\u001b\u0005Q\u0014B\u00013;\u0005\rIe\u000e\u001e\u0015\u0004\u000f\tC\u0015\u0001C:fi6Kg\u000e\u0012$\u0015\u0005YC\u0007\"\u0002-\t\u0001\u0004I\u0007C\u00012k\u0013\tY'H\u0001\u0004E_V\u0014G.\u001a\u0015\u0004\u0011\tC\u0015\u0001C:fi6\u000b\u0007\u0010\u0012$\u0015\u0005Y{\u0007\"\u0002-\n\u0001\u0004I\u0007fA\u0005Cc\u0006\n!/A\u00033]Qr\u0003'\u0001\u0005tKRl\u0015N\u001c+G)\t1V\u000fC\u0003Y\u0015\u0001\u0007\u0011\u000eK\u0002\u000b\u0005\"\u000b\u0011b]3u\u0005&t\u0017M]=\u0015\u0005YK\b\"\u0002-\f\u0001\u0004Q\bC\u00012|\u0013\ta(HA\u0004C_>dW-\u00198)\u0007-\u0011e0I\u0001\u0000\u0003\u0015\u0011d\u0006\r\u00181\u0003\r1\u0017\u000e\u001e\u000b\u0004K\u0005\u0015\u0001bBA\u0004\u0019\u0001\u0007\u0011\u0011B\u0001\bI\u0006$\u0018m]3ua\u0011\tY!a\u0007\u0011\r\u00055\u00111CA\f\u001b\t\tyAC\u0002\u0002\u0012e\t1a]9m\u0013\u0011\t)\"a\u0004\u0003\u000f\u0011\u000bG/Y:fiB!\u0011\u0011DA\u000e\u0019\u0001!A\"!\b\u0002\u0006\u0005\u0005\t\u0011!B\u0001\u0003?\u00111a\u0018\u00132#\u0011\t\t#a\n\u0011\u0007\t\f\u0019#C\u0002\u0002&i\u0012qAT8uQ&tw\rE\u0002c\u0003SI1!a\u000b;\u0005\r\te.\u001f\u0015\u0004\u0019\ts\u0018a\u0004;sC:\u001chm\u001c:n
'\u000eDW-\\1\u0015\t\u0005M\u0012q\b\t\u0005\u0003k\tY$\u0004\u0002\u00028)!\u0011\u0011HA\b\u0003\u0015!\u0018\u0010]3t\u0013\u0011\ti$a\u000e\u0003\u0015M#(/^2u)f\u0004X\rC\u0004\u0002B5\u0001\r!a\r\u0002\rM\u001c\u0007.Z7bQ\ri!\tS\u0001\u0005G>\u0004\u0018\u0010F\u0002O\u0003\u0013Bq!a\u0013\u000f\u0001\u0004\ti%A\u0003fqR\u0014\u0018\r\u0005\u0003\u0002P\u0005USBAA)\u0015\r\t\u0019fF\u0001\u0006a\u0006\u0014\u0018-\\\u0005\u0005\u0003/\n\tF\u0001\u0005QCJ\fW.T1qQ\rq!\t\u0013\u0015\u0004\u0001\tC\u0015aD\"pk:$h+Z2u_JL'0\u001a:\u0011\u0005\u0019\u00022c\u0002\t\u0002d\u0005%\u0014q\u000e\t\u0004E\u0006\u0015\u0014bAA4u\t1\u0011I\\=SK\u001a\u0004B!LA6\u001d&\u0019\u0011Q\u000e\u0018\u0003+\u0011+g-Y;miB\u000b'/Y7t%\u0016\fG-\u00192mKB\u0019!-!\u001d\n\u0007\u0005M$H\u0001\u0007TKJL\u0017\r\\5{C\ndW\r\u0006\u0002\u0002`\u0005!An\\1e)\rq\u00151\u0010\u0005\u0007\u0003{\u0012\u0002\u0019\u0001\u001b\u0002\tA\fG\u000f\u001b\u0015\u0005%\t\u000b\t)\t\u0002\u0002\u0004\u0006)\u0011G\f\u001c/a\u0005Y!/Z1e%\u0016\u001cx\u000e\u001c<f)\t\tI\t\u0005\u0003\u0002\f\u0006UUBAAG\u0015\u0011\ty)!%\u0002\t1\fgn\u001a\u0006\u0003\u0003'\u000bAA[1wC&!\u0011qSAG\u0005\u0019y%M[3di\"\"\u0001CQAAQ\u0011y!)!!")
public class CountVectorizer
extends Estimator<CountVectorizerModel>
implements CountVectorizerParams,
DefaultParamsWritable {
    /*
     * Estimator that learns a vocabulary from a column of token sequences and
     * returns a CountVectorizerModel built from that vocabulary (see fit).
     *
     * This listing is CFR decompiler output for a Scala class, so it contains
     * compiler-generated members: trait forwarders (Trait.method$(this)),
     * trait field setters (..._setter_$..._$eq) and lifted lambda bodies
     * ($anonfun$...). Other decompiler artifacts (raw-typed lambdas) remain.
     */

    /** Identifier of this estimator instance; assigned once in the constructor. */
    private final String uid;

    // The fields below are written by the ..._setter_$..._$eq methods further
    // down, not by a constructor. Java rejects assignment to a final field
    // outside a constructor/initializer, so they are intentionally non-final.
    // NOTE(review): presumably the trait $init$ calls in the constructor invoke
    // each setter exactly once - confirm against the trait implementations.
    private IntParam vocabSize;
    private DoubleParam minDF;
    private DoubleParam maxDF;
    private DoubleParam minTF;
    private BooleanParam binary;
    private Param<String> outputCol;
    private Param<String> inputCol;

    /**
     * Static forwarder to the companion object's {@code load}.
     *
     * @param string argument passed through unchanged (presumably a path - confirm)
     * @return the instance produced by {@code CountVectorizer$.MODULE$.load}
     */
    public static CountVectorizer load(String string) {
        return CountVectorizer$.MODULE$.load(string);
    }

    /**
     * Static forwarder to the companion object's {@code read}.
     *
     * @return the reader produced by {@code CountVectorizer$.MODULE$.read}
     */
    public static MLReader<CountVectorizer> read() {
        return CountVectorizer$.MODULE$.read();
    }

    /** Forwards to the {@code DefaultParamsWritable} trait implementation of {@code write}. */
    @Override
    public MLWriter write() {
        return DefaultParamsWritable.write$(this);
    }

    /**
     * Forwards to the {@code MLWritable} trait implementation of {@code save}.
     *
     * @param path destination passed through unchanged
     * @throws IOException declared by the trait method
     */
    @Override
    public void save(String path) throws IOException {
        MLWritable.save$(this, path);
    }

    /** Forwards to the {@code CountVectorizerParams} trait getter for {@code vocabSize}. */
    @Override
    public int getVocabSize() {
        return CountVectorizerParams.getVocabSize$(this);
    }

    /** Forwards to the {@code CountVectorizerParams} trait getter for {@code minDF}. */
    @Override
    public double getMinDF() {
        return CountVectorizerParams.getMinDF$(this);
    }

    /** Forwards to the {@code CountVectorizerParams} trait getter for {@code maxDF}. */
    @Override
    public double getMaxDF() {
        return CountVectorizerParams.getMaxDF$(this);
    }

    /**
     * Forwards to the {@code CountVectorizerParams} trait implementation.
     *
     * @param schema input schema
     * @return the schema returned by the trait method
     */
    @Override
    public StructType validateAndTransformSchema(StructType schema) {
        return CountVectorizerParams.validateAndTransformSchema$(this, schema);
    }

    /** Forwards to the {@code CountVectorizerParams} trait getter for {@code minTF}. */
    @Override
    public double getMinTF() {
        return CountVectorizerParams.getMinTF$(this);
    }

    /** Forwards to the {@code CountVectorizerParams} trait getter for {@code binary}. */
    @Override
    public boolean getBinary() {
        return CountVectorizerParams.getBinary$(this);
    }

    /** Forwards to the {@code HasOutputCol} trait getter. */
    @Override
    public final String getOutputCol() {
        return HasOutputCol.getOutputCol$(this);
    }

    /** Forwards to the {@code HasInputCol} trait getter. */
    @Override
    public final String getInputCol() {
        return HasInputCol.getInputCol$(this);
    }

    /** Returns the {@code vocabSize} param object held by this instance. */
    @Override
    public IntParam vocabSize() {
        return this.vocabSize;
    }

    /** Returns the {@code minDF} param object held by this instance. */
    @Override
    public DoubleParam minDF() {
        return this.minDF;
    }

    /** Returns the {@code maxDF} param object held by this instance. */
    @Override
    public DoubleParam maxDF() {
        return this.maxDF;
    }

    /** Returns the {@code minTF} param object held by this instance. */
    @Override
    public DoubleParam minTF() {
        return this.minTF;
    }

    /** Returns the {@code binary} param object held by this instance. */
    @Override
    public BooleanParam binary() {
        return this.binary;
    }

    /** Compiler-generated trait setter: stores the {@code vocabSize} param object. */
    @Override
    public void org$apache$spark$ml$feature$CountVectorizerParams$_setter_$vocabSize_$eq(IntParam x$1) {
        this.vocabSize = x$1;
    }

    /** Compiler-generated trait setter: stores the {@code minDF} param object. */
    @Override
    public void org$apache$spark$ml$feature$CountVectorizerParams$_setter_$minDF_$eq(DoubleParam x$1) {
        this.minDF = x$1;
    }

    /** Compiler-generated trait setter: stores the {@code maxDF} param object. */
    @Override
    public void org$apache$spark$ml$feature$CountVectorizerParams$_setter_$maxDF_$eq(DoubleParam x$1) {
        this.maxDF = x$1;
    }

    /** Compiler-generated trait setter: stores the {@code minTF} param object. */
    @Override
    public void org$apache$spark$ml$feature$CountVectorizerParams$_setter_$minTF_$eq(DoubleParam x$1) {
        this.minTF = x$1;
    }

    /** Compiler-generated trait setter: stores the {@code binary} param object. */
    @Override
    public void org$apache$spark$ml$feature$CountVectorizerParams$_setter_$binary_$eq(BooleanParam x$1) {
        this.binary = x$1;
    }

    /** Returns the {@code outputCol} param object held by this instance. */
    @Override
    public final Param<String> outputCol() {
        return this.outputCol;
    }

    /** Compiler-generated trait setter: stores the {@code outputCol} param object. */
    @Override
    public final void org$apache$spark$ml$param$shared$HasOutputCol$_setter_$outputCol_$eq(Param<String> x$1) {
        this.outputCol = x$1;
    }

    /** Returns the {@code inputCol} param object held by this instance. */
    @Override
    public final Param<String> inputCol() {
        return this.inputCol;
    }

    /** Compiler-generated trait setter: stores the {@code inputCol} param object. */
    @Override
    public final void org$apache$spark$ml$param$shared$HasInputCol$_setter_$inputCol_$eq(Param<String> x$1) {
        this.inputCol = x$1;
    }

    /** Returns the identifier passed to the constructor. */
    @Override
    public String uid() {
        return this.uid;
    }

    /**
     * Sets the {@code inputCol} param via {@code set}.
     *
     * @param value new value for the param
     * @return the result of {@code set}, cast to {@code CountVectorizer}
     */
    public CountVectorizer setInputCol(String value) {
        return (CountVectorizer)this.set(this.inputCol(), value);
    }

    /**
     * Sets the {@code outputCol} param via {@code set}.
     *
     * @param value new value for the param
     * @return the result of {@code set}, cast to {@code CountVectorizer}
     */
    public CountVectorizer setOutputCol(String value) {
        return (CountVectorizer)this.set(this.outputCol(), value);
    }

    /**
     * Sets the {@code vocabSize} param via {@code set}; in {@code fit} this value
     * caps the number of vocabulary entries that are kept.
     *
     * @param value new value for the param
     * @return the result of {@code set}, cast to {@code CountVectorizer}
     */
    public CountVectorizer setVocabSize(int value) {
        return (CountVectorizer)this.set(this.vocabSize(), BoxesRunTime.boxToInteger((int)value));
    }

    /**
     * Sets the {@code minDF} param via {@code set}. In {@code fit}, a value
     * {@code >= 1.0} is used directly as a lower bound on document frequency and a
     * value {@code < 1.0} is first multiplied by the number of input rows.
     *
     * @param value new value for the param
     * @return the result of {@code set}, cast to {@code CountVectorizer}
     */
    public CountVectorizer setMinDF(double value) {
        return (CountVectorizer)this.set(this.minDF(), BoxesRunTime.boxToDouble((double)value));
    }

    /**
     * Sets the {@code maxDF} param via {@code set}. In {@code fit}, a value
     * {@code >= 1.0} is used directly as an upper bound on document frequency and a
     * value {@code < 1.0} is first multiplied by the number of input rows.
     *
     * @param value new value for the param
     * @return the result of {@code set}, cast to {@code CountVectorizer}
     */
    public CountVectorizer setMaxDF(double value) {
        return (CountVectorizer)this.set(this.maxDF(), BoxesRunTime.boxToDouble((double)value));
    }

    /**
     * Sets the {@code minTF} param via {@code set}. The param is not read by
     * {@code fit} in this class.
     *
     * @param value new value for the param
     * @return the result of {@code set}, cast to {@code CountVectorizer}
     */
    public CountVectorizer setMinTF(double value) {
        return (CountVectorizer)this.set(this.minTF(), BoxesRunTime.boxToDouble((double)value));
    }

    /**
     * Sets the {@code binary} param via {@code set}. The param is not read by
     * {@code fit} in this class.
     *
     * @param value new value for the param
     * @return the result of {@code set}, cast to {@code CountVectorizer}
     */
    public CountVectorizer setBinary(boolean value) {
        return (CountVectorizer)this.set(this.binary(), BoxesRunTime.boxToBoolean((boolean)value));
    }

    /**
     * Builds a vocabulary from the input column of {@code dataset} and wraps it in
     * a {@code CountVectorizerModel}.
     *
     * <p>Steps, in order: validate the schema; check {@code maxDF >= minDF}; count,
     * per distinct token, its total occurrences and the number of rows containing
     * it; optionally drop tokens whose document frequency is outside
     * {@code [minDf, maxDf]}; keep at most {@code vocabSize} tokens with the
     * largest total counts; release cached RDDs; log a warning if nothing is left.
     *
     * @param dataset data whose input column holds token sequences
     * @return a model carrying the selected vocabulary, with this estimator as its
     *         parent and param values copied via {@code copyValues}
     */
    @Override
    public CountVectorizerModel fit(Dataset<?> dataset) {
        // Typed as the common supertype of Some and None$ so both branches below
        // can be assigned to it.
        scala.Option maybeInputSize;
        this.transformSchema(dataset.schema(), true);
        // When both thresholds are on the same scale (both >= 1.0 or both < 1.0)
        // they can be compared before any data is read.
        if (BoxesRunTime.unboxToDouble((Object)this.$(this.minDF())) >= 1.0 && BoxesRunTime.unboxToDouble((Object)this.$(this.maxDF())) >= 1.0 || BoxesRunTime.unboxToDouble((Object)this.$(this.minDF())) < 1.0 && BoxesRunTime.unboxToDouble((Object)this.$(this.maxDF())) < 1.0) {
            Predef$.MODULE$.require(BoxesRunTime.unboxToDouble((Object)this.$(this.maxDF())) >= BoxesRunTime.unboxToDouble((Object)this.$(this.minDF())), (Function0 & Serializable & scala.Serializable)() -> "maxDF must be >= minDF.");
        }
        int vocSize = BoxesRunTime.unboxToInt((Object)this.$(this.vocabSize()));
        // One Seq of tokens per row, taken from the single selected input column.
        RDD input = dataset.select(this.$(this.inputCol()), (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0])).rdd().map((Function1 & Serializable & scala.Serializable)x$1 -> x$1.getSeq(0), ClassTag$.MODULE$.apply(Seq.class));
        // A threshold below 1.0 is scaled by the row count, so the rows must be counted.
        boolean countingRequired = BoxesRunTime.unboxToDouble((Object)this.$(this.minDF())) < 1.0 || BoxesRunTime.unboxToDouble((Object)this.$(this.maxDF())) < 1.0;
        if (countingRequired) {
            StorageLevel datasetLevel = dataset.storageLevel();
            StorageLevel noneLevel = StorageLevel$.MODULE$.NONE();
            // Persist the token RDD only when the dataset's storage level equals
            // NONE (null-safe equality, as in the original expression).
            if (datasetLevel != null ? datasetLevel.equals(noneLevel) : noneLevel == null) {
                input.persist(StorageLevel$.MODULE$.MEMORY_AND_DISK());
            }
            maybeInputSize = new Some((Object)BoxesRunTime.boxToLong((long)input.count()));
        } else {
            maybeInputSize = None$.MODULE$;
        }
        // maybeInputSize.get() is only reached on the "< 1.0" branches, which are
        // exactly the cases where countingRequired was true above.
        double minDf = BoxesRunTime.unboxToDouble((Object)this.$(this.minDF())) >= 1.0 ? BoxesRunTime.unboxToDouble((Object)this.$(this.minDF())) : BoxesRunTime.unboxToDouble((Object)this.$(this.minDF())) * (double)BoxesRunTime.unboxToLong((Object)maybeInputSize.get());
        double maxDf = BoxesRunTime.unboxToDouble((Object)this.$(this.maxDF())) >= 1.0 ? BoxesRunTime.unboxToDouble((Object)this.$(this.maxDF())) : BoxesRunTime.unboxToDouble((Object)this.$(this.maxDF())) * (double)BoxesRunTime.unboxToLong((Object)maybeInputSize.get());
        // Re-check after scaling, which also covers mixed-scale thresholds.
        Predef$.MODULE$.require(maxDf >= minDf, (Function0 & Serializable & scala.Serializable)() -> "maxDF must be >= minDF.");
        // Per row: count each token, emit (word, (countInRow, 1)); then sum both
        // components per word, giving (totalCount, numberOfRowsContainingWord).
        RDD allWordCounts = RDD$.MODULE$.rddToPairRDDFunctions(input.flatMap((Function1 & Serializable & scala.Serializable)tokens -> {
            OpenHashMap.mcJ.sp wc = new OpenHashMap.mcJ.sp(ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.Long());
            tokens.foreach(arg_0 -> CountVectorizer.$anonfun$fit$5$adapted((OpenHashMap)wc, arg_0));
            return (Iterable)wc.map((Function1 & Serializable & scala.Serializable)x0$1 -> {
                Tuple2 tuple2 = x0$1;
                if (tuple2 != null) {
                    String word = (String)tuple2._1();
                    long count = tuple2._2$mcJ$sp();
                    return new Tuple2((Object)word, (Object)new Tuple2.mcJI.sp(count, 1));
                }
                throw new MatchError((Object)tuple2);
            }, Iterable$.MODULE$.canBuildFrom());
        }, ClassTag$.MODULE$.apply(Tuple2.class)), ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(Tuple2.class), (Ordering)Ordering.String$.MODULE$).reduceByKey((Function2 & Serializable & scala.Serializable)(wcdf1, wcdf2) -> new Tuple2.mcJI.sp(wcdf1._1$mcJ$sp() + wcdf2._1$mcJ$sp(), wcdf1._2$mcI$sp() + wcdf2._2$mcI$sp()));
        // The document-frequency filter runs only when isSet reports minDF or maxDF
        // as set (presumably: explicitly set rather than defaulted - confirm).
        boolean filteringRequired = this.isSet(this.minDF()) || this.isSet(this.maxDF());
        RDD maybeFilteredWordCounts = filteringRequired ? allWordCounts.filter((Function1 & Serializable & scala.Serializable)x0$2 -> BoxesRunTime.boxToBoolean((boolean)CountVectorizer.$anonfun$fit$10(minDf, maxDf, x0$2))) : allWordCounts;
        // Drop the document frequency, keeping (word, totalCount); persisted because
        // it is used by both count() and top() below.
        RDD wordCounts = maybeFilteredWordCounts.map((Function1 & Serializable & scala.Serializable)x0$3 -> {
            Tuple2 tuple2 = x0$3;
            if (tuple2 != null) {
                String word = (String)tuple2._1();
                Tuple2 tuple22 = (Tuple2)tuple2._2();
                if (tuple22 != null) {
                    long count = tuple22._1$mcJ$sp();
                    return new Tuple2((Object)word, (Object)BoxesRunTime.boxToLong((long)count));
                }
            }
            throw new MatchError((Object)tuple2);
        }, ClassTag$.MODULE$.apply(Tuple2.class)).persist(StorageLevel$.MODULE$.MEMORY_AND_DISK());
        long fullVocabSize = wordCounts.count();
        // Take min(fullVocabSize, vocSize) entries ordered by total count and keep
        // only the words.
        String[] vocab = (String[])new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])wordCounts.top((int)scala.math.package$.MODULE$.min(fullVocabSize, (long)vocSize), package$.MODULE$.Ordering().by((Function1 & Serializable & scala.Serializable)x$3 -> BoxesRunTime.boxToLong((long)x$3._2$mcJ$sp()), (Ordering)Ordering.Long$.MODULE$)))).map((Function1 & Serializable & scala.Serializable)x$4 -> (String)x$4._1(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)));
        StorageLevel inputLevel = input.getStorageLevel();
        StorageLevel unpersistedLevel = StorageLevel$.MODULE$.NONE();
        // Unpersist the token RDD only if its storage level differs from NONE.
        if (inputLevel == null ? unpersistedLevel != null : !inputLevel.equals(unpersistedLevel)) {
            input.unpersist(input.unpersist$default$1());
        }
        wordCounts.unpersist(wordCounts.unpersist$default$1());
        if (new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])vocab)).isEmpty()) {
            this.logWarning((Function0<String>)(Function0 & Serializable & scala.Serializable)() -> "The vocabulary size is empty. If this was unexpected, you may wish to lower minDF (or) increase maxDF.");
        }
        return this.copyValues(new CountVectorizerModel(this.uid(), vocab).setParent(this), this.copyValues$default$2());
    }

    /**
     * Returns the result of {@code validateAndTransformSchema} for {@code schema}.
     *
     * @param schema input schema
     * @return the schema produced by {@code validateAndTransformSchema}
     */
    @Override
    public StructType transformSchema(StructType schema) {
        return this.validateAndTransformSchema(schema);
    }

    /**
     * Copies this estimator via {@code defaultCopy}.
     *
     * @param extra param map forwarded to {@code defaultCopy}
     * @return the copy, cast to {@code CountVectorizer}
     */
    @Override
    public CountVectorizer copy(ParamMap extra) {
        return (CountVectorizer)this.defaultCopy(extra);
    }

    /**
     * Lifted body of the document-frequency filter used in {@code fit}.
     *
     * @param minDf$1 inclusive lower bound on document frequency
     * @param maxDf$1 inclusive upper bound on document frequency
     * @param x0$2 pair whose second element is itself a pair; the int in that
     *        inner pair's second slot is the document frequency
     * @return {@code true} when {@code minDf$1 <= df <= maxDf$1}
     * @throws MatchError if {@code x0$2} or its second element is {@code null}
     */
    public static final /* synthetic */ boolean $anonfun$fit$10(double minDf$1, double maxDf$1, Tuple2 x0$2) {
        Tuple2 tuple2;
        Tuple2 tuple22 = x0$2;
        if (tuple22 != null && (tuple2 = (Tuple2)tuple22._2()) != null) {
            int df = tuple2._2$mcI$sp();
            return (double)df >= minDf$1 && (double)df <= maxDf$1;
        }
        throw new MatchError((Object)tuple22);
    }

    /**
     * Creates an estimator with the given identifier, then runs the initializers
     * of the mixed-in traits in the order listed.
     *
     * @param uid identifier returned by {@link #uid()}
     */
    public CountVectorizer(String uid) {
        this.uid = uid;
        HasInputCol.$init$(this);
        HasOutputCol.$init$(this);
        CountVectorizerParams.$init$(this);
        MLWritable.$init$(this);
        DefaultParamsWritable.$init$(this);
    }

    /**
     * Creates an estimator whose identifier comes from
     * {@code Identifiable$.MODULE$.randomUID("cntVec")}.
     */
    public CountVectorizer() {
        this(Identifiable$.MODULE$.randomUID("cntVec"));
    }

    /**
     * Lifted body of the per-token counting step used in {@code fit}: calls
     * {@code changeValue} on the map for key {@code w} with a default-value
     * function yielding {@code 1L} and an update function adding {@code 1L}.
     *
     * @param wc$1 per-row token count map
     * @param w token being counted
     * @return the boxed {@code long} returned by {@code changeValue}
     */
    public static final /* synthetic */ Object $anonfun$fit$5$adapted(OpenHashMap wc$1, String w) {
        return BoxesRunTime.boxToLong((long)wc$1.changeValue$mcJ$sp((Object)w, (Function0)(JFunction0.mcJ.sp & Serializable & scala.Serializable)() -> 1L, (Function1)(JFunction1.mcJJ.sp & Serializable & scala.Serializable)x$2 -> x$2 + 1L));
    }
}

