package smile.feature.extraction;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;
import smile.data.DataFrame;
import smile.data.Tuple;
import smile.data.transform.Transform;
import smile.data.type.DataTypes;
import smile.data.type.StructField;
import smile.data.type.StructType;
import smile.sort.QuickSort;

/* loaded from: input_file:smile/feature/extraction/BagOfWords.class */
/**
 * Bag-of-words feature extractor.
 *
 * <p>A text is split into tokens by a caller-supplied tokenizer, and each
 * token that belongs to the fixed vocabulary contributes to the element of
 * the output vector at that word's vocabulary position. In binary mode the
 * element is set to 1 when the word occurs at all; otherwise it holds the
 * number of occurrences.
 *
 * <p>When used as a {@link Transform}, the output tuple has one integer
 * field per vocabulary word, named {@code "BoW_" + word}.
 */
public class BagOfWords implements Transform {
    /** Splits a text into tokens. */
    private final Function<String, String[]> tokenizer;
    /** The vocabulary; position {@code i} corresponds to feature {@code i}. */
    private final String[] words;
    /** Maps each vocabulary word to its position in {@link #words}. */
    private final Map<String, Integer> featureIndex;
    /** If true, features are presence indicators (0/1) instead of counts. */
    private final boolean binary;
    /** The schema of the tuples produced by {@link #apply(Tuple)}. */
    private final StructType schema;
    /** The text columns read by {@link #apply(Tuple)}; may be null. */
    private final String[] columns;

    /**
     * Creates a count-based extractor that is not bound to any input column.
     * Such an instance supports {@link #apply(String)} only; calling
     * {@link #apply(Tuple)} on it throws {@link IllegalStateException}.
     *
     * @param tokenizer the function that splits a text into tokens.
     * @param words the vocabulary. Must not contain duplicates.
     * @throws IllegalArgumentException if {@code words} contains a duplicate.
     */
    public BagOfWords(Function<String, String[]> tokenizer, String[] words) {
        this(null, tokenizer, words, false);
    }

    /**
     * Creates an extractor.
     *
     * @param columns the names of the text columns to read in
     *                {@link #apply(Tuple)}; may be null if only
     *                {@link #apply(String)} will be used.
     * @param tokenizer the function that splits a text into tokens.
     * @param words the vocabulary. Must not contain duplicates.
     * @param binary if true, each feature is 1 when the word is present
     *               and 0 otherwise; if false, each feature is the number
     *               of occurrences.
     * @throws IllegalArgumentException if {@code words} contains a duplicate.
     */
    public BagOfWords(String[] columns, Function<String, String[]> tokenizer, String[] words, boolean binary) {
        // Copy the arrays so that later changes made by the caller cannot
        // desynchronize the vocabulary from the index and the schema.
        this.columns = columns == null ? null : columns.clone();
        this.tokenizer = tokenizer;
        this.binary = binary;
        this.words = words.clone();

        this.featureIndex = new HashMap<>();
        for (int i = 0; i < this.words.length; i++) {
            // putIfAbsent returns the previous index when the word was already registered.
            if (this.featureIndex.putIfAbsent(this.words[i], i) != null) {
                throw new IllegalArgumentException("Duplicated word:" + this.words[i]);
            }
        }

        this.schema = new StructType(Arrays.stream(this.words)
                .map(word -> new StructField("BoW_" + word, DataTypes.IntegerType))
                .toArray(StructField[]::new));
    }

    /**
     * Returns the vocabulary.
     *
     * @return the feature words, in feature order. This is the array held
     *         by this object, not a copy, so callers must not modify it.
     */
    public String[] features() {
        return this.words;
    }

    /**
     * Learns a vocabulary from a data frame and returns a count-based
     * extractor bound to the given columns.
     *
     * <p>Every value of every listed column is tokenized and the token
     * occurrences are counted across all of them. The vocabulary consists
     * of at most {@code k} tokens, selected by sorting on the negated count
     * (i.e. most frequent first, assuming {@code QuickSort.sort} orders
     * its first argument ascending and permutes the second in step).
     *
     * @param data the training data.
     * @param tokenizer the function that splits a text into tokens.
     * @param k the maximum vocabulary size.
     * @param columns the names of the text columns to process.
     * @return the fitted extractor.
     * @throws IllegalArgumentException if {@code k} is negative.
     */
    public static BagOfWords fit(DataFrame data, Function<String, String[]> tokenizer, int k, String... columns) {
        // Reject a negative size before the (potentially expensive) scan;
        // otherwise Arrays.copyOf below fails with NegativeArraySizeException.
        if (k < 0) {
            throw new IllegalArgumentException("Invalid number of features: " + k);
        }

        Map<String, Integer> frequency = new HashMap<>();
        for (String column : columns) {
            for (String text : data.column(column).toStringArray()) {
                for (String token : tokenizer.apply(text)) {
                    frequency.merge(token, 1, Integer::sum);
                }
            }
        }

        int size = frequency.size();
        String[] vocabulary = new String[size];
        int[] negatedCounts = new int[size];
        int i = 0;
        for (Map.Entry<String, Integer> entry : frequency.entrySet()) {
            vocabulary[i] = entry.getKey();
            // Negate so that the sort on this key puts larger counts first.
            negatedCounts[i] = -entry.getValue();
            i++;
        }

        QuickSort.sort(negatedCounts, (Object[]) vocabulary);
        return new BagOfWords(columns, tokenizer, Arrays.copyOf(vocabulary, Math.min(k, size)), false);
    }

    /**
     * Extracts the bag-of-words features from the text columns of a tuple.
     * The tokens of all configured columns are accumulated into a single
     * feature vector.
     *
     * @param tuple the input tuple, read with {@code getString} for each
     *              configured column.
     * @return a tuple holding the feature vector under this extractor's schema.
     * @throws IllegalStateException if this extractor was created without
     *                               input columns.
     */
    @Override
    public Tuple apply(Tuple tuple) {
        if (this.columns == null) {
            throw new IllegalStateException(
                    "No input columns are configured; use apply(String) or construct with column names");
        }

        int[] bag = new int[this.featureIndex.size()];
        for (String column : this.columns) {
            count(tuple.getString(column), bag);
        }
        return Tuple.of(bag, this.schema);
    }

    /**
     * Extracts the bag-of-words features from a single text.
     *
     * @param text the input text, passed to the tokenizer as is.
     * @return the feature vector, with one element per vocabulary word.
     */
    public int[] apply(String text) {
        int[] bag = new int[this.featureIndex.size()];
        count(text, bag);
        return bag;
    }

    /**
     * Tokenizes a text and accumulates its vocabulary words into a feature
     * vector. Tokens outside the vocabulary are ignored.
     *
     * @param text the input text.
     * @param bag the feature vector to update in place.
     */
    private void count(String text, int[] bag) {
        for (String token : this.tokenizer.apply(text)) {
            Integer index = this.featureIndex.get(token);
            if (index != null) {
                if (this.binary) {
                    bag[index] = 1;
                } else {
                    bag[index]++;
                }
            }
        }
    }
}
