package com.datumbox.framework.core.utilities.text.extractors;

import com.datumbox.framework.core.utilities.text.extractors.AbstractTextExtractor;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;

/* loaded from: input_file:com/datumbox/framework/core/utilities/text/extractors/NgramsExtractor.class */
/**
 * Extracts keyword combinations (n-grams) from a text and assigns a score to each one.
 *
 * <p>The text is tokenized with the tokenizer supplied by the parent class. Every token
 * position that passes the word filters is used as a seed; combinations of up to
 * {@link Parameters#getMaxCombinations()} tokens are then built from the positions that
 * follow the seed inside a window. The scores of identical combinations found at different
 * positions are summed, and combinations whose total score is below
 * {@link Parameters#getMinWordOccurrence()} are discarded.</p>
 */
public class NgramsExtractor extends AbstractTextExtractor<NgramsExtractor.Parameters, String, Double> {

    /**
     * Configuration of the {@link NgramsExtractor}.
     */
    public static class Parameters extends AbstractTextExtractor.AbstractParameters {
        private static final long serialVersionUID = 1L;

        /** Maximum number of tokens in a single combination. */
        private int maxCombinations = 3;

        /** Minimum length (in characters) a token must have to be used. */
        private int minWordLength = 1;

        /**
         * Minimum number of occurrences a token must have to be used; also the minimum
         * total score a combination must reach to be kept in the result.
         */
        private int minWordOccurrence = 1;

        /**
         * Length of the window examined after each seed position. Only consulted when
         * {@code maxDistanceBetweenKwds} is non-zero; the window is never shorter than
         * {@code maxCombinations}.
         */
        private int examinationWindowLength = 3;

        /** Maximum number of positions that may be skipped between two consecutive tokens of a combination. */
        private int maxDistanceBetweenKwds = 0;

        /**
         * @return the maximum number of tokens in a combination
         */
        public int getMaxCombinations() {
            return maxCombinations;
        }

        /**
         * @param maxCombinations the maximum number of tokens in a combination
         */
        public void setMaxCombinations(int maxCombinations) {
            this.maxCombinations = maxCombinations;
        }

        /**
         * @return the minimum token length
         */
        public int getMinWordLength() {
            return minWordLength;
        }

        /**
         * @param minWordLength the minimum token length
         */
        public void setMinWordLength(int minWordLength) {
            this.minWordLength = minWordLength;
        }

        /**
         * @return the minimum token occurrence / minimum combination score
         */
        public int getMinWordOccurrence() {
            return minWordOccurrence;
        }

        /**
         * @param minWordOccurrence the minimum token occurrence / minimum combination score
         */
        public void setMinWordOccurrence(int minWordOccurrence) {
            this.minWordOccurrence = minWordOccurrence;
        }

        /**
         * @return the length of the examination window
         */
        public int getExaminationWindowLength() {
            return examinationWindowLength;
        }

        /**
         * @param examinationWindowLength the length of the examination window
         */
        public void setExaminationWindowLength(int examinationWindowLength) {
            this.examinationWindowLength = examinationWindowLength;
        }

        /**
         * @return the maximum number of skipped positions between consecutive tokens
         */
        public int getMaxDistanceBetweenKwds() {
            return maxDistanceBetweenKwds;
        }

        /**
         * @param maxDistanceBetweenKwds the maximum number of skipped positions between consecutive tokens
         */
        public void setMaxDistanceBetweenKwds(int maxDistanceBetweenKwds) {
            this.maxDistanceBetweenKwds = maxDistanceBetweenKwds;
        }
    }

    /**
     * Creates an extractor that uses the given configuration.
     *
     * @param parameters the extractor configuration
     */
    public NgramsExtractor(Parameters parameters) {
        super(parameters);
    }

    /**
     * Extracts the keyword combinations of the text together with their scores.
     *
     * @param text the text to analyse
     * @return a map from the combination (tokens joined by single spaces) to its total score
     */
    @Override
    public Map<String, Double> extract(String text) {
        Map<Integer, String> idToWord = new HashMap<>();
        Map<Integer, Double> idToOccurrences = new HashMap<>();
        // LinkedHashMap so that positions are visited in document order.
        Map<Integer, Integer> positionToId = new LinkedHashMap<>();
        int numberOfWords = buildInternalArrays(text, idToWord, idToOccurrences, positionToId);

        int maxCombinations = ((Parameters) this.parameters).getMaxCombinations();
        Map<String, Double> ngramScores = new HashMap<>();

        for (Map.Entry<Integer, Integer> positionEntry : positionToId.entrySet()) {
            if (!useThisWord(positionEntry.getValue(), idToWord, idToOccurrences)) {
                continue;
            }

            Map<LinkedList<Integer>, Double> combinations = getPositionCombinationsWithinWindow(
                    positionEntry.getKey(), maxCombinations, idToWord, idToOccurrences, positionToId, numberOfWords);

            for (Map.Entry<LinkedList<Integer>, Double> combination : combinations.entrySet()) {
                LinkedList<Integer> positions = combination.getKey();
                StringBuilder sb = new StringBuilder(positions.size() * 6);
                for (Integer position : positions) {
                    sb.append(idToWord.get(positionToId.get(position))).append(StringUtils.SPACE);
                }
                if (sb.length() > 0) {
                    // The same n-gram may be found from several seeds; accumulate its score.
                    ngramScores.merge(sb.toString().trim(), combination.getValue(), Double::sum);
                }
            }
        }

        // Drop the combinations that did not accumulate enough score.
        double minScore = ((Parameters) this.parameters).getMinWordOccurrence();
        ngramScores.entrySet().removeIf(entry -> entry.getValue() < minScore);

        return ngramScores;
    }

    /**
     * Builds every combination of token positions that starts at the seed position and
     * stays within the examination window.
     *
     * @param windowStart the seed position; it is the first element of every combination
     * @param maxCombinations the maximum number of positions in a combination
     * @param idToWord maps a word id to the word
     * @param idToOccurrences maps a word id to its number of occurrences
     * @param positionToId maps a token position to the word id found there
     * @param numberOfWords the number of tokens in the document
     * @return a map from each combination of positions to its score
     */
    private Map<LinkedList<Integer>, Double> getPositionCombinationsWithinWindow(Integer windowStart, int maxCombinations, Map<Integer, String> idToWord, Map<Integer, Double> idToOccurrences, Map<Integer, Integer> positionToId, int numberOfWords) {
        Parameters params = (Parameters) this.parameters;
        int maxDistanceBetweenKwds = params.getMaxDistanceBetweenKwds();

        // Without gaps the window only needs to fit the longest possible combination.
        int windowLength = (maxDistanceBetweenKwds == 0)
                ? maxCombinations
                : Math.max(params.getExaminationWindowLength(), maxCombinations);
        int windowEnd = Math.min(windowStart + windowLength, numberOfWords);

        Map<LinkedList<Integer>, Double> combinations = new HashMap<>();
        LinkedList<Integer> seed = new LinkedList<>();
        seed.add(windowStart);
        combinations.put(seed, 1.0);

        for (int position = windowStart + 1; position < windowEnd; position++) {
            Integer wordId = positionToId.get(position);
            if (wordId == null || !useThisWord(wordId, idToWord, idToOccurrences)) {
                continue;
            }

            // Number of positions strictly between the seed and the current position.
            int offsetFromSeed = position - (windowStart + 1);

            // New combinations are collected separately because the map is being iterated.
            Map<LinkedList<Integer>, Double> extended = new HashMap<>();
            for (LinkedList<Integer> previous : combinations.keySet()) {
                int previousSize = previous.size();
                int gap = position - (previous.getLast() + 1);
                if (previousSize < maxCombinations && gap <= maxDistanceBetweenKwds) {
                    // Positions between the seed and the new token that are not part of the
                    // combination: (previousSize + 1) tokens after the extension, minus the
                    // seed and the new token themselves.
                    int skippedWords = offsetFromSeed - ((previousSize + 1) - 2);
                    double score = 1.0 / (1.0 + skippedWords);

                    LinkedList<Integer> candidate = new LinkedList<>(previous);
                    candidate.add(position);
                    extended.put(candidate, score);
                }
            }
            combinations.putAll(extended);
        }

        return combinations;
    }

    /**
     * Checks whether a word passes the minimum length and minimum occurrence filters.
     *
     * @param wordId the id of the word
     * @param idToWord maps a word id to the word
     * @param idToOccurrences maps a word id to its number of occurrences
     * @return {@code true} if the word is known, long enough and frequent enough
     */
    private boolean useThisWord(Integer wordId, Map<Integer, String> idToWord, Map<Integer, Double> idToOccurrences) {
        String word = idToWord.get(wordId);
        if (word == null) {
            return false;
        }
        Parameters params = (Parameters) this.parameters;
        return word.length() >= params.getMinWordLength()
                && idToOccurrences.get(wordId) >= (double) params.getMinWordOccurrence();
    }

    /**
     * Tokenizes the text and fills the lookup maps used by the extraction.
     *
     * @param text the text to tokenize
     * @param idToWord filled with word id to word
     * @param idToOccurrences filled with word id to number of occurrences
     * @param positionToId filled with token position to word id
     * @return the number of tokens in the text
     */
    private int buildInternalArrays(String text, Map<Integer, String> idToWord, Map<Integer, Double> idToOccurrences, Map<Integer, Integer> positionToId) {
        Map<String, Integer> wordToId = new HashMap<>();
        List<String> tokens = generateTokenizer().tokenize(text);

        int lastId = -1;
        int numberOfWords = tokens.size();
        for (int position = 0; position < numberOfWords; position++) {
            String word = tokens.get(position);
            Integer wordId = wordToId.get(word);
            if (wordId == null) {
                // First time this word is seen: assign the next sequential id.
                wordId = ++lastId;
                wordToId.put(word, wordId);
                idToWord.put(wordId, word);
                idToOccurrences.put(wordId, 0.0);
            }
            idToOccurrences.put(wordId, idToOccurrences.get(wordId) + 1.0);
            positionToId.put(position, wordId);
        }

        return numberOfWords;
    }
}
