package com.datumbox.framework.core.utilities.text.analyzers;

import com.datumbox.framework.common.utilities.PHPMethods;
import com.datumbox.framework.common.utilities.StringCleaner;
import com.datumbox.framework.core.utilities.text.extractors.NgramsExtractor;
import com.datumbox.framework.core.utilities.text.parsers.HTMLParser;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

/* loaded from: input_file:com/datumbox/framework/core/utilities/text/analyzers/TextSimilarity.class */
/**
 * Static helpers that score how similar two text documents are.
 *
 * <p>Both public methods first normalise their inputs with
 * {@link #preprocessDocument(String)} and then compare the normalised text.
 */
public class TextSimilarity {
    /**
     * Scores two documents with {@code PHPSimilarText.similarityPercentage}.
     *
     * <p>Both documents are preprocessed, the shorter of the two (after
     * preprocessing) is passed as the first argument, and the returned
     * percentage is divided by 100.
     *
     * @param str  the first document
     * @param str2 the second document
     * @return the similarity percentage divided by 100
     */
    public static double oliverSimilarity(String str, String str2) {
        // Strings are immutable: the cleaned text is the RETURN value and must be kept.
        String first = preprocessDocument(str);
        String second = preprocessDocument(str2);

        String shorter = first;
        String longer = second;
        if (first.length() > second.length()) {
            shorter = second;
            longer = first;
        }
        return PHPSimilarText.similarityPercentage(shorter, longer) / 100.0d;
    }

    /**
     * Scores two documents by the overlap of their keyword combinations.
     *
     * <p>Keyword combinations are extracted from each preprocessed document with
     * an {@link NgramsExtractor}, filtered by
     * {@link #filterKeywordCombinations(Map, int)}, and the result is the size of
     * the intersection of the two key sets divided by the size of their union.
     *
     * @param str  the first document
     * @param str2 the second document
     * @param i    the shingle length; used as the extractor's max combinations and
     *             examination window length, and as the filter's expected size
     * @return {@code |intersection| / |union|} of the two shingle sets; this is
     *         {@code NaN} when neither document yields any shingle (0.0 / 0.0)
     */
    public static double shinglerSimilarity(String str, String str2, int i) {
        // Keep the preprocessed text; the original call discarded it.
        String first = preprocessDocument(str);
        String second = preprocessDocument(str2);

        NgramsExtractor.Parameters parameters = new NgramsExtractor.Parameters();
        parameters.setMaxCombinations(i);
        parameters.setMaxDistanceBetweenKwds(0);
        parameters.setExaminationWindowLength(i);
        NgramsExtractor ngramsExtractor = new NgramsExtractor(parameters);

        Map<String, Double> firstShingles = ngramsExtractor.extract(first);
        Map<String, Double> secondShingles = ngramsExtractor.extract(second);
        filterKeywordCombinations(firstShingles, i);
        filterKeywordCombinations(secondShingles, i);

        // Copy the key sets so the set algebra does not mutate the extracted maps.
        Set<String> union = new HashSet<>(firstShingles.keySet());
        union.addAll(secondShingles.keySet());
        Set<String> intersection = new HashSet<>(firstShingles.keySet());
        intersection.retainAll(secondShingles.keySet());

        double totalShingles = union.size();
        double commonShingles = intersection.size();
        return commonShingles / totalShingles;
    }

    /**
     * Normalises a document before comparison.
     *
     * <p>Applies, in order: {@code StringCleaner.tokenizeURLs},
     * {@code HTMLParser.extractText}, {@code StringCleaner.removeAccents} and
     * {@code StringCleaner.removeExtraSpaces}.
     *
     * @param str the raw document
     * @return the normalised document; the argument itself is not modified
     */
    private static String preprocessDocument(String str) {
        String withTokenizedUrls = StringCleaner.tokenizeURLs(str);
        String plainText = HTMLParser.extractText(withTokenizedUrls);
        String withoutAccents = StringCleaner.removeAccents(plainText);
        return StringCleaner.removeExtraSpaces(withoutAccents);
    }

    /**
     * Removes, in place, every entry whose key does not contain exactly
     * {@code i - 1} space characters.
     *
     * <p>Presumably this keeps only combinations of exactly {@code i} words,
     * assuming words in a key are separated by single spaces.
     *
     * @param map the extracted keyword combinations; modified by this call
     * @param i   the expected combination size
     */
    private static void filterKeywordCombinations(Map<String, Double> map, int i) {
        int expectedSpaces = i - 1;
        Iterator<Map.Entry<String, Double>> entries = map.entrySet().iterator();
        while (entries.hasNext()) {
            String combination = entries.next().getKey();
            if (PHPMethods.substr_count(combination, ' ') != expectedSpaces) {
                // Remove through the iterator to stay safe while iterating.
                entries.remove();
            }
        }
    }
}
