/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tika.eval.app.tools;

import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import org.apache.tika.eval.core.tokens.CommonTokenCountManager;

/**
 * Command-line tool that reports how much the token sets of different languages overlap.
 *
 * <p>The single program argument is a directory. The name of every entry in that directory is
 * used as a language key, the token set for each key is obtained from a
 * {@link CommonTokenCountManager} built over the same directory, and every unordered pair of
 * keys is compared. Pairs whose overlap ratio exceeds {@link #MIN_REPORTED_OVERLAP} are printed
 * to standard output as {@code <langA> <langB> <ratio>}.
 */
public class CommonTokenOverlapCounter {
    /** Pairs whose overlap ratio is at or below this value are not printed. */
    private static final double MIN_REPORTED_OVERLAP = 0.01;

    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the path of the directory to scan
     * @throws IllegalArgumentException if no directory argument is supplied
     * @throws Exception if the directory cannot be listed or the token manager fails to load
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            throw new IllegalArgumentException(
                    "Usage: CommonTokenOverlapCounter <commonTokensDir>");
        }
        Path commonTokensDir = Paths.get(args[0]);
        CommonTokenOverlapCounter counter = new CommonTokenOverlapCounter();
        counter.execute(commonTokensDir);
    }

    /**
     * Compares every unordered pair of entries found in {@code commonTokensDir}.
     *
     * @param commonTokensDir directory whose entry names are used as language keys
     * @throws IOException if the directory cannot be listed, or if constructing the
     *         {@link CommonTokenCountManager} fails
     */
    private void execute(Path commonTokensDir) throws IOException {
        // File.listFiles() returns null (not an empty array) when the path is not a
        // directory or an I/O error occurs; fail with a clear message instead of an NPE.
        File[] entries = commonTokensDir.toFile().listFiles();
        if (entries == null) {
            throw new IOException(
                    "Unable to list directory (missing, not a directory, or unreadable): "
                            + commonTokensDir);
        }
        List<String> langs = new ArrayList<>(entries.length);
        for (File entry : entries) {
            langs.add(entry.getName());
        }
        CommonTokenCountManager mgr = new CommonTokenCountManager(commonTokensDir, "");
        // Visit each unordered pair exactly once (i < j).
        for (int i = 0; i < langs.size() - 1; ++i) {
            for (int j = i + 1; j < langs.size(); ++j) {
                this.compare(langs.get(i), langs.get(j), mgr);
            }
        }
    }

    /**
     * Prints the overlap ratio of the token sets for two language keys when it is above
     * {@link #MIN_REPORTED_OVERLAP}.
     *
     * <p>The ratio is {@code 2 * |A ∩ B| / (|A| + |B|)} (the Dice coefficient), so it ranges
     * from 0 (disjoint) to 1 (identical sets).
     *
     * @param langA first language key
     * @param langB second language key
     * @param mgr source of the token set for each key
     */
    private void compare(String langA, String langB, CommonTokenCountManager mgr) {
        Set<String> setA = mgr.getTokens(langA);
        Set<String> setB = mgr.getTokens(langB);
        int denom = setA.size() + setB.size();
        if (denom == 0) {
            // Both sets are empty: the ratio is undefined, so there is nothing to report.
            return;
        }
        int shared = 0;
        for (String token : setA) {
            if (setB.contains(token)) {
                ++shared;
            }
        }
        double ratio = (2.0 * shared) / denom;
        if (ratio > MIN_REPORTED_OVERLAP) {
            System.out.printf(Locale.US, "%s %s %.2f%n", langA, langB, ratio);
        }
    }
}

