package helppack;

import info.debatty.java.stringsimilarity.CharacterSubstitutionInterface;
import info.debatty.java.stringsimilarity.WeightedLevenshtein;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

/* loaded from: input_file:helppack/Fetch11OCRErrors.class */
/**
 * Compares the tokens of ELAN (.eaf) reference transcriptions with the tokens of an OCR text
 * file and prints every reference token for which no sufficiently similar OCR token is found
 * nearby.
 *
 * <p>For each entry of {@link #fileLoc} the program reads the reference tier of the EAF file,
 * reads the OCR lines starting at the entry's label, and then, for each reference token,
 * searches a window of OCR tokens around the same index for the smallest
 * {@link #wlOcr weighted Levenshtein distance}.
 *
 * <p>All state is kept in public static fields, so the class is not safe for concurrent use.
 */
public class Fetch11OCRErrors {
    /** Tokens of the current EAF reference tier; replaced by {@link #setEafToks(String)}. */
    public static ArrayList<String> eafTok;
    /** Declared but neither assigned nor read anywhere in this class. */
    public static ArrayList<String> ocrLines;
    /** OCR tokens; {@link #setOCRToks(String, String, int)} appends to this list. */
    public static ArrayList<String> ocrTok;
    /** How many OCR tokens beyond {@code eafTok.size()} {@link #setOCRToks} may read. */
    public static int paramNumOfOCRTokensAllowedInExcess;
    /** A reference token counts as found only if its best distance is strictly below this. */
    public static double paramMaxAllowedTokenLev;
    /** Number of OCR positions before and after a reference index that are searched. */
    public static int paramMaxAllowedRangeForOCRTok;

    /** OCR text file that {@link #main(String[])} reads for every entry of {@link #fileLoc}. */
    private static final String OCR_TEXT_PATH =
            "/home/hoenen/workspace/ZHistLex/Allpiper1883_I_LinesNumberedGlossarRed.txt";

    /** Opening tag of the EAF tier whose annotation values are used as reference tokens. */
    private static final String REFERENCE_TIER_OPEN =
            "<TIER DEFAULT_LOCALE=\"de\" LINGUISTIC_TYPE_REF=\"main-tier\" PARTICIPANT=\"SPK0\" TIER_ID=\"Referenztext W\">";

    /**
     * Reduced substitution costs used by {@link #wlOcr}, keyed by {@link #pairKey(int, int)}.
     * Pairs that are absent cost 1.0.
     */
    private static final Map<Integer, Double> OCR_SUBSTITUTION_COSTS = buildOcrSubstitutionCosts();

    /** Older metric: only the pair (U+00DA, U+00DC) is discounted (0.5); everything else costs 1.0. */
    public static WeightedLevenshtein wlOcrold = new WeightedLevenshtein(new CharacterSubstitutionInterface() {
        @Override
        public double cost(char c, char c2) {
            return (c == 218 && c2 == 220) ? 0.5d : 1.0d;
        }
    });

    /** Metric used by {@link #main(String[])}; costs come from {@link #OCR_SUBSTITUTION_COSTS}. */
    public static WeightedLevenshtein wlOcr = new WeightedLevenshtein(new CharacterSubstitutionInterface() {
        @Override
        public double cost(char c, char c2) {
            Double special = OCR_SUBSTITUTION_COSTS.get(Integer.valueOf(pairKey(c, c2)));
            return special == null ? 1.0d : special.doubleValue();
        }
    });

    /**
     * One row per input: {@code [0]} is the EAF file path and {@code [1]} the label of the first
     * OCR line to read. Elements {@code [2]} and {@code [3]} are not read by this class.
     */
    public static String[][] fileLoc = {
        new String[]{"/home/hoenen/Dokumente/ZHistLex/Daten/Notker_vonRoland/Boeth.Cons.Prol.4-6_J.eaf", "A_1_3_1", "", "PDF210"},
        new String[]{"/home/hoenen/Dokumente/ZHistLex/Daten/Notker_vonRoland/Boeth.Cons.I.6-7_J.eaf", "A_1_7_1", "", "PDF210"},
        new String[]{"/home/hoenen/Dokumente/ZHistLex/Daten/Notker_vonRoland/Notker_Boethius_Categoriae/notkbcat-Boeth.Cat.Boeth.Cat.3.eaf", "A_1_367_2", "", "PDF570"},
        new String[]{"/home/hoenen/Dokumente/ZHistLex/Daten/Notker_vonRoland/Notker_Boethius_De_Interpretatione_ahd/notkbint-Boeth.Int.Boeth.Int.Praef.3.eaf", "A_1_499_1", "", "PDF702"},
        new String[]{"/home/hoenen/Dokumente/ZHistLex/Daten/Notker_vonRoland/Notker_Boethius_De_Interpretatione_ahd/notkbint-Boeth.Int.Boeth.Int.I.5.eaf", "A_1_500_20", "", "PDF703"}
    };

    /** Packs an ordered character pair into one int; distinct pairs give distinct keys. */
    private static int pairKey(int from, int to) {
        return (from << 16) | to;
    }

    /** Registers one directional substitution cost and rejects accidental duplicates. */
    private static void putCost(Map<Integer, Double> table, int from, int to, double cost) {
        if (table.put(Integer.valueOf(pairKey(from, to)), Double.valueOf(cost)) != null) {
            throw new IllegalStateException("duplicate substitution pair " + from + "/" + to);
        }
    }

    /**
     * Builds the substitution table. Entries are directional: a cost for (x, y) says nothing
     * about (y, x). Non-ASCII characters are given as decimal code points:
     * 167 section sign, 171 left guillemet, 193 A-acute, 194 A-circumflex, 205 I-acute,
     * 218 U-acute, 219 U-circumflex, 220 U-diaeresis, 224 a-grave, 225 a-acute,
     * 226 a-circumflex, 228 a-diaeresis, 230 ae ligature, 231 c-cedilla, 232 e-grave,
     * 233 e-acute, 234 e-circumflex, 236 i-grave, 237 i-acute, 238 i-circumflex, 242 o-grave,
     * 243 o-acute, 244 o-circumflex, 246 o-diaeresis, 250 u-acute, 251 u-circumflex,
     * 252 u-diaeresis, 281 e-ogonek, 339 oe ligature.
     */
    private static Map<Integer, Double> buildOcrSubstitutionCosts() {
        Map<Integer, Double> costs = new HashMap<Integer, Double>();
        putCost(costs, 'e', 233, 0.9d);
        putCost(costs, 'c', 'k', 0.9d);
        putCost(costs, 'g', 231, 0.9d);
        putCost(costs, '!', '.', 0.9d);
        putCost(costs, 'i', 'f', 0.9d);
        putCost(costs, 'e', 234, 0.9d);
        putCost(costs, 'o', 'c', 0.9d);
        putCost(costs, 237, 'f', 0.1d);
        putCost(costs, 237, 'e', 0.9d);
        putCost(costs, 'o', 'd', 0.9d);
        putCost(costs, 233, 232, 0.9d);
        putCost(costs, 237, 'i', 0.1d);
        putCost(costs, 'i', 238, 0.9d);
        putCost(costs, 237, 238, 0.9d);
        putCost(costs, 237, 'l', 0.9d);
        putCost(costs, 225, 252, 0.9d);
        putCost(costs, 'o', 242, 0.9d);
        putCost(costs, 'o', 'u', 0.9d);
        putCost(costs, 243, 'o', 0.1d);
        putCost(costs, 237, 'u', 0.9d);
        putCost(costs, 'u', 'n', 0.9d);
        putCost(costs, 'o', 244, 0.9d);
        putCost(costs, 243, 244, 0.9d);
        putCost(costs, 243, 242, 0.9d);
        putCost(costs, 'u', 'v', 0.9d);
        putCost(costs, '?', '.', 0.9d);
        putCost(costs, 251, 'u', 0.9d);
        putCost(costs, 218, 'U', 0.9d);
        putCost(costs, 'f', 'J', 0.9d);
        putCost(costs, 281, 'a', 0.9d);
        putCost(costs, 218, 220, 0.1d);
        putCost(costs, 218, 219, 0.9d);
        putCost(costs, 'n', 'N', 0.9d);
        putCost(costs, 230, 339, 0.9d);
        putCost(costs, 193, 194, 0.9d);
        putCost(costs, 226, 'a', 0.9d);
        putCost(costs, '!', 'a', 0.9d);
        putCost(costs, 281, 'e', 0.9d);
        putCost(costs, 226, 224, 0.9d);
        putCost(costs, 281, 'f', 0.9d);
        putCost(costs, 281, 'g', 0.1d);
        putCost(costs, 281, 231, 0.9d);
        putCost(costs, 'h', 'b', 0.9d);
        putCost(costs, 234, 'e', 0.9d);
        putCost(costs, 'e', 171, 0.9d);
        putCost(costs, 238, 'i', 0.1d);
        putCost(costs, 'l', 'i', 0.9d);
        putCost(costs, 250, 220, 0.9d);
        putCost(costs, 205, 'I', 0.9d);
        putCost(costs, 't', 'f', 0.9d);
        putCost(costs, 'E', 'U', 0.9d);
        putCost(costs, 250, 'a', 0.9d);
        putCost(costs, 238, 236, 0.9d);
        putCost(costs, 't', 'i', 0.9d);
        putCost(costs, 244, 'o', 0.9d);
        putCost(costs, 'n', 'u', 0.9d);
        putCost(costs, 193, 'a', 0.9d);
        putCost(costs, 'i', 'A', 0.9d);
        putCost(costs, 244, 246, 0.9d);
        putCost(costs, 'r', 'z', 0.9d);
        putCost(costs, 250, 'u', 0.1d);
        putCost(costs, 'x', 'z', 0.9d);
        putCost(costs, 'U', 220, 0.9d);
        putCost(costs, 'k', 'K', 0.9d);
        putCost(costs, 250, 252, 0.9d);
        putCost(costs, 237, 'I', 0.1d);
        putCost(costs, 250, 251, 0.1d);
        putCost(costs, 225, 226, 0.1d);
        putCost(costs, 225, 'a', 0.1d);
        putCost(costs, 281, 167, 0.1d);
        putCost(costs, 225, 224, 0.9d);
        putCost(costs, 'a', 'e', 0.9d);
        putCost(costs, 'D', 'B', 0.9d);
        putCost(costs, 'c', 'e', 0.9d);
        putCost(costs, 225, 228, 0.9d);
        putCost(costs, 'H', 'B', 0.9d);
        putCost(costs, 225, 'i', 0.9d);
        putCost(costs, 'e', 'g', 0.9d);
        return costs;
    }

    /**
     * Processes every entry of {@link #fileLoc}: loads the reference and OCR tokens and prints
     * one {@code NOT FOUND} line per reference token without a close OCR match.
     *
     * @param strArr command-line arguments; not used
     * @throws Exception if one of the input files cannot be read or the token dump cannot be written
     */
    public static void main(String[] strArr) throws Exception {
        paramNumOfOCRTokensAllowedInExcess = 100;
        paramMaxAllowedTokenLev = 4.0d;
        paramMaxAllowedRangeForOCRTok = 20;
        for (int fileIdx = 0; fileIdx < fileLoc.length; fileIdx++) {
            System.out.println("new File************************************************");
            ocrTok = new ArrayList<>();
            eafTok = new ArrayList<>();
            setEafToks(fileLoc[fileIdx][0]);
            String startLabel = fileLoc[fileIdx][1];
            int eafCount = eafTok.size();
            // The OCR side may contain up to 30% more tokens than the reference.
            paramNumOfOCRTokensAllowedInExcess = (int) (0.3d * eafCount);
            setOCRToks(OCR_TEXT_PATH, startLabel, eafCount);
            reportUnmatchedTokens();
        }
    }

    /**
     * Prints {@code <index>NOT FOUND <token>} for each token of {@link #eafTok} (except
     * {@code "."}) whose smallest {@link #wlOcr} distance to the OCR tokens within
     * {@link #paramMaxAllowedRangeForOCRTok} positions of the same index is not below
     * {@link #paramMaxAllowedTokenLev}.
     */
    private static void reportUnmatchedTokens() {
        // NOTE(review): the last reference token is never examined (bound is size - 1);
        // kept as found, confirm whether that is intended.
        for (int eafIdx = 0; eafIdx < eafTok.size() - 1; eafIdx++) {
            String eafToken = eafTok.get(eafIdx);
            if (eafToken.equals(".")) {
                continue;
            }
            // Loop-invariant normalisation of the reference token.
            // NOTE(review): "-\\$" matches a literal "-$", not a trailing hyphen - confirm.
            String needle = eafToken.replaceAll("-\\$", "").replaceAll("&lt;.+?&gt;", "");

            // Clamp the search window to the OCR list so neither end can run out of bounds.
            int windowStart = Math.max(0, eafIdx - paramMaxAllowedRangeForOCRTok);
            int windowEnd = Math.min(ocrTok.size(), eafIdx + paramMaxAllowedRangeForOCRTok);

            double best = Double.POSITIVE_INFINITY;
            for (int ocrIdx = windowStart; ocrIdx < windowEnd; ocrIdx++) {
                double distance = wlOcr.distance(needle, ocrTok.get(ocrIdx).replaceAll("-", ""));
                if (distance < best) {
                    best = distance;
                }
            }

            boolean found = best < paramMaxAllowedTokenLev;
            if (!found) {
                System.out.println(eafIdx + "NOT FOUND " + eafToken);
            }
        }
    }

    /**
     * Reads OCR lines of the form {@code label#-#text} from {@code str}, starting at the first
     * line whose label equals {@code str2}, and appends the resulting tokens to {@link #ocrTok}.
     *
     * <p>Reading stops at end of file, at the first line with an empty text part, or once the
     * running token count exceeds {@code eafTok.size() + paramNumOfOCRTokensAllowedInExcess}.
     * Bracketed segments ({@code [...]}) are removed, and a line ending in {@code -} is joined
     * with the first space-separated word of the following line.
     *
     * @param str  path of the UTF-8 OCR text file
     * @param str2 label of the first line to read
     * @param i    not used; the limit is derived from {@link #eafTok} instead
     * @throws Exception if the file cannot be opened or read
     */
    public static void setOCRToks(String str, String str2, int i) throws Exception {
        ArrayList<String> payloads = new ArrayList<String>();
        int seenTokens = 0;
        boolean inSection = false;

        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(new File(str)), "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().length() == 0) {
                    continue;
                }
                // split() drops trailing empty strings, so the array may have 0 or 1 elements.
                String[] fields = line.split("#-#");
                if (!inSection) {
                    if (fields.length == 0 || !fields[0].equals(str2)) {
                        continue;
                    }
                    inSection = true;
                }
                // A line without a text part is treated like an empty one.
                String payload = (line.endsWith("#-#") || fields.length < 2) ? "" : fields[1];
                seenTokens += payload.split(" ").length;
                if (seenTokens > eafTok.size() + paramNumOfOCRTokensAllowedInExcess) {
                    // The count only grows, so no later line could be accepted either.
                    break;
                }
                if (payload.trim().length() <= 0) {
                    break;
                }
                payloads.add(payload);
            }
        }

        for (int idx = 0; idx < payloads.size(); idx++) {
            // NOTE(review): runs of two or more spaces are deleted, not collapsed, which
            // glues the neighbouring words together - confirm this is intended.
            String text = payloads.get(idx).replaceAll("\\[.+?\\]", "").replaceAll("  +", "");
            if (text.endsWith("-") && idx < payloads.size() - 1) {
                // Move the first word of the next line onto the hyphenated line end.
                String next = payloads.get(idx + 1);
                String head = next.split(" ")[0];
                text = text + head;
                payloads.set(idx + 1, next.substring(head.length()));
            }
            for (String token : text.split(" ")) {
                if (token.trim().length() > 0) {
                    ocrTok.add(token);
                }
            }
        }
    }

    /**
     * Rebuilds {@link #eafTok} from the {@code ANNOTATION_VALUE} lines of the reference tier of
     * the EAF file {@code str} and writes the same tokens, one per line, to
     * {@code eafTokenLinesAct.txt} in the working directory.
     *
     * <p>One leading and one trailing underscore are stripped from each token. A token ending
     * in {@code -}, or followed by a fragment starting with {@code -}, is concatenated with that
     * following fragment. If the tier is missing or empty, {@link #eafTok} is left empty.
     *
     * @param str path of the UTF-8 EAF file
     * @throws Exception if the EAF file cannot be read or the dump file cannot be written
     */
    public static void setEafToks(String str) throws Exception {
        eafTok = new ArrayList<>();
        ArrayList<String> annotations = new ArrayList<String>();
        boolean inTier = false;

        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(new File(str)), "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String trimmed = line.trim();
                if (inTier) {
                    if (trimmed.equals("</TIER>")) {
                        break;
                    }
                    if (trimmed.startsWith("<ANNOTATION_VALUE>")) {
                        annotations.add(line.replaceAll("<.+?>", "").trim());
                    }
                }
                if (trimmed.equals(REFERENCE_TIER_OPEN)) {
                    inTier = true;
                }
            }
        }

        try (PrintWriter dump = new PrintWriter("eafTokenLinesAct.txt", "UTF-8")) {
            int idx = 0;
            while (idx < annotations.size()) {
                String token = annotations.get(idx).replaceAll("^_", "").replaceAll("_$", "");
                boolean hasNext = idx + 1 < annotations.size();
                if (hasNext && (token.endsWith("-") || annotations.get(idx + 1).startsWith("-"))) {
                    // Consume the following fragment so it is not emitted a second time.
                    token = token + annotations.get(idx + 1);
                    idx++;
                }
                eafTok.add(token);
                dump.println(token);
                idx++;
            }
        }
    }

    /**
     * Increments the counter stored under {@code str}, starting at 1 for a key that is absent
     * or mapped to {@code null}.
     *
     * @param map counters to update in place
     * @param str key whose count is incremented
     * @throws Exception never thrown by this implementation; kept for signature compatibility
     */
    public static void addToMap(Map<String, Integer> map, String str) throws Exception {
        Integer current = map.get(str);
        map.put(str, Integer.valueOf(current == null ? 1 : current.intValue() + 1));
    }
}
