package testpack;

import info.debatty.java.stringsimilarity.CharacterSubstitutionInterface;
import info.debatty.java.stringsimilarity.WeightedLevenshtein;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.ArrayList;

/* loaded from: input_file:testpack/TestNewAlign2.class */
public class TestNewAlign2 {
    // Reference tokens: the ANNOTATION_VALUE texts collected by setEafToks for the current file.
    public static ArrayList<String> eafTok;
    // Not referenced anywhere in the visible code.
    public static ArrayList<String> ocrLines;
    // OCR tokens collected by setOCRToks, in reading order.
    public static ArrayList<String> ocrTok;
    // Parallel to ocrTok: the fourth "_"-separated field of the OCR line id for the first
    // token of each line, "-" for every other token (presumably a line-break marker).
    public static ArrayList<String> lbs;
    // Parallel to ocrTok: the third "_"-separated field of the OCR line id when it differs
    // from the previous line's, otherwise "-" (presumably a page-break marker).
    public static ArrayList<String> pbs;
    // setOCRToks stops reading once it has seen more than eafTok.size() plus this many tokens.
    public static int paramNumOfOCRTokensAllowedInExcess;
    // Assigned in main; not read anywhere else in the visible code.
    public static double paramMaxAllowedTokenLev;
    // Half-width of the window of reference tokens that main compares each OCR token against.
    public static int paramMaxAllowedRangeForOCRTok;
    // One row per input: [0] path of the .eaf file, [1] id of the OCR line where reading starts.
    // Columns [2] and [3] are not read by the visible code.
    public static String[][] fileLoc = {new String[]{"/home/hoenen/Dokumente/ZHistLex/Daten/Notker_vonRoland/Boeth.Cons.Prol.4-6_J.eaf", "A_1_3_1", "", "PDF210"}, new String[]{"/home/hoenen/Dokumente/ZHistLex/Daten/Notker_vonRoland/Boeth.Cons.I.6-7_J.eaf", "A_1_7_1", "", "PDF210"}, new String[]{"/home/hoenen/Dokumente/ZHistLex/Daten/Notker_vonRoland/Notker_Boethius_Categoriae/notkbcat-Boeth.Cat.Boeth.Cat.3.eaf", "A_1_367_2", "", "PDF570"}, new String[]{"/home/hoenen/Dokumente/ZHistLex/Daten/Notker_vonRoland/Notker_Boethius_De_Interpretatione_ahd/notkbint-Boeth.Int.Boeth.Int.Praef.3.eaf", "A_1_499_1", "", "PDF702"}, new String[]{"/home/hoenen/Dokumente/ZHistLex/Daten/Notker_vonRoland/Notker_Boethius_De_Interpretatione_ahd/notkbint-Boeth.Int.Boeth.Int.I.5.eaf", "A_1_500_20", "", "PDF703"}};
    /**
     * Weighted Levenshtein metric used for every OCR-vs-reference comparison in this class.
     *
     * The substitution table is directional: a pair is only discounted in the exact
     * (c, c2) order listed below. Listed pairs cost 0.1 or 0.9; every other pair costs 1.0.
     */
    public static WeightedLevenshtein wlOcr = new WeightedLevenshtein(new CharacterSubstitutionInterface() {
        /**
         * Returns the substitution cost for the ordered pair (c, c2).
         * Each case passes the 0.1-cost partners of c first, then its 0.9-cost partners.
         */
        @Override
        public double cost(char c, char c2) {
            switch (c) {
                case '!': return rate(c2, "", ".a");
                case '?': return rate(c2, "", ".");
                case 'D': return rate(c2, "", "B");
                case 'E': return rate(c2, "", "U");
                case 'H': return rate(c2, "", "B");
                case 'U': return rate(c2, "", "\u00dc");
                case 'a': return rate(c2, "", "e");
                case 'c': return rate(c2, "", "ke");
                case 'e': return rate(c2, "", "\u00e9\u00ea\u00abg");
                case 'f': return rate(c2, "", "J");
                case 'g': return rate(c2, "", "\u00e7");
                case 'h': return rate(c2, "", "b");
                case 'i': return rate(c2, "", "f\u00eeA");
                case 'k': return rate(c2, "", "K");
                case 'l': return rate(c2, "", "i");
                case 'n': return rate(c2, "", "Nu");
                case 'o': return rate(c2, "", "cd\u00f2u\u00f4");
                case 'r': return rate(c2, "", "z");
                case 't': return rate(c2, "", "fi");
                case 'u': return rate(c2, "", "nv");
                case 'x': return rate(c2, "", "z");
                case '\u00c1': return rate(c2, "", "\u00c2a"); // A-acute
                case '\u00cd': return rate(c2, "", "I"); // I-acute
                case '\u00da': return rate(c2, "\u00dc", "U\u00db"); // U-acute
                case '\u00e1': return rate(c2, "\u00e2a", "\u00fc\u00e0\u00e4i"); // a-acute
                case '\u00e2': return rate(c2, "", "a\u00e0"); // a-circumflex
                case '\u00e6': return rate(c2, "", "\u0153"); // ae ligature
                case '\u00e9': return rate(c2, "", "\u00e8"); // e-acute
                case '\u00ea': return rate(c2, "", "e"); // e-circumflex
                case '\u00ed': return rate(c2, "fiI", "e\u00eelu"); // i-acute
                case '\u00ee': return rate(c2, "i", "\u00ec"); // i-circumflex
                case '\u00f3': return rate(c2, "o", "\u00f4\u00f2"); // o-acute
                case '\u00f4': return rate(c2, "", "o\u00f6"); // o-circumflex
                case '\u00fa': return rate(c2, "u\u00fb", "\u00dca\u00fc"); // u-acute
                case '\u00fb': return rate(c2, "", "u"); // u-circumflex
                case '\u0119': return rate(c2, "g\u00a7", "aef\u00e7"); // e-ogonek
                default: return 1.0d;
            }
        }

        /** Looks up {@code target} among the 0.1-cost partners, then the 0.9-cost partners. */
        private double rate(char target, String cheap, String similar) {
            if (cheap.indexOf(target) >= 0) {
                return 0.1d;
            }
            if (similar.indexOf(target) >= 0) {
                return 0.9d;
            }
            return 1.0d;
        }
    });

    /**
     * Aligns, for every entry of {@link #fileLoc}, the OCR tokens of the matching text region
     * with the reference tokens of the .eaf file and writes one report per entry to
     * {@code <index>alignNiceFormat.txt}.
     *
     * Each report line holds, tab-separated: OCR token index, OCR token, aligned reference
     * index/indices (empty when unaligned), the lbs entry, the pbs entry, and the resolved
     * reference token ("#UNK#" when unaligned, '#'-joined indices for multi-alignments).
     *
     * Changes against the previous version: alignment slots start out as "" instead of null
     * (a token without any candidate used to crash the report stage), prefix matches are
     * joined with ';' instead of being appended to the text "null", and the report writer is
     * closed even when writing fails.
     *
     * @param strArr command-line arguments; not used
     * @throws Exception on any I/O or parsing failure
     */
    public static void main(String[] strArr) throws Exception {
        String str;
        String str2;
        paramNumOfOCRTokensAllowedInExcess = 100;
        paramMaxAllowedTokenLev = 3.0d;
        paramMaxAllowedRangeForOCRTok = 200;
        for (int i = 0; i < fileLoc.length; i++) {
            lbs = new ArrayList<>();
            pbs = new ArrayList<>();
            System.out.println("new File************************************************");
            ocrTok = new ArrayList<>();
            eafTok = new ArrayList<>();
            setEafToks(fileLoc[i][0]);
            String str3 = fileLoc[i][1];
            int size = eafTok.size();
            // The OCR excerpt may be up to 40% longer than the reference token list.
            paramNumOfOCRTokensAllowedInExcess = (int) (0.4d * eafTok.size());
            setOCRToks("/home/hoenen/workspace/ZHistLex/Allpiper1883_I_LinesNumberedGlossarRed.txt", str3, size);
            // strArr2[k] = reference index (or ';'-joined indices) aligned to OCR token k.
            // Pre-filled with "" so that unassigned slots read as "unaligned", never as null.
            String[] strArr2 = new String[ocrTok.size()];
            java.util.Arrays.fill(strArr2, "");
            // i2 = expected position in eafTok; advances by one per OCR token and is
            // re-synchronised whenever a perfect match is found.
            int i2 = 0;
            for (int i3 = 0; i3 < ocrTok.size(); i3++) {
                String str4 = ocrTok.get(i3);
                String str5 = "";
                // str6 = OCR token, glued to a hyphenated neighbour where applicable.
                String str6 = str4;
                boolean z = false;
                boolean z2 = false;
                if (i3 > 0 && ocrTok.get(i3 - 1).endsWith("-")) {
                    str6 = String.valueOf(ocrTok.get(i3 - 1)) + str4;
                    z = true;
                }
                if (i3 < ocrTok.size() - 1 && ocrTok.get(i3 + 1).startsWith("-")) {
                    str6 = String.valueOf(str4) + ocrTok.get(i3 + 1);
                    z2 = true;
                }
                // Left context: the previous token, or the one before it when the previous
                // token was glued onto this one.
                if (i3 > 0) {
                    if (!z) {
                        str5 = String.valueOf(str5) + ocrTok.get(i3 - 1) + " ";
                    } else if (i3 > 1) {
                        str5 = String.valueOf(str5) + ocrTok.get(i3 - 2) + " ";
                    }
                }
                // str7 = "left token right": the OCR token in its context.
                String str7 = String.valueOf(str5) + str6 + " ";
                if (i3 < ocrTok.size() - 1) {
                    if (!z2) {
                        str7 = String.valueOf(str7) + ocrTok.get(i3 + 1);
                    } else if (i3 < ocrTok.size() - 2) {
                        str7 = String.valueOf(str7) + ocrTok.get(i3 + 2);
                    }
                }
                // Candidate window [i4, size2) around the expected reference position.
                int i4 = i2 <= paramMaxAllowedRangeForOCRTok ? 0 : i2 - paramMaxAllowedRangeForOCRTok;
                int size2 = i2 > eafTok.size() - paramMaxAllowedRangeForOCRTok ? eafTok.size() : i2 + paramMaxAllowedRangeForOCRTok;
                // d = smallest in-context distance seen so far.
                double d = 100000.0d;
                // For the best candidate(s), ';'-joined on ties:
                // str8 = token-level distances, str9 = reference tokens, str10 = reference indices.
                String str8 = "";
                String str9 = "";
                String str10 = "";
                for (int i5 = i4; i5 < size2; i5++) {
                    String str11 = eafTok.get(i5);
                    String str12 = "";
                    boolean z3 = false;
                    boolean z4 = false;
                    if (!str11.contains("_") || str11.startsWith("_") || str11.endsWith("_")) {
                        // Plain reference token: build core (str) and in-context form (str2)
                        // the same way as for the OCR side, treating '-' and '_' as glue.
                        str = str11;
                        if (i5 > 0 && (eafTok.get(i5 - 1).endsWith("-") || eafTok.get(i5 - 1).endsWith("_"))) {
                            str = String.valueOf(eafTok.get(i5 - 1)) + str11;
                            z3 = true;
                        }
                        if (i5 < eafTok.size() - 1 && (eafTok.get(i5 + 1).startsWith("-") || eafTok.get(i5 + 1).startsWith("_"))) {
                            str = String.valueOf(str11) + eafTok.get(i5 + 1);
                            z4 = true;
                        }
                        if (i5 > 0) {
                            if (!z3) {
                                str12 = String.valueOf(str12) + eafTok.get(i5 - 1).split("_")[eafTok.get(i5 - 1).split("_").length - 1] + " ";
                            } else if (i5 > 1) {
                                str12 = String.valueOf(str12) + eafTok.get(i5 - 2).split("_")[eafTok.get(i5 - 2).split("_").length - 1] + " ";
                            }
                        }
                        str2 = String.valueOf(str12) + str11 + " ";
                        if (i5 < eafTok.size() - 1) {
                            if (!z4) {
                                str2 = String.valueOf(str2) + eafTok.get(i5 + 1).split("_")[0];
                            } else if (i5 < eafTok.size() - 2) {
                                str2 = String.valueOf(str2) + eafTok.get(i5 + 2).split("_")[0];
                            }
                        }
                    } else {
                        // Reference token with an inner '_': compare against its first part,
                        // with the second part and the next token's first part as context.
                        str = str11.split("_")[0];
                        str2 = String.valueOf(str) + " " + str11.split("_")[1] + " ";
                        if (i5 < eafTok.size() - 1) {
                            str2 = String.valueOf(str2) + eafTok.get(i5 + 1).replaceAll("^_", "").split("_")[0];
                        }
                    }
                    double distance = wlOcr.distance(str7, str2);
                    double distance2 = wlOcr.distance(str6, str);
                    // A tie extends the candidate lists; a strictly better candidate resets them.
                    if (distance == d) {
                        str9 = String.valueOf(str9) + ";" + str11;
                        str8 = String.valueOf(str8) + ";" + distance2;
                        str10 = String.valueOf(str10) + ";" + i5;
                    }
                    if (distance < d) {
                        d = distance;
                        str9 = str11;
                        str8 = String.valueOf("") + distance2;
                        str10 = String.valueOf("") + i5;
                    }
                }
                if (str9.contains(";")) {
                    // Several equally good candidates: only those within +/-1 of the expected
                    // position i2 are considered.
                    String[] split = str10.split(";");
                    boolean z5 = false; // at least one candidate near i2
                    boolean z6 = false; // at least two candidates near i2
                    boolean z7 = false; // a near candidate equals the OCR token exactly
                    for (int i6 = 0; i6 < split.length; i6++) {
                        if (Integer.parseInt(split[i6]) == i2 + 1 || Integer.parseInt(split[i6]) == i2 || Integer.parseInt(split[i6]) == i2 - 1) {
                            if (str9.split(";")[i6].equals(ocrTok.get(i3))) {
                                z7 = true;
                                strArr2[i3] = split[i6];
                            }
                            if (z5) {
                                z6 = true;
                            }
                            z5 = true;
                        }
                    }
                    if (!z7) {
                        if (z5) {
                            if (z6) {
                                // Collect the near candidates (each list keeps a trailing ';').
                                String str13 = "";
                                String str14 = "";
                                String str15 = "";
                                for (int i7 = 0; i7 < split.length; i7++) {
                                    if (Integer.parseInt(split[i7]) == i2 + 1 || Integer.parseInt(split[i7]) == i2 || Integer.parseInt(split[i7]) == i2 - 1) {
                                        str13 = String.valueOf(str13) + str9.split(";")[i7] + ";";
                                        str14 = String.valueOf(str14) + str8.split(";")[i7] + ";";
                                        str15 = String.valueOf(str15) + str10.split(";")[i7] + ";";
                                    }
                                }
                                // If the OCR token is at least as close to the concatenation of
                                // all near candidates as to each one individually, align it to
                                // all of them.
                                double distance3 = wlOcr.distance(ocrTok.get(i3), str13.replaceAll(";", ""));
                                boolean z8 = true;
                                for (int i8 = 0; i8 < str14.split(";").length; i8++) {
                                    if (Double.parseDouble(str14.split(";")[i8]) < distance3) {
                                        z8 = false;
                                    }
                                }
                                if (z8) {
                                    strArr2[i3] = str15;
                                } else {
                                    // Otherwise prefer candidates sharing a prefix with the token.
                                    String[] split2 = str13.split(";");
                                    boolean z9 = false;
                                    for (int i9 = 0; i9 < split2.length; i9++) {
                                        if (split2[i9].startsWith(ocrTok.get(i3)) || ocrTok.get(i3).startsWith(split2[i9])) {
                                            String matchedId = str15.split(";")[i9];
                                            // Join several prefix matches with ';' so that the
                                            // report stage sees a well-formed index list.
                                            strArr2[i3] = strArr2[i3].isEmpty() ? matchedId : strArr2[i3] + ";" + matchedId;
                                            z9 = true;
                                        }
                                    }
                                    if (!z9) {
                                        // Last resort: the near candidate with the smallest
                                        // token-level distance.
                                        double d2 = 100.0d;
                                        String str16 = "";
                                        for (int i11 = 0; i11 < split2.length; i11++) {
                                            if (Double.parseDouble(str14.split(";")[i11]) < d2) {
                                                d2 = Double.parseDouble(str14.split(";")[i11]);
                                                str16 = str15.split(";")[i11];
                                            }
                                        }
                                        strArr2[i3] = str16;
                                    }
                                }
                            } else {
                                // Exactly one candidate is near the expected position.
                                for (int i12 = 0; i12 < split.length; i12++) {
                                    if (Integer.parseInt(split[i12]) == i2 + 1 || Integer.parseInt(split[i12]) == i2 || Integer.parseInt(split[i12]) == i2 - 1) {
                                        strArr2[i3] = split[i12];
                                    }
                                }
                            }
                        } else if (i2 < eafTok.size()) {
                            strArr2[i3] = "";
                        } else {
                            strArr2[i3] = "OUT OF END ?";
                        }
                    }
                } else if (str9.trim().length() > 0) {
                    if (d == 0.0d && Double.parseDouble(str8) == 0.0d) {
                        // Perfect match: accept and re-synchronise the expected position.
                        strArr2[i3] = str10;
                        i2 = Integer.parseInt(str10);
                    } else if (d >= 9.0d || Double.parseDouble(str8) > 3.0d || i2 - Double.parseDouble(str10) >= 20.0d) {
                        // Too distant in context, too distant as a token, or too far behind
                        // the expected position: leave unaligned.
                        strArr2[i3] = "";
                    } else {
                        strArr2[i3] = str10;
                    }
                } else if (i2 <= eafTok.size()) {
                    // No usable candidate; the slot stays "" and is reported as unaligned.
                    System.out.println("Warning, token not found! " + ocrTok.get(i3));
                } else {
                    strArr2[i3] = "OUT OF END?";
                }
                i2++;
            }
            try (PrintWriter printWriter = new PrintWriter(String.valueOf(i) + "alignNiceFormat.txt", "UTF-8")) {
                for (int i13 = 0; i13 < ocrTok.size(); i13++) {
                    String entry = strArr2[i13];
                    String str17 = "";
                    if (entry.contains(";")) {
                        for (String str18 : entry.split(";")) {
                            str17 = String.valueOf(str17) + str18 + "#";
                        }
                    } else if (entry.contains("OUT OF")) {
                        str17 = entry;
                    } else if (entry.trim().length() == 0) {
                        str17 = "#UNK#";
                    } else {
                        str17 = eafTok.get(Integer.parseInt(entry));
                    }
                    printWriter.println(String.valueOf(i13) + "\t" + ocrTok.get(i13) + "\t" + entry + "\t" + lbs.get(i13) + "\t" + pbs.get(i13) + "\t" + str17);
                }
            }
        }
    }

    /** One alignment entry: a non-negative index or a ';'-separated list of them (trailing ';' allowed). */
    private static final java.util.regex.Pattern ELAN_ID_LIST = java.util.regex.Pattern.compile("\\d{1,9}(;\\d{1,9})*;?");

    /**
     * Placeholder for a clean-up pass over an alignment array.
     *
     * The cleaning step itself was never implemented: as before, the method always returns an
     * empty array. The previous body additionally ran computations whose results were
     * discarded but which threw on ordinary input (blank, "OUT OF ..." or multi-id entries
     * reaching {@code Integer.parseInt}, an empty input array, or fewer than three reference
     * tokens). Those have been removed; the longest-run detection is kept as a tolerant helper
     * so a future implementation can build on it.
     *
     * @param strArr alignment entries, one per OCR token; may be {@code null} or empty
     * @return always an empty array
     * @throws Exception never thrown by the current body; kept for signature compatibility
     */
    public static String[] cleanElanIds(String[] strArr) throws Exception {
        if (strArr == null || strArr.length == 0) {
            return new String[0];
        }
        // TODO: use the anchor run ({start, length}) to repair the entries around it.
        findLongestConsecutiveRun(strArr);
        return new String[0];
    }

    /**
     * Finds the longest stretch in which each entry's first id equals the previous entry's
     * last id or that id plus one. Entries that are not id lists end a stretch.
     *
     * @return {@code {start, length}} where length counts the entries following {@code start};
     *         {@code {0, 0}} when no stretch with a follower exists
     */
    private static int[] findLongestConsecutiveRun(String[] ids) {
        int bestStart = 0;
        int bestLength = 0;
        for (int start = 0; start < ids.length; start++) {
            if (!isElanIdList(ids[start])) {
                continue;
            }
            int lastId = lastElanId(ids[start]);
            int length = 0;
            for (int next = start + 1; next < ids.length && isElanIdList(ids[next]); next++) {
                int firstId = firstElanId(ids[next]);
                if (firstId != lastId && firstId != lastId + 1) {
                    break;
                }
                length++;
                lastId = lastElanId(ids[next]);
            }
            if (length > bestLength) {
                bestLength = length;
                bestStart = start;
            }
        }
        return new int[] {bestStart, bestLength};
    }

    /** True when the entry is an index or a ';'-separated index list. */
    private static boolean isElanIdList(String entry) {
        return entry != null && ELAN_ID_LIST.matcher(entry).matches();
    }

    /** First index of an entry accepted by {@link #isElanIdList}. */
    private static int firstElanId(String entry) {
        return Integer.parseInt(entry.split(";")[0]);
    }

    /** Last index of an entry accepted by {@link #isElanIdList}. */
    private static int lastElanId(String entry) {
        String[] parts = entry.split(";");
        return Integer.parseInt(parts[parts.length - 1]);
    }

    /**
     * Reads OCR tokens from a line-numbered text file into {@link #ocrTok}, filling
     * {@link #lbs} and {@link #pbs} in parallel.
     *
     * Each non-blank line is expected to look like {@code <id>#-#<text>} where the id has at
     * least four "_"-separated fields. Reading starts at the first line whose id equals
     * {@code str2} and stops at end of file, or as soon as the running token count would
     * exceed {@code eafTok.size() + paramNumOfOCRTokensAllowedInExcess}; the line that
     * crosses the limit is not added.
     *
     * Changes against the previous version: the reader is closed even when parsing fails,
     * and ids are only split into fields for lines inside the region, so malformed lines
     * before the start id no longer abort the read.
     *
     * @param str  path of the OCR text file (read as UTF-8)
     * @param str2 id of the first line to read
     * @param i    not used
     * @throws Exception on I/O failure, or ArrayIndexOutOfBoundsException for a line inside
     *                   the region that has no "#-#" separator or fewer than four id fields
     */
    public static void setOCRToks(String str, String str2, int i) throws Exception {
        int tokensSeen = 0;
        boolean inRegion = false;
        // Third id field of the previously consumed line; "1" before the first line.
        String previousPageField = "1";
        try (BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(new File(str)), "UTF-8"))) {
            String readLine;
            while ((readLine = bufferedReader.readLine()) != null) {
                if (readLine.trim().length() == 0) {
                    continue;
                }
                String lineId = readLine.split("#-#")[0];
                if (lineId.equals(str2)) {
                    inRegion = true;
                }
                if (!inRegion) {
                    continue;
                }
                String[] idFields = lineId.split("_");
                String pageField = idFields[2];
                String lineField = idFields[3];
                // Bracketed insertions are dropped before tokenising.
                // NOTE(review): runs of two or more spaces are deleted outright, which glues the
                // neighbouring words together; replacing them with a single space may have been
                // intended - confirm before changing, since it alters the token stream.
                String text = readLine.endsWith("#-#") ? "" : readLine.split("#-#")[1].trim().replaceAll("\\[.+?\\]", "").replaceAll("  +", "");
                // An empty text still yields one (empty) token, because "".split(" ") has length 1.
                String[] tokens = text.split(" ");
                tokensSeen += tokens.length;
                if (tokensSeen > eafTok.size() + paramNumOfOCRTokensAllowedInExcess) {
                    break;
                }
                for (int k = 0; k < tokens.length; k++) {
                    // Every token of a line carries the same page marker.
                    if (pageField.trim().equals(previousPageField.trim())) {
                        pbs.add("-");
                    } else {
                        pbs.add(pageField.trim());
                    }
                    ocrTok.add(tokens[k]);
                    // Only the first token of a line carries the line marker.
                    if (k == 0) {
                        lbs.add(lineField);
                    } else {
                        lbs.add("-");
                    }
                }
                previousPageField = pageField;
            }
        }
    }

    /**
     * Loads the reference tokens of one .eaf file into {@link #eafTok}.
     *
     * Scans the file for the tier header of "Referenztext W", then collects the text of every
     * line starting with {@code <ANNOTATION_VALUE>} (all tags stripped, trimmed) until the
     * closing {@code </TIER>} line. Each collected value is also written, one per line, to
     * {@code eafTokenLinesAct.txt} in the working directory.
     *
     * {@link #eafTok} is reset to an empty list first and only replaced by the collected
     * values after the whole file was read. Both the reader and the dump writer are now
     * closed even when reading fails.
     *
     * @param str path of the .eaf file (read as UTF-8)
     * @throws Exception on I/O failure
     */
    public static void setEafToks(String str) throws Exception {
        eafTok = new ArrayList<>();
        ArrayList<String> collected = new ArrayList<>();
        try (BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(new File(str)), "UTF-8"));
                PrintWriter printWriter = new PrintWriter("eafTokenLinesAct.txt", "UTF-8")) {
            boolean insideTier = false;
            String readLine;
            while ((readLine = bufferedReader.readLine()) != null) {
                String trimmedLine = readLine.trim();
                if (insideTier) {
                    if (trimmedLine.equals("</TIER>")) {
                        break;
                    }
                    if (trimmedLine.startsWith("<ANNOTATION_VALUE>")) {
                        String value = readLine.replaceAll("<.+?>", "").trim();
                        collected.add(value);
                        printWriter.println(value);
                    }
                }
                // Checked after the body so that the header line itself is never collected.
                if (trimmedLine.equals("<TIER DEFAULT_LOCALE=\"de\" LINGUISTIC_TYPE_REF=\"main-tier\" PARTICIPANT=\"SPK0\" TIER_ID=\"Referenztext W\">")) {
                    insideTier = true;
                }
            }
        }
        eafTok = collected;
    }
}
