package ap7Visu;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;

/* loaded from: input_file:ap7Visu/ExtractDTAtexts.class */
/**
 * Command-line tool that builds a plain-text corpus from books hosted at
 * www.deutschestextarchiv.de.
 *
 * <p>It reads a metadata listing, collects every {@code + basename="..."} entry, downloads the
 * text of each book, normalizes it (one sentence-like unit per line, punctuation separated by
 * spaces) and writes it to {@code <n>.txt} in the output directory. The mapping from basename
 * to file name is written to {@code Textkompass.txt} in the same directory.
 */
public class ExtractDTAtexts {
    /** Metadata listing read when no path is passed on the command line. */
    private static final String DEFAULT_METADATA_FILE =
            "/home/hoenen/Dokumente/ZHistLex/AP7/VisualisierungenProduction/DTAgefälligstTimeordered200.txt";

    /** Output directory used when none is passed on the command line. */
    private static final String DEFAULT_OUTPUT_DIR = "corpDTAGef";

    /** The basename of a book is appended to this prefix to obtain its download URL. */
    private static final String DOWNLOAD_URL_PREFIX = "http://www.deutschestextarchiv.de/book/download_txt/";

    /**
     * Lines consisting only of a bracketed marker such as {@code [0012]} or {@code [A/3]}
     * (presumably page markers - confirm against the downloaded texts) are dropped.
     * Compiled once instead of once per input line.
     */
    private static final Pattern PAGE_MARKER = Pattern.compile("\\[[A-Z]?/?[0-9/]+\\]");

    /**
     * Runs the extraction.
     *
     * @param strArr optional arguments: {@code strArr[0]} overrides the metadata file path and
     *               {@code strArr[1]} overrides the output directory; with no arguments the
     *               built-in defaults are used
     * @throws Exception if the metadata file cannot be read or the index file cannot be
     *                   written; failures for an individual book are reported on stderr and
     *                   do not abort the run
     */
    public static void main(String[] strArr) throws Exception {
        String metadataPath = strArr != null && strArr.length > 0 ? strArr[0] : DEFAULT_METADATA_FILE;
        File outputDir = new File(strArr != null && strArr.length > 1 ? strArr[1] : DEFAULT_OUTPUT_DIR);

        Map<String, String> datesByBasename;
        try (BufferedReader metadata = new BufferedReader(
                new InputStreamReader(new FileInputStream(new File(metadataPath)), "UTF-8"))) {
            datesByBasename = readBasenames(metadata);
        }

        if (!outputDir.isDirectory() && !outputDir.mkdirs()) {
            throw new IOException("Cannot create output directory: " + outputDir);
        }

        // The index is written as UTF-8, like every other file this tool produces.
        try (PrintWriter index = new PrintWriter(new File(outputDir, "Textkompass.txt"), "UTF-8")) {
            int fileNumber = 0;
            for (String basename : datesByBasename.keySet()) {
                String fileName = fileNumber + ".txt";
                // The index entry is written (and the number consumed) even if the download
                // below fails, so numbering does not depend on network availability.
                index.println(basename + "\t" + fileName);
                System.out.println("processing:..." + basename);
                try {
                    String normalized = normalizeText(download(basename));
                    try (PrintWriter out = new PrintWriter(new File(outputDir, fileName), "UTF-8")) {
                        out.println(normalized);
                    }
                } catch (Exception e) {
                    // Best effort: one unreachable or broken book must not stop the others.
                    System.err.println("failed to process " + basename);
                    e.printStackTrace();
                }
                fileNumber++;
            }
        }
    }

    /**
     * Collects the basenames (and the date following each, if any) from a metadata listing.
     *
     * <p>A line whose trimmed form starts with {@code + basename=} begins a new entry; a line
     * starting with {@code + date_=} sets the date of the current entry. In both cases the
     * value is the text between the first pair of double quotes. Entries with a blank
     * basename are skipped, so an input without any basename line yields an empty map.
     *
     * @param reader source of the metadata lines; not closed by this method
     * @return map from basename to its date string, or {@code "unknown"} when no date line
     *         followed the basename
     * @throws IOException if reading fails
     * @throws IllegalArgumentException if a basename or date line carries no quoted value
     */
    static Map<String, String> readBasenames(BufferedReader reader) throws IOException {
        Map<String, String> datesByBasename = new HashMap<>();
        String basename = "";
        String date = "";
        String line;
        while ((line = reader.readLine()) != null) {
            String trimmed = line.trim();
            if (trimmed.startsWith("+ basename=")) {
                putIfNamed(datesByBasename, basename, date);
                basename = quotedValue(line);
                date = "unknown";
            } else if (trimmed.startsWith("+ date_=")) {
                date = quotedValue(line);
            }
        }
        // Flush the last entry; guarded so that an empty listing does not register "".
        putIfNamed(datesByBasename, basename, date);
        return datesByBasename;
    }

    /** Stores the entry unless the basename is blank. */
    private static void putIfNamed(Map<String, String> target, String basename, String date) {
        if (basename.trim().length() != 0) {
            target.put(basename, date);
        }
    }

    /** Returns the text following the first double quote of {@code line}, up to the next one. */
    private static String quotedValue(String line) {
        String[] parts = line.split("\"");
        if (parts.length < 2) {
            throw new IllegalArgumentException("Expected a quoted value in metadata line: " + line);
        }
        return parts[1];
    }

    /**
     * Downloads the text of one book and returns its cleaned, still line-structured body.
     *
     * @param basename identifier appended to the download URL prefix
     * @return concatenation of {@link #cleanLine(String)} over all downloaded lines
     * @throws IOException if the URL is malformed or the download fails
     */
    private static String download(String basename) throws IOException {
        URL url = new URL(DOWNLOAD_URL_PREFIX + basename);
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(url.openStream(), "UTF-8"))) {
            StringBuilder body = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                body.append(cleanLine(line));
            }
            return body.toString();
        }
    }

    /**
     * Converts one raw line of a downloaded text into the fragment to append to the body.
     *
     * @param line a line without its line terminator
     * @return the empty string for blank lines and bracketed marker lines; the line without
     *         its final hyphen (and without a newline, so the word continues on the next
     *         line) when it ends with {@code -}; otherwise the line followed by a newline
     */
    static String cleanLine(String line) {
        String trimmed = line.trim();
        if (trimmed.length() == 0 || PAGE_MARKER.matcher(trimmed).matches()) {
            return "";
        }
        if (line.endsWith("-")) {
            // Remove exactly the hyphen; cutting two characters would also lose the last
            // letter of the word and fail on a line that is just "-".
            return line.substring(0, line.length() - 1);
        }
        return line + "\n";
    }

    /**
     * Normalizes and tokenizes a text body.
     *
     * <p>Newlines become spaces, punctuation is surrounded by spaces, runs of dots become an
     * ellipsis character, and a line break is inserted after each {@code .}, {@code ?},
     * {@code !} and {@code :}. The order of the replacements matters and must be kept.
     *
     * @param text cleaned body as produced by concatenating {@link #cleanLine(String)} results
     * @return the normalized text, trimmed
     */
    static String normalizeText(String text) {
        return text
                .replaceAll("ſ", "s")
                .replaceAll(" Jn", " In")
                .replaceAll("\n", " ")
                .replaceAll(",", " , ")
                .replaceAll("\\.\\.+", "…")
                .replaceAll("\\.", " . ")
                .replaceAll(";", " ; ")
                .replaceAll("\f", "")
                .replaceAll("„", " „ ")
                .replaceAll("“", " “ ")
                .replaceAll("\"", " \" ")
                .replaceAll(":", " : ")
                .replaceAll("!", " ! ")
                .replaceAll("\\?", " ? ")
                .replaceAll("\\(", "( ")
                .replaceAll("\\)", " )")
                .replaceAll("…", " … ")
                .replaceAll("  +", " ")
                .replaceAll("\\.", ".\n")
                .replaceAll("\\?", "?\n")
                .replaceAll("!", "!\n")
                .replaceAll(":", ":\n")
                .replaceAll("\n +", "\n")
                .replaceAll(" +\n", "\n")
                .trim();
    }
}
