/*
 * Decompiled with CFR 0.152.
 */
package org.apdplat.word.corpus;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileSystem;
import java.nio.file.FileSystems;
import java.nio.file.FileVisitResult;
import java.nio.file.FileVisitor;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.StandardCopyOption;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Extracts plain, separator-delimited text from an annotated corpus archive.
 *
 * <p>Every file inside the archive is read line by line. Each line is split on
 * whitespace into tokens; the part of a token before its first {@code '/'} is
 * taken as the word (with any {@code '['} / {@code ']'} removed) and written to
 * the output followed by the separator. A token whose word starts with
 * {@code '['} opens a bracketed phrase, and a token whose second
 * {@code '/'}-separated field ends with {@code ']'} closes it; when requested,
 * the concatenation of the words of a closed phrase is written as an extra entry.
 */
public class ExtractText {
    private static final Logger LOGGER = LoggerFactory.getLogger(ExtractText.class);

    /** Archive that is read; a relative path, so it is resolved against the working directory. */
    private static final String CORPUS_ZIP = "src/main/resources/corpus/corpora.zip";

    /** A bracketed phrase that grows beyond this many words is discarded. */
    private static final int MAX_PHRASE_WORDS = 10;

    /** Token delimiter inside a corpus line; compiled once instead of once per line. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Number of words written during the current/last extraction. */
    private static final AtomicInteger WORD_COUNT = new AtomicInteger();

    /** Number of characters (sum of word lengths) written during the current/last extraction. */
    private static final AtomicInteger CHAR_COUNT = new AtomicInteger();

    /**
     * Command-line entry point.
     *
     * @param args optional: {@code args[0]} is the output file (default
     *             {@code data/word.txt}), {@code args[1]} is the word separator
     *             (default a single space); any further arguments are ignored
     */
    public static void main(String[] args) {
        String outputFile = "data/word.txt";
        String separator = " ";
        if (args.length >= 1) {
            outputFile = args[0];
        }
        if (args.length >= 2) {
            separator = args[1];
        }
        ExtractText.extractFromCorpus(outputFile, separator, true);
    }

    /**
     * Extracts the text of the corpus archive into {@code outputFile} and logs the
     * elapsed time and the character/word totals of this run.
     *
     * <p>An {@link IOException} raised while opening the archive or the output file
     * is logged and not rethrown.
     *
     * @param outputFile  path of the UTF-8 text file to (re)create
     * @param separator   text written after every word
     * @param writePhrase whether the joined words of each closed bracketed phrase
     *                    are written as an additional entry
     */
    public static void extractFromCorpus(String outputFile, String separator, boolean writePhrase) {
        // The counters are static; reset them so the totals logged below describe this run only.
        WORD_COUNT.set(0);
        CHAR_COUNT.set(0);
        // "Start extracting text from the corpus"
        LOGGER.info("\u5f00\u59cb\u4ece\u8bed\u6599\u5e93\u4e2d\u62bd\u53d6\u6587\u672c");
        long start = System.currentTimeMillis();
        try {
            ExtractText.analyzeCorpus(CORPUS_ZIP, outputFile, separator, writePhrase);
        } catch (IOException e) {
            // "Extraction failed: <message>" -- logged as an error, with the stack trace preserved.
            LOGGER.error("\u62bd\u53d6\u5931\u8d25\uff1a" + e.getMessage(), e);
        }
        long cost = System.currentTimeMillis() - start;
        // "Extraction finished, took: <n> milliseconds"
        LOGGER.info("\u5b8c\u6210\u62bd\u53d6\uff0c\u8017\u65f6\uff1a{}\u6beb\u79d2", cost);
        // "Total characters extracted: <n>, total words: <n>"
        LOGGER.info("\u62bd\u53d6\u51fa\u7684\u603b\u5b57\u7b26\u6570\u76ee\u4e3a\uff1a{}\uff0c\u603b\u8bcd\u6570\u76ee\u4e3a\uff1a{}",
                CHAR_COUNT.get(), WORD_COUNT.get());
    }

    /**
     * Opens {@code zipFile} as a file system, walks every file in it and appends the
     * extracted text of each file to {@code outputFile}.
     *
     * @param zipFile     path of the corpus archive
     * @param outputFile  path of the output file; missing parent directories are created
     * @param separator   text written after every word
     * @param writePhrase whether closed bracketed phrases are written as extra entries
     * @throws IOException if the archive or the output file cannot be opened, or the walk fails
     */
    private static void analyzeCorpus(String zipFile, String outputFile, final String separator,
                                      final boolean writePhrase) throws IOException {
        Path output = Paths.get(outputFile);
        // Resolve against the working directory first so that a bare file name such as
        // "word.txt" has a parent; getParent() is null only for a file-system root.
        Path parent = output.toAbsolutePath().getParent();
        if (parent != null && !Files.isDirectory(parent)) {
            Files.createDirectories(parent);
        }
        try (FileSystem corpus = FileSystems.newFileSystem(Paths.get(zipFile), ExtractText.class.getClassLoader());
             final BufferedWriter writer = new BufferedWriter(
                     new OutputStreamWriter(Files.newOutputStream(output), StandardCharsets.UTF_8))) {
            for (Path root : corpus.getRootDirectories()) {
                // "Processing directory: <root>"
                LOGGER.info("\u5904\u7406\u76ee\u5f55\uff1a{}", root);
                Files.walkFileTree(root, new SimpleFileVisitor<Path>() {

                    @Override
                    public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
                        // "Processing file: <file>"
                        LOGGER.info("\u5904\u7406\u6587\u4ef6\uff1a{}", file);
                        // Read the entry straight from the zip file system: no temporary copy is
                        // needed, so nothing is left behind and no "target" directory is required.
                        ExtractText.extractText(file, writer, separator, writePhrase);
                        return FileVisitResult.CONTINUE;
                    }
                });
            }
        }
    }

    /**
     * Reads {@code file} as UTF-8 and writes the words of every non-blank line to
     * {@code writer}, one output line per input line.
     *
     * <p>Best effort: any failure is logged and the method returns normally, so the
     * caller carries on with the next corpus file.
     *
     * @param file        corpus file to read (may live inside a zip file system)
     * @param writer      destination for the extracted words
     * @param separator   text written after every word
     * @param writePhrase whether closed bracketed phrases are written as extra entries
     */
    private static void extractText(Path file, BufferedWriter writer, String separator, boolean writePhrase) {
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(Files.newInputStream(file), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                line = line.trim();
                if (line.isEmpty()) {
                    continue;
                }
                writeLine(WHITESPACE.split(line), writer, separator, writePhrase);
            }
        } catch (IOException | RuntimeException e) {
            // "Failed to extract text from corpus <file>:"
            LOGGER.error("\u4ece\u8bed\u6599\u5e93 " + file + " \u4e2d\u62bd\u53d6\u6587\u672c\u5931\u8d25\uff1a", e);
        }
    }

    /**
     * Writes the words of one corpus line followed by a newline and updates the
     * word/character counters.
     *
     * @param tokens      whitespace-separated tokens of the line
     * @param writer      destination for the extracted words
     * @param separator   text written after every word
     * @param writePhrase whether closed bracketed phrases are written as extra entries
     * @throws IOException if writing fails
     */
    private static void writeLine(String[] tokens, BufferedWriter writer, String separator, boolean writePhrase)
            throws IOException {
        StringBuilder phrase = new StringBuilder();
        int phraseWords = 0;
        boolean inPhrase = false;
        for (String token : tokens) {
            String[] fields = token.split("/");
            // split() drops trailing empty strings, so a token made only of '/' yields no fields.
            if (fields.length < 1) {
                continue;
            }
            if (fields[0].trim().startsWith("[")) {
                inPhrase = true;
            }
            String word = fields[0].replace("[", "").replace("]", "").trim();
            writer.write(word + separator);
            if (inPhrase) {
                phrase.append(word);
                ++phraseWords;
            }
            if (phraseWords > MAX_PHRASE_WORDS) {
                // Phrase grew too long: give up on it.
                inPhrase = false;
                phraseWords = 0;
                phrase.setLength(0);
            }
            if (inPhrase && fields.length > 1 && fields[1].trim().endsWith("]")) {
                inPhrase = false;
                if (writePhrase) {
                    writer.write(phrase.toString() + separator);
                }
                phrase.setLength(0);
                // Start the next phrase from zero; otherwise the length limit would apply to
                // the running total of all phrases on the line instead of to each phrase.
                phraseWords = 0;
            }
            WORD_COUNT.incrementAndGet();
            CHAR_COUNT.addAndGet(word.length());
        }
        writer.write("\n");
    }
}

