/*
 * Decompiled with CFR 0.152.
 */
package TimUtilities.StringUtilities.Filters;

import JavaNotes.TextReader;
import TimUtilities.FileUtilities.FileOutput;
import TimUtilities.StringUtilities.Filters.ImperialPapersFilter;
import TimUtilities.StringUtilities.Filters.StringFilter;
import TimUtilities.StringUtilities.Stemmers.Porter;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.PrintStream;
import java.io.Reader;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

/**
 * Command-line utility that reads a text file, passes each word through a
 * {@link StringFilter}, optionally stems the accepted words with the
 * {@link Porter} stemmer, and writes the resulting words to an output file.
 *
 * <p>Two input layouts are supported:
 * <ul>
 *   <li>{@link #processWordListFile} reads whitespace-delimited words via
 *       {@code TextReader.getWord()}; one column may be copied through
 *       unfiltered.</li>
 *   <li>{@link #processIndexSentenceFile} treats everything before the first
 *       separator as an index and tokenises the remainder of the line.</li>
 * </ul>
 */
public class TextFileProcessor {
    /** Not referenced inside this class; retained so external users of the field keep compiling. */
    public int nmax = 1000;

    /**
     * Entry point.
     *
     * <p>Positional arguments (all optional):
     * <ol>
     *   <li>input file name; its ending selects the processing mode
     *       ({@code inputELS.dat} or {@code .txt}: word list,
     *       {@code inputBVNLS.dat}: index + sentence)</li>
     *   <li>output file name root</li>
     *   <li>number of the column to copy through unfiltered (word list mode)</li>
     *   <li>stemming switch; the SECOND character of the argument is handed to
     *       {@code StringFilter.trueString}</li>
     * </ol>
     *
     * @param args command-line arguments as described above
     * @throws IllegalArgumentException if the stemming switch has fewer than two characters
     * @throws RuntimeException if the input file name matches no known mode
     */
    public static void main(String[] args) {
        System.out.println("TextFileProcessor Arguments: inputFileName outputFileName");
        int ano = 0;

        String inputFileName = "input/MSC-2010subclassinputBVNLS.dat";
        if (args.length > ano) {
            inputFileName = args[ano];
        }
        System.out.println("--- Using input file " + inputFileName);

        String outputNameRoot = "output/MSC-2010subclass";
        if (args.length > ++ano) {
            outputNameRoot = args[ano];
        }
        System.out.println("--- Using output file name root " + outputNameRoot);

        int ignoreColumn = 1;
        if (args.length > ++ano) {
            ignoreColumn = Integer.parseInt(args[ano]);
        }
        System.out.println("--- Ignoring column " + ignoreColumn);

        // The processing mode and the output extension follow from the input file name ending.
        int mode = -1;
        String outputExt = "UNKNOWN";
        if (inputFileName.endsWith("inputELS.dat")) {
            mode = 1;
            outputExt = "outputELS.dat";
        }
        if (inputFileName.endsWith(".txt")) {
            mode = 1;
            outputExt = ".txt";
        }
        if (inputFileName.endsWith("inputBVNLS.dat")) {
            mode = 2;
            outputExt = "outputBVNLS.dat";
        }

        boolean stemmerOn = false;
        if (args.length > ++ano) {
            String stemArg = args[ano];
            // The switch is read from index 1, so a shorter argument used to fail with a
            // bare StringIndexOutOfBoundsException; report the problem explicitly instead.
            // NOTE(review): presumably index 0 is an option prefix character - confirm with callers.
            if (stemArg.length() < 2) {
                throw new IllegalArgumentException("Stemming argument \"" + stemArg
                        + "\" is too short, its second character selects stemming on/off");
            }
            stemmerOn = StringFilter.trueString(stemArg.charAt(1));
        }
        System.out.println("--- Porter stemming " + StringFilter.onOffString(stemmerOn));

        String inputSep = "\t";
        String sep = "\t";
        TreeMap<String, String> stemMap = new TreeMap<String, String>();
        TreeMap<String, Integer> acceptedCountMap = new TreeMap<String, Integer>();
        ImperialPapersFilter ipf = new ImperialPapersFilter(2, 3, true);
        int sampleFrequency = 1;
        boolean showProcess = true;
        boolean convertIgnoreColumn = false;
        String outputFileName = outputNameRoot + outputExt;

        switch (mode) {
            case 1: {
                // NOTE(review): this mode always stems; the stemming switch only affects mode 2.
                System.out.println("--- Processing file assuming simple list of words ");
                TextFileProcessor.processWordListFile(inputFileName, outputFileName, ignoreColumn, convertIgnoreColumn, sep, stemMap, acceptedCountMap, ipf, sampleFrequency, showProcess);
                break;
            }
            case 2: {
                System.out.println("--- Processing file assuming index column ");
                TextFileProcessor.processIndexSentenceFile(inputFileName, outputFileName, inputSep, sep, stemmerOn, stemMap, acceptedCountMap, ipf, sampleFrequency, showProcess);
                break;
            }
            default: {
                throw new RuntimeException("Unknown mode " + mode);
            }
        }

        System.out.println("Applied Filter " + ipf.description());
        FileOutput.FileOutputMap(outputNameRoot + "ptStemMap.dat", sep, stemMap, true);
        FileOutput.FileOutputMap(outputNameRoot + "AcceptedCountMap.dat", sep, acceptedCountMap, true);
        ipf.FileOutputRejectedList(outputNameRoot + "ptRejectList.dat", showProcess);
    }

    /**
     * Processes a file whose lines consist of an index, a separator and a sentence.
     *
     * <p>For every line whose index has not been seen before, the index is written
     * to the output followed by each accepted (and optionally stemmed) word, every
     * word preceded by {@code sep}. Words are built from letters (lower-cased) and
     * digits; apostrophes, hyphens and question marks are dropped without ending
     * the word, any other character ends the current word.
     *
     * @param inputFileName    file to read
     * @param outputFileName   file to write
     * @param inputSep         separator between the index and the sentence on each input line
     * @param sep              separator written before every output word
     * @param stemmerOn        true to stem accepted words with the Porter stemmer
     * @param stemMap          if not null, receives word to stem entries whenever the stem's
     *                         length differs from the word's length
     * @param acceptedCountMap if not null, receives a running count per output word
     * @param sf               filter deciding which words are accepted
     * @param sampleFrequency  if 2 or more, only lines 1, 1+f, 1+2f, ... are processed
     * @param showProcess      true to echo progress to standard output
     * @throws RuntimeException if a file cannot be opened, a processed line lacks the
     *                          separator, or the reader reports an error
     */
    public static void processIndexSentenceFile(String inputFileName, String outputFileName, String inputSep, String sep, boolean stemmerOn, Map<String, String> stemMap, Map<String, Integer> acceptedCountMap, StringFilter sf, int sampleFrequency, boolean showProcess) {
        boolean makeMap = stemMap != null;
        boolean makeAcceptedList = acceptedCountMap != null;
        boolean readSome = sampleFrequency >= 2;
        Porter stemmer = stemmerOn ? new Porter() : null;
        TreeSet<String> indexSet = new TreeSet<String>();

        TextReader data = openInput(inputFileName);
        PrintStream PS = openOutput(outputFileName, data);

        int linenumber = 0;
        int column = 0;
        StringBuilder term = new StringBuilder(200);
        try {
            while (!data.eof()) {
                // Count every line, sampled or not, so that progress output and error
                // messages always carry the real line number.
                ++linenumber;
                String inputLine = data.getln();
                if (readSome && linenumber % sampleFrequency != 1) {
                    continue;
                }
                int cn = inputLine.indexOf(inputSep);
                if (cn < 0) {
                    throw new RuntimeException("*** Too few columns on line " + linenumber + " wanted at least two columns.");
                }
                String index = inputLine.substring(0, cn);
                // Only the first line carrying a given index is processed.
                if (!indexSet.add(index)) {
                    continue;
                }
                PS.print(index);
                if (showProcess) {
                    System.out.print(linenumber + ": \t" + index + "- \t");
                }
                // Step over the whole separator, whatever its length.
                cn += inputSep.length();
                while (cn < inputLine.length()) {
                    term.setLength(0);
                    while (cn < inputLine.length()) {
                        char c = inputLine.charAt(cn++);
                        if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')) {
                            term.append(c);
                        } else if (c >= 'A' && c <= 'Z') {
                            term.append((char) (c + ('a' - 'A')));
                        } else if (c != '\'' && c != '-' && c != '?') {
                            // Apostrophes, hyphens and question marks are dropped inside a
                            // word; every other character terminates it. Without this break
                            // the rest of the line collapsed into one single term.
                            break;
                        }
                    }
                    if (term.length() == 0) {
                        continue;
                    }
                    String w = term.toString();
                    if (sf.isAcceptableElseRemember(w)) {
                        String s = stemmerOn ? stemmer.stem(w) : w;
                        PS.print(sep + s);
                        if (makeAcceptedList) {
                            incrementCount(acceptedCountMap, s);
                        }
                        // NOTE(review): a stem of equal length but different spelling is not
                        // reported or recorded; kept as in the original.
                        boolean lengthChanged = s.length() != w.length();
                        if (showProcess && lengthChanged) {
                            System.out.print("\t" + w + "->" + s + " ");
                        }
                        if (makeMap && lengthChanged) {
                            stemMap.put(w, s);
                        }
                    } else if (showProcess) {
                        System.out.print("\t" + w + "<-### ");
                    }
                }
                if (showProcess) {
                    System.out.println();
                }
                PS.println();
            }
        }
        catch (TextReader.Error e) {
            throw new RuntimeException("*** Input Error on line " + linenumber + " column " + column + ": " + e.getMessage(), e);
        }
        finally {
            // Make sure the output stream is closed even if closing the reader fails.
            try {
                data.close();
            }
            finally {
                PS.close();
            }
        }
        // stemMap may legitimately be null, so do not dereference it unconditionally.
        int stemmedCount = makeMap ? stemMap.size() : 0;
        System.out.println(" Finished " + outputFileName + " processed " + indexSet.size() + " sentences, stemmed " + stemmedCount + " words and filtered out " + sf.numberRejectedString() + " words");
    }

    /**
     * Processes a file of words read one at a time with {@code TextReader.getWord()}.
     *
     * <p>On every processed line the word in column {@code ignoreColumn} (columns are
     * counted from 1) bypasses the filter: it is either copied unchanged or, when
     * {@code convertIgnoreColumn} is set, replaced by the line number minus one. All
     * other words are filtered and the accepted ones are stemmed with the Porter
     * stemmer. Every word written is followed by {@code sep}.
     *
     * @param inputFileName       file to read
     * @param outputFileName      file to write
     * @param ignoreColumn        1-based column that bypasses filtering and stemming
     * @param convertIgnoreColumn true to write (line number - 1) in place of the ignored column
     * @param sep                 separator written after every output word
     * @param stemMap             if not null, receives word to stem entries whenever the stem's
     *                            length differs from the word's length
     * @param acceptedCountMap    if not null, receives a running count per output word
     * @param sf                  filter deciding which words are accepted
     * @param sampleFrequency     if 2 or more, only lines 1, 1+f, 1+2f, ... are processed
     * @param showProcess         true to echo progress to standard output
     * @throws RuntimeException if a file cannot be opened or the reader reports an error
     */
    public static void processWordListFile(String inputFileName, String outputFileName, int ignoreColumn, boolean convertIgnoreColumn, String sep, Map<String, String> stemMap, Map<String, Integer> acceptedCountMap, StringFilter sf, int sampleFrequency, boolean showProcess) {
        boolean makeMap = stemMap != null;
        boolean makeAcceptedList = acceptedCountMap != null;
        boolean readSome = sampleFrequency >= 2;
        Porter stemmer = new Porter();

        TextReader data = openInput(inputFileName);
        PrintStream PS = openOutput(outputFileName, data);

        int linenumber = 0;
        int column = 0;
        try {
            while (!data.eof()) {
                column = 0;
                // Count every line, sampled or not; otherwise the number stays 0 when
                // sampling is off and the converted ignore column would always print -1.
                ++linenumber;
                if (readSome && linenumber % sampleFrequency != 1) {
                    data.getln();
                    continue;
                }
                if (showProcess) {
                    System.out.print(linenumber + ": ");
                }
                while (!data.eoln()) {
                    String w = data.getWord();
                    ++column;
                    if (showProcess) {
                        System.out.print("\t" + w);
                    }
                    if (column == ignoreColumn) {
                        if (convertIgnoreColumn) {
                            PS.print((linenumber - 1) + sep);
                        } else {
                            PS.print(w + sep);
                        }
                    } else if (sf.isAcceptableElseRemember(w)) {
                        String s = stemmer.stem(w);
                        PS.print(s + sep);
                        if (makeAcceptedList) {
                            incrementCount(acceptedCountMap, s);
                        }
                        // NOTE(review): a stem of equal length but different spelling is not
                        // reported or recorded; kept as in the original.
                        boolean lengthChanged = s.length() != w.length();
                        if (showProcess && lengthChanged) {
                            System.out.print("->" + s);
                        }
                        if (makeMap && lengthChanged) {
                            stemMap.put(w, s);
                        }
                    } else if (showProcess) {
                        System.out.print("<-### " + sep);
                    }
                }
                if (showProcess) {
                    System.out.println();
                }
                PS.println();
            }
        }
        catch (TextReader.Error e) {
            throw new RuntimeException("*** Input Error on line " + linenumber + " column " + column + ": " + e.getMessage(), e);
        }
        finally {
            // Make sure the output stream is closed even if closing the reader fails.
            try {
                data.close();
            }
            finally {
                PS.close();
            }
        }
        // stemMap may legitimately be null, so do not dereference it unconditionally.
        int stemmedCount = makeMap ? stemMap.size() : 0;
        System.out.println(" Finished " + outputFileName + " stemmed " + stemmedCount + " words and filtered out " + sf.numberRejectedString() + " words");
    }

    /**
     * Prints every entry of a map as {@code key + sep + value}, one entry per line,
     * in the map's iteration order.
     *
     * @param PS      stream to print to
     * @param sep     separator placed between key and value
     * @param stemMap map to print
     */
    public static void printMap(PrintStream PS, String sep, Map<String, String> stemMap) {
        for (Map.Entry<String, String> entry : stemMap.entrySet()) {
            PS.println(entry.getKey() + sep + entry.getValue());
        }
    }

    /**
     * Tests whether a string consists solely of letters as defined by
     * {@link Character#isLetter(char)}.
     *
     * @param s string to test
     * @return true if every character is a letter; also true for the empty string
     */
    public static boolean isAlphabetic(String s) {
        for (int i = 0; i < s.length(); ++i) {
            if (!Character.isLetter(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /** Opens the input file, announcing it on standard output. */
    private static TextReader openInput(String inputFileName) {
        System.out.println(" Opening Input File:  " + inputFileName);
        try {
            return new TextReader((Reader)new FileReader(inputFileName));
        }
        catch (FileNotFoundException e) {
            throw new RuntimeException("Input file " + inputFileName + " not found, " + e, e);
        }
    }

    /** Opens the output file; on failure closes the already opened reader before rethrowing. */
    private static PrintStream openOutput(String outputFileName, TextReader data) {
        System.out.println(" Opening Output File: " + outputFileName);
        try {
            return new PrintStream(new FileOutputStream(outputFileName));
        }
        catch (FileNotFoundException e) {
            data.close();
            throw new RuntimeException("Output file " + outputFileName + " not opened, " + e, e);
        }
    }

    /** Adds one to the count stored under the given word, starting at 1 for a new word. */
    private static void incrementCount(Map<String, Integer> counts, String word) {
        Integer previous = counts.get(word);
        counts.put(word, previous == null ? 1 : previous + 1);
    }
}

