/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ctakes.dictionary.assertion;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import org.apache.ctakes.core.nlp.tokenizer.OffsetComparator;
import org.apache.ctakes.core.nlp.tokenizer.Token;
import org.apache.ctakes.core.nlp.tokenizer.TokenizerPTB;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Command-line utility that builds a Lucene index of assertion cue phrases from
 * pipe-delimited text files.
 *
 * <p>Each input line has the form {@code cuePhrase|category|family}; the category and
 * family columns are optional. Every line becomes one Lucene document holding the cue
 * phrase, its category, its family and the text of its first token. The index is written
 * to a directory named {@code assertion_cue_phrase_index} that is a sibling of the input
 * directory.
 *
 * <p>As a side effect every indexed record is also appended to a file named
 * {@code sample.txt} in the current working directory.
 *
 * <p>Not thread-safe: the input directory and the tokenizer are held in static fields.
 */
public class CreateAssertionLuceneIndexFromDelimitedFile {
    /** Name of the analyzed, stored field that holds the cue phrase itself. */
    public static final String CUE_PHRASE_FIELD_NAME = "cuePhrase";
    /** Name of the stored field that holds the cue phrase category. */
    public static final String CUE_PHRASE_CATEGORY_FIELD_NAME = "cuePhraseCategory";
    /** Name of the stored field that holds the cue phrase family. */
    public static final String CUE_PHRASE_FAMILY_FIELD_NAME = "cuePhraseFamily";
    /** Name of the stored field that holds the text of the cue phrase's first token. */
    public static final String CUE_PHRASE_FIRST_WORD_FIELD_NAME = "cuePhraseFirstWord";

    /** Directory (sibling of the input directory) that receives the index. */
    private static final String INDEX_DIRECTORY_NAME = "assertion_cue_phrase_index";
    /** Category used when a record has no category column. */
    private static final String DEFAULT_CATEGORY = "default_category";
    /** Family used when a record has no family column. */
    private static final String DEFAULT_FAMILY = "default_family";
    /** File, relative to the working directory, that receives a copy of every record. */
    private static final String TRACE_FILE_NAME = "sample.txt";

    private static TokenizerPTB tokenizer = new TokenizerPTB();
    private static String directoryOfDelimitedFiles = null;

    private IndexWriter iwriter = null;
    private int idCount = 0;

    /**
     * Builds the index from every regular file found in the configured input directory.
     *
     * <p>I/O failures are reported on standard output / standard error and do not
     * propagate; the index writer is always closed if it was opened.
     *
     * @param tokenizer tokenizer used to split cue phrases; when {@code null} the
     *                  class-level tokenizer is kept
     * @throws IllegalStateException if the input directory has not been set
     * @throws Exception declared for backward compatibility with existing callers
     */
    public CreateAssertionLuceneIndexFromDelimitedFile(TokenizerPTB tokenizer) throws Exception {
        if (directoryOfDelimitedFiles == null) {
            throw new IllegalStateException("The directory of delimited files has not been set");
        }
        // The parameter used to be ignored because it shadowed the static field.
        if (tokenizer != null) {
            CreateAssertionLuceneIndexFromDelimitedFile.tokenizer = tokenizer;
        }
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
        File sourceDir = new File(directoryOfDelimitedFiles).getAbsoluteFile();
        boolean error = false;
        long numEntries = 0L;
        try {
            Directory directory = FSDirectory.open(new File(sourceDir.getParentFile(), INDEX_DIRECTORY_NAME));
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_40, analyzer);
            this.iwriter = new IndexWriter(directory, indexWriterConfig);
            if (sourceDir.isDirectory()) {
                numEntries = this.indexDirectory(sourceDir);
            } else {
                System.out.println("Not a directory, nothing to index: " + sourceDir.getPath());
            }
        }
        catch (IOException io) {
            System.out.println("IO exception caught");
            io.printStackTrace();
            error = true;
        }
        finally {
            this.closeIndexWriter(error, numEntries);
        }
    }

    /**
     * Indexes every regular file directly inside {@code sourceDir}.
     *
     * @return the number of records handed to {@link #writeToFormatLucene}
     * @throws IOException if the directory cannot be listed or a file cannot be read
     */
    private long indexDirectory(File sourceDir) throws IOException {
        String[] fileNames = sourceDir.list();
        if (fileNames == null) {
            // File.list() returns null rather than throwing when the listing fails.
            throw new IOException("Unable to list files in " + sourceDir.getPath());
        }
        long numEntries = 0L;
        for (int i = 0; i < fileNames.length; ++i) {
            File nextFile = new File(sourceDir, fileNames[i]);
            if (!nextFile.isFile()) {
                // Sub-directories cannot be opened by FileReader and used to abort the run.
                continue;
            }
            System.out.println("Process Each File in " + sourceDir.getName() + "...");
            numEntries += this.indexFile(nextFile);
        }
        return numEntries;
    }

    /**
     * Indexes every record of one pipe-delimited file.
     *
     * @return the number of records handed to {@link #writeToFormatLucene}; records whose
     *         indexing failed are included because that method reports failures itself
     * @throws IOException if the file cannot be opened or read
     */
    private long indexFile(File delimitedFile) throws IOException {
        long numEntries = 0L;
        BufferedReader br = new BufferedReader(new FileReader(delimitedFile));
        try {
            String record;
            while ((record = br.readLine()) != null) {
                String[] splitRecord = record.split("\\|");
                // Skip lines without a cue phrase (blank lines, or lines starting with '|').
                if (splitRecord.length == 0 || splitRecord[0].trim().isEmpty()) {
                    continue;
                }
                String cuePhrase = splitRecord[0];
                String cuePhraseCategory = DEFAULT_CATEGORY;
                String cuePhraseFamily = DEFAULT_FAMILY;
                // split() drops trailing empty columns, so a line may carry a category
                // without a family; only read the column when it is really there.
                if (splitRecord.length >= 3) {
                    cuePhraseFamily = splitRecord[2];
                }
                if (splitRecord.length >= 2) {
                    cuePhraseCategory = splitRecord[1];
                    if (cuePhraseCategory.isEmpty()) {
                        cuePhraseCategory = "category__" + cuePhraseFamily;
                    }
                }
                this.writeToFormatLucene(cuePhrase, cuePhraseCategory, cuePhraseFamily);
                ++numEntries;
            }
        }
        finally {
            br.close();
        }
        return numEntries;
    }

    /**
     * Merges (if needed) and closes the index writer, then reports the entry count when
     * no error occurred. Does nothing if the writer was never opened.
     */
    private void closeIndexWriter(boolean error, long numEntries) {
        if (this.iwriter == null) {
            return;
        }
        try {
            try {
                this.iwriter.maybeMerge();
            }
            finally {
                // Close even when the merge fails so the writer is not left open.
                this.iwriter.close();
            }
            if (!error) {
                System.out.println("Index created with " + numEntries + " entries.");
            }
        }
        catch (IOException io) {
            System.out.println("IO exception caught");
            io.printStackTrace();
        }
    }

    /**
     * Entry point. Accepts either one or three arguments; only the first one (the input
     * directory) is used. Any other argument count prints the usage message.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        if (args.length != 1 && args.length != 3) {
            System.out.println(CreateAssertionLuceneIndexFromDelimitedFile.getUsage());
            return;
        }
        try {
            directoryOfDelimitedFiles = args[0];
            tokenizer = new TokenizerPTB();
            new CreateAssertionLuceneIndexFromDelimitedFile(tokenizer);
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads a whole text file into a string, terminating every line with {@code '\n'}.
     *
     * @param filename path of the file to read
     * @return the file content, or an empty string for an empty file
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading fails
     */
    public static String load(String filename) throws FileNotFoundException, IOException {
        StringBuilder msg = new StringBuilder();
        BufferedReader br = new BufferedReader(new FileReader(new File(filename)));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                msg.append(line).append("\n");
            }
        }
        finally {
            br.close();
        }
        return msg.toString();
    }

    /**
     * Loads a map of lower-cased hyphenated word to frequency from a file whose lines
     * have the form {@code word|frequency}.
     *
     * <p>Lines that do not have exactly two non-empty columns, or whose frequency is not
     * an integer, are reported on standard output and skipped.
     *
     * @param filename path of the hyphen file
     * @return a map from {@code String} word to {@code Integer} frequency
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading fails
     */
    public static Map loadHyphMap(String filename) throws FileNotFoundException, IOException {
        Map<String, Integer> hyphMap = new HashMap<String, Integer>();
        BufferedReader br = new BufferedReader(new FileReader(new File(filename)));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                StringTokenizer st = new StringTokenizer(line, "|");
                boolean valid = false;
                if (st.countTokens() == 2) {
                    String hyphWord = st.nextToken();
                    String freqText = st.nextToken().trim();
                    try {
                        hyphMap.put(hyphWord.toLowerCase(), Integer.valueOf(freqText));
                        valid = true;
                    }
                    catch (NumberFormatException ignored) {
                        // A malformed frequency is reported below like any other bad line.
                    }
                }
                if (!valid) {
                    System.out.println("Invalid hyphen file line: " + line);
                }
            }
        }
        finally {
            br.close();
        }
        return hyphMap;
    }

    /**
     * Prints the text followed by one diagnostic line per token: type, capitalization,
     * number position, integer flag, offsets and the covered text.
     *
     * @param text the text the tokens were produced from
     * @param results list of {@link Token} objects whose offsets refer to {@code text}
     */
    public static void printResults(String text, List results) {
        System.out.println("Text: " + text);
        for (int i = 0; i < results.size(); ++i) {
            Token t = (Token)results.get(i);
            String typeStr = typeLabel(t.getType());
            String capsStr = capsLabel(t.getCaps());
            String numPosStr = numPositionLabel(t.getNumPosition());
            String intStr = t.isInteger() ? "Y" : "N";
            System.out.println("Token: type=[" + typeStr + "] caps=[" + capsStr + "] npos=[" + numPosStr + "] int=[" + intStr + "] offsets=[" + t.getStartOffset() + "," + t.getEndOffset() + "]\t\ttext=[" + text.substring(t.getStartOffset(), t.getEndOffset()) + "]");
        }
    }

    /** Maps a numeric token type to its fixed-width display label. */
    private static String typeLabel(int type) {
        switch (type) {
            case 1:
                return "word       ";
            case 2:
                return "number     ";
            case 3:
                return "punctuation";
            case 4:
                return "end of line";
            case 5:
                return "contraction";
            case 6:
                return "symbol     ";
            default:
                return "unknown    ";
        }
    }

    /** Maps a numeric capitalization code to its one-letter display label. */
    private static String capsLabel(int caps) {
        switch (caps) {
            case 1:
                return "N";
            case 2:
                return "M";
            case 3:
                return "F";
            case 4:
                return "A";
            default:
                return "?";
        }
    }

    /** Maps a numeric number-position code to its one-letter display label. */
    private static String numPositionLabel(int numPosition) {
        switch (numPosition) {
            case 0:
                return "N";
            case 1:
                return "F";
            case 2:
                return "M";
            case 3:
                return "L";
            default:
                return "?";
        }
    }

    /**
     * Returns the command-line usage message.
     *
     * <p>NOTE(review): the message names {@code LucenePopulateDriver} rather than this
     * class; the text is kept unchanged in case callers depend on it.
     *
     * @return the usage message
     */
    public static String getUsage() {
        return "java LucenePopulateDriver <dir-containing-textfile(s)> [hyphenfile] [freqcutoff]";
    }

    /**
     * Adds one cue phrase to the index and appends a trace record to the sample file.
     *
     * <p>The cue phrase is tokenized, the tokens are sorted with {@link OffsetComparator},
     * and the text of the first token is stored in its own field. Failures are reported
     * on standard output and not propagated.
     *
     * @param cuePhrase the cue phrase text
     * @param cuePhraseCategory the category stored with the phrase
     * @param cuePhraseFamily the family stored with the phrase
     */
    protected void writeToFormatLucene(String cuePhrase, String cuePhraseCategory, String cuePhraseFamily) {
        Document doc = new Document();
        try {
            ++this.idCount;
            System.out.println(" " + this.idCount + " processed so far out of total");
            doc.add(new TextField(CUE_PHRASE_FIELD_NAME, cuePhrase, Field.Store.YES));
            doc.add(new StringField(CUE_PHRASE_CATEGORY_FIELD_NAME, cuePhraseCategory, Field.Store.YES));
            doc.add(new StringField(CUE_PHRASE_FAMILY_FIELD_NAME, cuePhraseFamily, Field.Store.YES));
            List list = tokenizer.tokenize(cuePhrase);
            Collections.sort(list, new OffsetComparator());
            String firstTokenText = "";
            StringBuilder tokenizedCuePhrase = new StringBuilder();
            boolean first = true;
            for (Iterator tokenItr = list.iterator(); tokenItr.hasNext();) {
                Token t = (Token)tokenItr.next();
                if (first) {
                    firstTokenText = t.getText();
                    first = false;
                } else {
                    tokenizedCuePhrase.append(" ");
                }
                tokenizedCuePhrase.append(t.getText());
            }
            doc.add(new StringField(CUE_PHRASE_FIRST_WORD_FIELD_NAME, firstTokenText, Field.Store.YES));
            this.iwriter.addDocument(doc);
            // The trace record holds the phrase, its category and the space-joined tokens.
            String data = cuePhrase + "|" + cuePhraseCategory + "|" + tokenizedCuePhrase + '\n';
            this.writeToFile(data);
        }
        catch (IOException io) {
            System.out.println("IOException in document : io " + io.getLocalizedMessage());
        }
        catch (Exception exc) {
            System.out.println("Exception in document : exc " + exc.getLocalizedMessage());
        }
    }

    /**
     * Appends the given text to {@code sample.txt} in the current working directory.
     * I/O failures are printed and not propagated.
     *
     * @param str the text to append
     */
    public void writeToFile(String str) {
        try {
            BufferedWriter out = new BufferedWriter(new FileWriter(TRACE_FILE_NAME, true));
            try {
                out.write(str);
            }
            finally {
                // Close even when the write fails so the file handle is not leaked.
                out.close();
            }
        }
        catch (IOException e) {
            e.printStackTrace();
        }
    }
}

