/*
 * Decompiled with CFR 0.152.
 */
package com.googlecode.clearnlp.run;

import com.carrotsearch.hppc.IntContainer;
import com.carrotsearch.hppc.IntLookupContainer;
import com.carrotsearch.hppc.IntOpenHashSet;
import com.googlecode.clearnlp.constituent.CTLibEn;
import com.googlecode.clearnlp.constituent.CTNode;
import com.googlecode.clearnlp.constituent.CTTree;
import com.googlecode.clearnlp.propbank.PBArg;
import com.googlecode.clearnlp.propbank.PBInstance;
import com.googlecode.clearnlp.propbank.PBLib;
import com.googlecode.clearnlp.propbank.PBLoc;
import com.googlecode.clearnlp.run.AbstractRun;
import com.googlecode.clearnlp.util.UTHppc;
import com.googlecode.clearnlp.util.UTOutput;
import com.googlecode.clearnlp.util.list.SortedIntArrayList;
import com.googlecode.clearnlp.util.pair.Pair;
import java.io.File;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.kohsuke.args4j.Option;

/**
 * Command-line runner that post-processes PropBank instances against their
 * constituent trees and writes the result out again.
 *
 * <p>The processing pipeline lives in {@code postProcess}; diagnostics about
 * instances that cannot be handled are printed to {@code System.err}, each
 * prefixed with one of the {@code ERR_*} codes declared below.
 */
public class PBPostProcess
extends AbstractRun {
    /** Diagnostic prefix printed by {@code findMisalignedArgs} when the predicate or an argument does not align with the tree. */
    public static final String ERR_ALIGN = "A";
    /** Diagnostic prefix printed by {@code fixCyclicLocs} when an argument still dominates the predicate. */
    public static final String ERR_CYCLIC = "C";
    /** Diagnostic prefix printed by {@code findOverlappingArguments} when two arguments partially overlap. */
    public static final String ERR_OVERLAP = "O";
    /** Diagnostic prefix printed by {@code mergeLightVerbs} when a light-verb instance has no matching noun instance. */
    public static final String ERR_LV = "L";
    /** Path of the input PropBank file ({@code -i}). */
    @Option(name="-i", usage="the PropBank file to be post-processed (required)", required=true, metaVar="<filename>")
    private String s_propFile;
    /** Path of the single output file ({@code -o}); when {@code null}, {@code postProcess} writes one ".prop" file per tree file instead. */
    @Option(name="-o", usage="the post-processed PropBank file (default: null)", required=false, metaVar="<filename>")
    private String s_postFile = null;
    /** Directory containing the Treebank files ({@code -t}); also used as the output directory when {@code -o} is not given. */
    @Option(name="-t", usage="the directory path to Treebank files (required)", required=true, metaVar="<dirpath>")
    private String s_treeDir;
    /** Co-index normalization flag ({@code -n}); forwarded to {@code PBLib.getPBInstanceList}. */
    @Option(name="-n", usage="if set, normalize co-indices of constituent trees", required=false)
    private boolean b_norm;
    /** Language code ({@code -l}); only "en" triggers tree preprocessing in {@code postProcess}. */
    @Option(name="-l", usage="language (default: en)", required=false, metaVar="<language>")
    private String s_language = "en";

    /**
     * Hands the raw arguments to the inherited {@code initArgs} (presumably
     * the step that fills the {@code @Option} fields -- confirm in
     * {@code AbstractRun}) and then runs the post-processing with the
     * resulting field values.
     *
     * @param args raw command-line arguments
     */
    public PBPostProcess(String[] args) {
        initArgs(args);
        postProcess(s_propFile, s_postFile, s_treeDir, b_norm, s_language);
    }

    /**
     * Loads the PropBank instances, runs the full clean-up pipeline on each
     * one, drops the instances that must be skipped, and writes the rest.
     *
     * <p>The key of every instance is echoed to {@code System.out} as it is
     * visited. The order of the per-instance steps below is significant:
     * later steps rely on the argument/location state left by earlier ones.
     *
     * @param propFile path of the PropBank file to read
     * @param postFile path of the single output file, or {@code null} to
     *                 write one ".prop" file per tree file under {@code treeDir}
     * @param treeDir  directory holding the Treebank files
     * @param norm     forwarded to {@code PBLib.getPBInstanceList}
     * @param language language code; only "en" triggers tree preprocessing
     */
    public void postProcess(String propFile, String postFile, String treeDir, boolean norm, String language) {
        List<PBInstance> instances = PBLib.getPBInstanceList(propFile, treeDir, norm);
        List<PBInstance> skipped = new ArrayList<PBInstance>();

        mergeLightVerbs(instances);

        for (PBInstance instance : instances) {
            System.out.println(instance.getKey());
            CTTree tree = instance.getTree();

            // Only English trees have a preprocessing step.
            if (language.equals("en")) {
                CTLibEn.preprocessTree(tree);
            }

            if (isSkip(instance, tree)) {
                skipped.add(instance);
            } else {
                instance.sortArgs();
                joinConcatenations(instance);
                fixCyclicLocs(instance);
                removeRedundantLocs(instance);
                if (instance.isVerbPredicate()) {
                    fixIllegalPROs(instance);
                }

                // The DSP argument is extracted before link handling and only
                // re-attached once every other step has run.
                PBArg dspArg = getArgDSP(instance);
                getLinks(instance);
                normalizeLinks(instance);
                instance.sortArgs();
                removeRedundantLocs(instance);
                findOverlappingArguments(instance);
                addLinks(instance);
                raiseEmptyArguments(instance);

                if (dspArg != null) {
                    instance.addArg(dspArg);
                }
            }
        }

        instances.removeAll(skipped);

        if (postFile != null) {
            PBLib.printPBInstances(instances, postFile);
        } else {
            printInstances(instances, treeDir);
        }
    }

    /**
     * Decides whether an instance should be dropped from the output.
     *
     * <p>An instance is skipped when its roleset matches
     * {@code PBLib.ILLEGAL_ROLESET}, when {@code findMisalignedArgs} reports a
     * misalignment, or when it is a verb predicate whose terminal sits
     * directly under a node tagged "PP".
     *
     * @param instance the instance to test
     * @param tree     the constituent tree of {@code instance}
     * @return {@code true} if the instance should be removed
     */
    private boolean isSkip(PBInstance instance, CTTree tree) {
        boolean illegalRoleset = PBLib.ILLEGAL_ROLESET.matcher(instance.roleset).find();
        if (illegalRoleset || findMisalignedArgs(instance)) {
            return true;
        }
        if (!instance.isVerbPredicate()) {
            return false;
        }
        CTNode predParent = tree.getTerminal(instance.predId).getParent();
        return predParent.isPTag("PP");
    }

    /**
     * Folds light-verb instances into the noun instances they support.
     *
     * <p>A light-verb instance is a verb predicate whose roleset ends in
     * "LV". Its argument whose label ends in "PRR" identifies the noun
     * instance (looked up by key). The verb's LINK* and ARG0 arguments are
     * added to that noun instance, the verb's predicate terminal is appended
     * to the noun's "rel" argument with type ",", and every argument of the
     * verb other than "rel" and the "PRR" argument is removed from the verb.
     * Light-verb instances with no matching noun instance are reported on
     * {@code System.err} and removed from {@code instances}.
     *
     * @param instances all instances; modified in place
     */
    private void mergeLightVerbs(List<PBInstance> instances) {
        HashMap<String, PBInstance> nounsByKey = new HashMap<String, PBInstance>();
        List<PBInstance> lightVerbs = new ArrayList<PBInstance>();
        List<PBInstance> unresolved = new ArrayList<PBInstance>();

        // Partition: non-verb predicates are indexed by key, light verbs are queued.
        for (PBInstance instance : instances) {
            if (!instance.isVerbPredicate()) {
                nounsByKey.put(instance.getKey(), instance);
            } else if (instance.roleset.endsWith("LV")) {
                lightVerbs.add(instance);
            }
        }

        for (PBInstance lightVerb : lightVerbs) {
            PBInstance noun = null;
            ArrayList<PBArg> buffer = new ArrayList<PBArg>();

            for (PBArg arg : lightVerb.getArgs()) {
                if (arg.label.endsWith("PRR")) {
                    noun = nounsByKey.get(lightVerb.getKey(arg.getLoc(0).terminalId));
                } else if (arg.label.startsWith("LINK") || arg.isLabel("ARG0")) {
                    buffer.add(arg);
                }
            }

            if (noun == null) {
                System.err.println(ERR_LV + ":" + " " + lightVerb.toString());
                unresolved.add(lightVerb);
                continue;
            }

            noun.addArgs(buffer);
            noun.getFirstArg("rel").addLoc(new PBLoc(lightVerb.predId, 0, ","));

            // Reuse the buffer to collect what must be stripped from the verb.
            buffer.clear();
            for (PBArg arg : lightVerb.getArgs()) {
                boolean keep = arg.isLabel("rel") || arg.label.endsWith("PRR");
                if (!keep) {
                    buffer.add(arg);
                }
            }
            lightVerb.removeArgs(buffer);
        }

        instances.removeAll(unresolved);
    }

    /**
     * Checks that the predicate and every argument location of an instance
     * fall inside its tree, and reports the first offender on
     * {@code System.err}.
     *
     * <p>The predicate is considered misaligned (label "rel") when its
     * terminal is out of range, when a verb predicate's terminal tag does not
     * start with "VB", or when a noun predicate's terminal tag does not start
     * with "NN". Otherwise argument locations are scanned in order; as a side
     * effect every in-range location of type "&amp;" visited before the scan
     * stops is rewritten to type "*".
     *
     * @param instance the instance to check; its location types may be modified
     * @return {@code true} if a misalignment was found and reported
     */
    private boolean findMisalignedArgs(PBInstance instance) {
        CTTree tree = instance.getTree();
        String badLabel = null;

        if (!tree.isRange(instance.predId, 0)) {
            badLabel = "rel";
        } else if (instance.isVerbPredicate() && !tree.getTerminal(instance.predId).pTag.startsWith("VB")) {
            badLabel = "rel";
        } else if (instance.isNounPredicate() && !tree.getTerminal(instance.predId).pTag.startsWith("NN")) {
            badLabel = "rel";
        } else {
            scan:
            for (PBArg arg : instance.getArgs()) {
                for (PBLoc loc : arg.getLocs()) {
                    if (!tree.isRange(loc)) {
                        badLabel = arg.label;
                        break scan;
                    }
                    if (loc.isType("&")) {
                        loc.type = "*";
                    }
                }
            }
        }

        if (badLabel == null) {
            return false;
        }

        System.err.println(ERR_ALIGN + ":" + badLabel + " " + instance.toString());
        return true;
    }

    /**
     * Collapses arguments made only of concatenated terminals into the
     * highest tree nodes that cover exactly those terminals.
     *
     * <p>For each non-"rel" argument whose locations are all at height 0 and
     * of type "" or ",", the terminal ids are grouped greedily: starting from
     * the smallest remaining id, the node is raised while its parent is not
     * "TOP" and the {@code UTHppc.isSubset} test against the remaining ids
     * holds. The argument's locations are replaced only when this yields
     * fewer locations than before.
     *
     * <p>NOTE(review): encountering a location of another type or with
     * height &gt; 0 returns from the method, so the remaining arguments of
     * the instance are not examined either -- confirm that this is intended
     * rather than a per-argument skip.
     *
     * @param instance the instance whose arguments are rewritten in place
     */
    private void joinConcatenations(PBInstance instance) {
        SortedIntArrayList pendingIds = new SortedIntArrayList();
        CTTree tree = instance.getTree();

        for (PBArg arg : instance.getArgs()) {
            if (arg.isLabel("rel")) {
                continue;
            }

            pendingIds.clear();
            for (PBLoc loc : arg.getLocs()) {
                boolean plainOrConcat = loc.isType("") || loc.isType(",");
                if (!plainOrConcat || loc.height > 0) {
                    return;
                }
                pendingIds.add(loc.terminalId);
            }

            ArrayList<PBLoc> merged = new ArrayList<PBLoc>();
            while (!pendingIds.isEmpty()) {
                int firstId = pendingIds.get(0);
                int height = 0;
                CTNode node = tree.getNode(firstId, height);
                CTNode parent = node.getParent();

                // Climb while the parent is still acceptable for the pending ids.
                while (parent != null && !parent.isPTag("TOP") && UTHppc.isSubset((IntContainer)pendingIds, (IntContainer)parent.getSubTerminalIdSet())) {
                    node = parent;
                    ++height;
                    parent = node.getParent();
                }

                merged.add(new PBLoc(firstId, height, ","));
                pendingIds.removeAll((IntLookupContainer)node.getSubTerminalIdSet());
            }

            if (merged.size() < arg.getLocSize()) {
                // The first location of an argument carries the empty type.
                merged.get(0).type = "";
                arg.replaceLocs(merged);
            }
        }
    }

    /**
     * Repairs argument locations whose subtree contains the predicate
     * terminal, and reports the arguments that cannot be repaired.
     *
     * <p>For such a location, in order of precedence:
     * <ul>
     *   <li>"ARGM-MOD": the height is reset to 0;</li>
     *   <li>"LINK-SLC" on an "SBAR" with a child matching "+WH.*": the
     *       location is moved to that child, keeping its type;</li>
     *   <li>an "NP" whose first child is an "NP" not containing the
     *       predicate: the height is decremented by one;</li>
     *   <li>otherwise the argument label is recorded and the remaining
     *       locations of that argument are not examined.</li>
     * </ul>
     * If any label was recorded, one line is printed to {@code System.err}.
     *
     * @param instance the instance whose locations are adjusted in place
     */
    private void fixCyclicLocs(PBInstance instance) {
        CTTree tree = instance.getTree();
        int predId = instance.predId;
        StringBuilder cyclicLabels = new StringBuilder();

        for (PBArg arg : instance.getArgs()) {
            if (arg.isLabel("rel")) {
                continue;
            }

            for (PBLoc loc : arg.getLocs()) {
                CTNode node = tree.getNode(loc);
                if (!node.getSubTerminalIdSet().contains(predId)) {
                    continue;
                }

                if (arg.isLabel("ARGM-MOD")) {
                    loc.height = 0;
                    continue;
                }

                if (arg.isLabel("LINK-SLC") && node.isPTag("SBAR")) {
                    CTNode wh = node.getFirstChild("+WH.*");
                    if (wh != null) {
                        loc.set(wh.getPBLoc(), loc.type);
                        continue;
                    }
                }

                if (node.isPTag("NP")) {
                    CTNode firstChild = node.getChild(0);
                    if (firstChild.isPTag("NP") && !firstChild.getSubTerminalIdSet().contains(predId)) {
                        --loc.height;
                        continue;
                    }
                }

                // Unfixable: remember the label and move on to the next argument.
                cyclicLabels.append(":");
                cyclicLabels.append(arg.label);
                break;
            }
        }

        if (cyclicLabels.length() > 0) {
            System.err.println(ERR_CYCLIC + cyclicLabels + " " + instance.toString());
        }
    }

    /**
     * Removes, within each argument, every location that is immediately
     * followed by another location with the same terminal id (the later one
     * of each adjacent pair is kept).
     *
     * @param instance the instance whose arguments are cleaned in place
     */
    private void removeRedundantLocs(PBInstance instance) {
        ArrayList<PBLoc> duplicates = new ArrayList<PBLoc>();

        for (PBArg arg : instance.getArgs()) {
            duplicates.clear();
            int lastIndex = arg.getLocSize() - 1;

            for (int i = 1; i <= lastIndex; i++) {
                PBLoc previous = arg.getLoc(i - 1);
                PBLoc current = arg.getLoc(i);
                if (previous.terminalId == current.terminalId) {
                    duplicates.add(previous);
                }
            }

            if (!duplicates.isEmpty()) {
                arg.removeLocs(duplicates);
            }
        }
    }

    /**
     * Raises by one level every non-"rel" argument location that lies after
     * the predicate terminal and points at an empty-category node carrying
     * the "SBJ" function tag directly under an "S".
     *
     * @param instance the instance whose locations are adjusted in place
     */
    private void fixIllegalPROs(PBInstance instance) {
        CTTree tree = instance.getTree();

        for (PBArg arg : instance.getArgs()) {
            if (arg.isLabel("rel")) {
                continue;
            }
            for (PBLoc loc : arg.getLocs()) {
                if (loc.terminalId <= instance.predId) {
                    continue;
                }
                CTNode node = tree.getNode(loc);
                if (node.isEmptyCategoryRec() && node.hasFTag("SBJ") && node.getParent().isPTag("S")) {
                    ++loc.height;
                }
            }
        }
    }

    /**
     * Transfers the link information encoded in argument location chains
     * onto the tree nodes as antecedents, then removes all LINK* arguments
     * from the instance.
     *
     * <p>Each argument's locations are visited from the last one down to the
     * second one; the candidate antecedent for location {@code i} is the node
     * at location {@code i - 1}. An antecedent is only set on a node that
     * does not have one yet:
     * <ul>
     *   <li>WH* node: set on its complementizer, then stop with this argument;</li>
     *   <li>complementizer node: set on the node, then stop with this argument;</li>
     *   <li>empty category with location type "*": set on its first terminal
     *       and keep going.</li>
     * </ul>
     *
     * <p>A WH* node for which {@code CTLibEn.getComplementizer} returns
     * {@code null} is skipped, matching the null checks the other methods of
     * this class apply to the same call, instead of failing with a
     * {@code NullPointerException}.
     *
     * @param instance the instance to process; its tree nodes and argument
     *                 list are modified
     */
    private void getLinks(PBInstance instance) {
        CTTree tree = instance.getTree();
        ArrayList<PBArg> lLinks = new ArrayList<PBArg>();

        for (PBArg arg : instance.getArgs()) {
            // LINK* arguments are still scanned below before being removed at the end.
            if (arg.label.startsWith("LINK")) {
                lLinks.add(arg);
            }

            for (int i = arg.getLocSize() - 1; i > 0; --i) {
                PBLoc loc = arg.getLoc(i);
                CTNode node = tree.getNode(loc);

                if (node.pTag.startsWith("WH")) {
                    CTNode comp = CTLibEn.getComplementizer(node);
                    // No complementizer, or it is already linked: nothing to record here.
                    if (comp == null || comp.getAntecedent() != null) {
                        continue;
                    }
                    comp.setAntecedent(tree.getNode(arg.getLoc(i - 1)));
                    break;
                }

                if (CTLibEn.isComplementizer(node)) {
                    if (node.getAntecedent() != null) {
                        continue;
                    }
                    node.setAntecedent(tree.getNode(arg.getLoc(i - 1)));
                    break;
                }

                if (!node.isEmptyCategoryRec() || !loc.isType("*")) {
                    continue;
                }
                CTNode ec = node.getFirstTerminal();
                if (ec.getAntecedent() == null) {
                    ec.setAntecedent(tree.getNode(arg.getLoc(i - 1)));
                }
            }
        }

        if (!lLinks.isEmpty()) {
            instance.removeArgs(lLinks);
        }
    }

    /**
     * Rewrites the locations of every non-"rel" argument so that linked
     * material (antecedents of complementizers, WH phrases, empty categories
     * and null subjects) is represented explicitly as extra locations.
     *
     * <p>The location loop re-reads {@code arg.getLocSize()} on every
     * iteration while the body calls {@code arg.addLoc}; assuming
     * {@code addLoc} appends, locations added during the loop are visited as
     * well. Statement order inside the loop therefore matters.
     *
     * @param instance the instance whose argument locations are modified in place
     */
    private void normalizeLinks(PBInstance instance) {
        // Locations scheduled for removal from the current argument.
        ArrayList<PBLoc> lDel = new ArrayList<PBLoc>();
        CTTree tree = instance.getTree();
        CTNode pred = tree.getTerminal(instance.predId);
        for (PBArg arg : instance.getArgs()) {
            if (arg.isLabel("rel")) continue;
            lDel.clear();
            for (int i = 0; i < arg.getLocSize(); ++i) {
                CTNode node;
                CTNode ante;
                PBLoc cLoc = arg.getLoc(i);
                CTNode curr = tree.getNode(cLoc);
                // Case 1: complementizer -- add its antecedent as a "*" location and,
                // if a WH ancestor carries a co-index, re-point this location to it.
                if (CTLibEn.isComplementizer(curr)) {
                    ante = curr.getAntecedent();
                    if (ante != null) {
                        arg.addLoc(new PBLoc(ante.getPBLoc(), "*"));
                    }
                    if ((node = this.getCoIndexedWHNode(curr)) == null) continue;
                    cLoc.set(node.getPBLoc(), "*");
                    continue;
                }
                // Case 2: WH phrase -- add the antecedent of its complementizer, if any.
                if (curr.pTag.startsWith("WH")) {
                    node = CTLibEn.getComplementizer(curr);
                    if (node == null || (ante = node.getAntecedent()) == null) continue;
                    arg.addLoc(new PBLoc(ante.getPBLoc(), "*"));
                    continue;
                }
                // Case 3: empty category -- lower the location to the terminal itself
                // and add that terminal's antecedent, if any.
                if (curr.isEmptyCategoryRec()) {
                    cLoc.height = 0;
                    node = tree.getTerminal(cLoc.terminalId);
                    ante = node.getAntecedent();
                    if (ante == null) continue;
                    arg.addLoc(new PBLoc(ante.getPBLoc(), "*"));
                    continue;
                }
                // Case 4: the node contains *ICH* / *RNR* empty categories -- schedule
                // each such empty category for removal; its antecedent is either also
                // scheduled for removal (when it lies inside curr or dominates the
                // predicate) or added as a ";" location.
                List<CTNode> list = curr.getIncludedEmptyCategory("\\*(ICH|RNR)\\*.*");
                if (!list.isEmpty()) {
                    for (CTNode ec : list) {
                        lDel.add(new PBLoc(ec.getPBLoc(), ""));
                        ante = ec.getAntecedent();
                        if (ante == null) continue;
                        if (ante.isDescendantOf(curr) || pred.isDescendantOf(ante)) {
                            lDel.add(new PBLoc(ante.getPBLoc(), ""));
                            continue;
                        }
                        arg.addLoc(new PBLoc(ante.getPBLoc(), ";"));
                    }
                    continue;
                }
                // Case 5: an "S" containing a "VP" whose "-SBJ" child is an empty
                // category -- add the subject's antecedent when the null form matches
                // CTLibEn.RE_NULL, the antecedent is a non-empty "SBJ" node, and no
                // argument of the instance already points at it.
                if (!curr.isPTag("S") || (node = curr.getFirstChild("-SBJ")) == null || !node.isEmptyCategoryRec() || !curr.containsTags("VP")) continue;
                node = node.getFirstTerminal();
                if (!CTLibEn.RE_NULL.matcher(node.form).find() || (ante = node.getAntecedent()) == null || !ante.hasFTag("SBJ") || ante.isEmptyCategoryRec() || this.existsLoc(instance, ante.getPBLoc())) continue;
                arg.addLoc(new PBLoc(ante.getPBLoc(), "*"));
            }
            // Apply the removals collected in case 4, matching by terminal id and height.
            for (PBLoc rLoc : lDel) {
                arg.removeLoc(rLoc.terminalId, rLoc.height);
            }
        }
    }

    /**
     * Walks up from {@code node} through the unbroken chain of ancestors
     * whose tag starts with "WH" and returns the first one whose
     * {@code coIndex} is not -1.
     *
     * @param node the node to start from (not itself inspected)
     * @return the nearest co-indexed WH ancestor, or {@code null} if the
     *         chain ends before one is found
     */
    private CTNode getCoIndexedWHNode(CTNode node) {
        CTNode ancestor = node.getParent();
        while (ancestor != null && ancestor.pTag.startsWith("WH")) {
            if (ancestor.coIndex != -1) {
                return ancestor;
            }
            ancestor = ancestor.getParent();
        }
        return null;
    }

    /**
     * Tells whether any argument of the instance already has a location with
     * the same terminal id and height as {@code loc}.
     *
     * @param instance the instance to search
     * @param loc      the location to look for
     * @return {@code true} if a matching location exists
     */
    private boolean existsLoc(PBInstance instance, PBLoc loc) {
        boolean found = false;
        search:
        for (PBArg arg : instance.getArgs()) {
            for (PBLoc candidate : arg.getLocs()) {
                if (candidate.equals(loc.terminalId, loc.height)) {
                    found = true;
                    break search;
                }
            }
        }
        return found;
    }

    /**
     * Compares the terminal-id sets of every pair of arguments.
     *
     * <p>When the two sets have different sizes and one
     * {@code UTHppc.isSubset} test holds, one argument of the pair is
     * scheduled for removal (the second one when the test holds for
     * (first, second), the first one when it holds for (second, first)).
     * When neither applies but the sets intersect, the pair is reported on
     * {@code System.err} and the method returns immediately without removing
     * the arguments scheduled so far.
     *
     * @param instance the instance to inspect; scheduled arguments are
     *                 removed only when no overlap is reported
     * @return {@code true} if an overlap was reported
     */
    private boolean findOverlappingArguments(PBInstance instance) {
        CTTree tree = instance.getTree();
        int argCount = instance.getArgSize();
        ArrayList<PBArg> subsumed = new ArrayList<PBArg>();

        for (int i = 0; i < argCount; ++i) {
            PBArg first = instance.getArg(i);
            IntOpenHashSet firstIds = getTerminalIdSet(first, tree);
            int firstSize = firstIds.size();

            for (int j = i + 1; j < argCount; ++j) {
                PBArg second = instance.getArg(j);
                IntOpenHashSet secondIds = getTerminalIdSet(second, tree);
                int secondSize = secondIds.size();

                if (UTHppc.isSubset((IntContainer)firstIds, (IntContainer)secondIds) && firstSize != secondSize) {
                    subsumed.add(second);
                } else if (UTHppc.isSubset((IntContainer)secondIds, (IntContainer)firstIds) && firstSize != secondSize) {
                    subsumed.add(first);
                } else if (!UTHppc.intersection((IntContainer)firstIds, (IntContainer)secondIds).isEmpty()) {
                    System.err.println(ERR_OVERLAP + ":" + first.label + ":" + second.label + " " + instance.toString());
                    return true;
                }
            }
        }

        instance.removeArgs(subsumed);
        return false;
    }

    /**
     * Collects the terminal ids covered by an argument, ignoring locations
     * of type ";".
     *
     * @param arg  the argument whose locations are read
     * @param tree the tree used to resolve locations to nodes
     * @return a new set holding the union of the sub-terminal ids of the
     *         argument's remaining locations
     */
    private IntOpenHashSet getTerminalIdSet(PBArg arg, CTTree tree) {
        IntOpenHashSet ids = new IntOpenHashSet();
        for (PBLoc loc : arg.getLocs()) {
            if (!loc.isType(";")) {
                CTNode node = tree.getNode(loc);
                ids.addAll((IntContainer)node.getSubTerminalIdSet());
            }
        }
        return ids;
    }

    /**
     * Creates LINK-* arguments from the antecedents stored on tree nodes and
     * adds them to the instance.
     *
     * <p>For every argument location:
     * <ul>
     *   <li>a WH* node whose complementizer has an antecedent yields "LINK-SLC";</li>
     *   <li>an empty category with an antecedent yields "LINK-PSV" when its
     *       form is "*" and "LINK-PRO" when its form is "*PRO*".</li>
     * </ul>
     * Each new argument has two locations: the antecedent (type "") followed
     * by the node itself (type "*").
     *
     * @param instance the instance that receives the new arguments
     */
    private void addLinks(PBInstance instance) {
        CTTree tree = instance.getTree();
        ArrayList<PBArg> newLinks = new ArrayList<PBArg>();

        for (PBArg arg : instance.getArgs()) {
            for (PBLoc loc : arg.getLocs()) {
                CTNode node = tree.getNode(loc);
                CTNode antecedent = null;
                String linkLabel = null;

                if (node.pTag.startsWith("WH")) {
                    CTNode comp = CTLibEn.getComplementizer(node);
                    if (comp != null) {
                        antecedent = comp.getAntecedent();
                        if (antecedent != null) {
                            linkLabel = "LINK-SLC";
                        }
                    }
                } else if (node.isEmptyCategory()) {
                    antecedent = node.getAntecedent();
                    if (antecedent != null) {
                        if (node.form.equals("*")) {
                            linkLabel = "LINK-PSV";
                        } else if (node.form.equals("*PRO*")) {
                            linkLabel = "LINK-PRO";
                        }
                    }
                }

                if (linkLabel != null) {
                    PBArg link = new PBArg();
                    link.label = linkLabel;
                    link.addLoc(new PBLoc(antecedent.getPBLoc(), ""));
                    link.addLoc(new PBLoc(node.getPBLoc(), "*"));
                    newLinks.add(link);
                }
            }
        }

        instance.addArgs(newLinks);
    }

    /**
     * Re-points each non-"rel" argument location to its parent node when
     * that parent is not "TOP" and has exactly one child; otherwise the
     * location is reset to the node it already resolves to. The location
     * type is preserved. Only one level is raised per location.
     *
     * @param instance the instance whose locations are rewritten in place
     */
    private void raiseEmptyArguments(PBInstance instance) {
        CTTree tree = instance.getTree();

        for (PBArg arg : instance.getArgs()) {
            if (arg.isLabel("rel")) {
                continue;
            }

            int locCount = arg.getLocSize();
            for (int i = 0; i < locCount; ++i) {
                PBLoc loc = arg.getLoc(i);
                CTNode node = tree.getNode(loc);
                CTNode parent = node.getParent();

                boolean soleChild = parent != null && !parent.isPTag("TOP") && parent.getChildrenSize() == 1;
                CTNode target = soleChild ? parent : node;
                loc.set(target.getPBLoc(), loc.type);
            }
        }
    }

    /**
     * Writes the instances to one ".prop" file per tree file, placed under
     * {@code treeDir}.
     *
     * <p>Instances are expected to arrive grouped by {@code treePath}: a new
     * output file is opened every time the tree path changes from the
     * previous instance. The output name is the tree path with its part
     * after the last '.' replaced by "prop" (or with ".prop" appended when
     * the tree path contains no '.').
     *
     * <p>A warning is printed to {@code System.err} when the file about to
     * be written already exists. The check is made against the actual output
     * location ({@code treeDir} + separator + name), not against the bare
     * name resolved relative to the working directory. The open stream is
     * closed even if writing is interrupted by an exception.
     *
     * @param instances the instances to print, grouped by tree path
     * @param treeDir   the directory under which the ".prop" files are created
     */
    private void printInstances(List<PBInstance> instances, String treeDir) {
        String treePath = "";
        PrintStream fout = null;

        try {
            for (PBInstance instance : instances) {
                if (!treePath.equals(instance.treePath)) {
                    if (fout != null) {
                        fout.close();
                        // Avoid a second close in the finally block if opening the next file fails.
                        fout = null;
                    }

                    treePath = instance.treePath;
                    int dot = treePath.lastIndexOf('.');
                    String baseName = (dot < 0) ? treePath : treePath.substring(0, dot);
                    String outPath = treeDir + File.separator + baseName + ".prop";

                    if (new File(outPath).exists()) {
                        System.err.println("Warning: '" + outPath + "' already exists");
                    }
                    fout = UTOutput.createPrintBufferedFileStream(outPath);
                }
                fout.println(instance.toString());
            }
        } finally {
            if (fout != null) {
                fout.close();
            }
        }
    }

    /**
     * Builds a "-DSP" argument for a predicate that {@code getESMPair}
     * recognises, and removes the argument(s) it replaces from the instance.
     *
     * <p>Among the numbered arguments other than ARG0, the one whose
     * terminal-id set contains the empty category found by
     * {@code getESMPair} is chosen outright; failing that, the argument
     * having a "," location with the largest terminal-id set is chosen. The
     * new argument points at the antecedent of that empty category or, when
     * it has none, at the nearest ancestor of the PRN node matching "+S.*".
     * Its label is the chosen argument's label followed by "-DSP", and all
     * arguments carrying the chosen label are removed from the instance.
     *
     * @param instance the instance to inspect; may lose arguments
     * @return the new argument (not yet added to the instance), or
     *         {@code null} when no such argument can be built
     */
    private PBArg getArgDSP(PBInstance instance) {
        CTTree tree = instance.getTree();
        CTNode pred = tree.getTerminal(instance.predId);
        Pair<CTNode, CTNode> pair = getESMPair(pred);
        if (pair == null) {
            return null;
        }

        CTNode prn = (CTNode)pair.o1;
        CTNode esm = (CTNode)pair.o2;

        PBArg best = null;
        IntOpenHashSet bestIds = new IntOpenHashSet();

        for (PBArg arg : instance.getArgs()) {
            if (!PBLib.isNumberedArgument(arg) || arg.isLabel("ARG0")) {
                continue;
            }

            IntOpenHashSet ids = arg.getTerminalIdSet(tree);
            if (ids.contains(esm.getTerminalId())) {
                // An argument covering the empty category wins immediately.
                best = arg;
                bestIds = ids;
                break;
            }
            if (arg.hasType(",") && bestIds.size() < ids.size()) {
                best = arg;
                bestIds = ids;
            }
        }

        if (best == null) {
            return null;
        }

        CTNode dsp = esm.getAntecedent();
        if (dsp == null) {
            dsp = prn.getNearestAncestor("+S.*");
        }
        if (dsp == null) {
            return null;
        }

        PBArg dspArg = new PBArg();
        dspArg.addLoc(dsp.getPBLoc());
        dspArg.label = best.label + "-" + "DSP";
        instance.removeArgs(best.label);
        return dspArg;
    }

    /**
     * Looks for the configuration handled by {@code getArgDSP}: the
     * predicate's nearest ancestor matching "+S.*" sits directly under a
     * "PRN" node, and the predicate's next sibling matching "+S|SBAR"
     * yields an empty category via {@code getESM}.
     *
     * @param pred the predicate terminal
     * @return a pair of (the PRN node, the empty category), or {@code null}
     *         when any part of the configuration is missing
     */
    private Pair<CTNode, CTNode> getESMPair(CTNode pred) {
        CTNode clause = pred.getNearestAncestor("+S.*");
        if (clause == null || !clause.getParent().isPTag("PRN")) {
            return null;
        }

        CTNode sibling = pred.getNextSibling("+S|SBAR");
        if (sibling == null) {
            return null;
        }

        CTNode emptyCategory = getESM(sibling);
        if (emptyCategory == null) {
            return null;
        }
        return new Pair<CTNode, CTNode>(clause.getParent(), emptyCategory);
    }

    /**
     * Finds the empty category of interest inside an "S" node, or inside a
     * two-child "SBAR" whose first child is an empty category with form "0"
     * (in which case the second child is examined).
     *
     * @param node the candidate "S" or "SBAR" node
     * @return the terminal returned by {@code getESMAux}, or {@code null}
     */
    private CTNode getESM(CTNode node) {
        if (node.isPTag("S")) {
            return getESMAux(node);
        }
        if (!node.isPTag("SBAR") || node.getChildrenSize() != 2) {
            return null;
        }

        CTNode first = node.getChild(0);
        CTNode second = node.getChild(1);
        boolean nullComplementizer = first.isEmptyCategory() && first.form.equals("0");
        return nullComplementizer ? getESMAux(second) : null;
    }

    /**
     * Returns the first terminal of {@code node} when the node is
     * (recursively) an empty category and that terminal's form starts with
     * "*T*" or "*?*".
     *
     * @param node the node to inspect
     * @return the matching terminal, or {@code null}
     */
    private CTNode getESMAux(CTNode node) {
        if (!node.isEmptyCategoryRec()) {
            return null;
        }
        CTNode terminal = node.getFirstTerminal();
        if (terminal == null) {
            return null;
        }
        boolean matches = terminal.form.startsWith("*T*") || terminal.form.startsWith("*?*");
        return matches ? terminal : null;
    }

    /**
     * Command-line entry point. All work is done by the constructor, which
     * reads the options and runs the post-processing.
     *
     * @param args command-line arguments, forwarded to the constructor
     */
    public static void main(String[] args) {
        new PBPostProcess(args);
    }
}

