/*
 * Decompiled with CFR 0.152.
 */
package org.apache.uima.ruta.textruler.learner.whisk.token;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.ruta.textruler.core.TextRulerAnnotation;
import org.apache.uima.ruta.textruler.core.TextRulerBasicLearner;
import org.apache.uima.ruta.textruler.core.TextRulerExample;
import org.apache.uima.ruta.textruler.core.TextRulerExampleDocument;
import org.apache.uima.ruta.textruler.core.TextRulerRule;
import org.apache.uima.ruta.textruler.core.TextRulerRuleList;
import org.apache.uima.ruta.textruler.core.TextRulerSlotPattern;
import org.apache.uima.ruta.textruler.core.TextRulerStatisticsCollector;
import org.apache.uima.ruta.textruler.core.TextRulerTarget;
import org.apache.uima.ruta.textruler.core.TextRulerToolkit;
import org.apache.uima.ruta.textruler.extension.TextRulerLearner;
import org.apache.uima.ruta.textruler.extension.TextRulerLearnerDelegate;
import org.apache.uima.ruta.textruler.learner.whisk.token.WhiskRule;
import org.apache.uima.ruta.textruler.learner.whisk.token.WhiskRuleItem;

public class Whisk
extends TextRulerBasicLearner {
    // Keys looked up in the parameter map by setParameters(Map).
    // NOTE(review): "WINDOSIZE" looks like a misspelling of "WINDOWSIZE"; the constant is public,
    // so it is left as-is to keep callers compiling.
    public static final String WINDOSIZE_KEY = "windowSize";
    public static final String ERROR_THRESHOLD_KEY = "errorThreshold";
    public static final String POSTAG_ROOTTYPE_KEY = "posTagRootType";
    // Default values for the three parameters above (same literals as the field initializers below).
    public static final int STANDARD_WINDOWSIZE = 5;
    public static final float STANDARD_ERROR_THRESHOLD = 0.1f;
    public static final String STANDARD_POSTAG_ROOTTYPE = "org.apache.uima.ml.ML.postag";
    // Rules accepted so far in the current run; re-created at the start of doRun().
    TextRulerRuleList ruleList;
    // Positive examples already covered by an accepted rule; doRun() skips these as seeds.
    protected Set<TextRulerExample> coveredExamples;
    // Maximum distance (in term numbers) between a candidate term and the slot in extendRule().
    protected int windowSize = 5;
    // Laplacian bound: doRun() accepts a rule that still covers negatives only if its Laplacian is
    // at or below this value. The initializer is a float literal widened to double.
    protected double errorThreshold = 0.1f;
    // Name of the type whose annotations extendRule() uses as POS tags; null or empty disables
    // the POS-tag rule variants.
    protected String posTagRootTypeName = "org.apache.uima.ml.ML.postag";
    // Counters that only feed the status message built in extendRule().
    int roundNumber = 0;
    int allExamplesCount = 0;
    // Covering statistics of already-tested rules, keyed by rule string (see testRulesIfNotCached).
    private Map<String, TextRulerStatisticsCollector> cachedTestedRuleStatistics = new HashMap<String, TextRulerStatisticsCollector>();

    /**
     * Creates the learner. All arguments are forwarded unchanged to the
     * {@link TextRulerBasicLearner} constructor; this class adds no construction logic of its own.
     */
    public Whisk(
            String inputDir,
            String prePropTmFile,
            String tmpDir,
            String[] slotNames,
            Set<String> filterSet,
            boolean skip,
            TextRulerLearnerDelegate delegate) {
        super(inputDir, prePropTmFile, tmpDir, slotNames, filterSet, skip, delegate);
    }

    /**
     * @return always {@code false} for this learner
     */
    @Override
    public boolean collectNegativeCoveredInstancesWhenTesting() {
        final boolean collectNegatives = false;
        return collectNegatives;
    }

    /**
     * Runs the learning loop: every positive example that is not yet covered is used as a seed
     * for {@link #growRule}; a grown rule is added to {@link #ruleList} when it covers no
     * negatives or its Laplacian is at or below {@link #errorThreshold}.
     *
     * <p>All per-run state (rule list, covered examples, statistics cache, counters) is reset at
     * the start. If {@code shouldAbort()} becomes true the method returns early without sending
     * the final "Done" status.
     */
    @Override
    protected void doRun() {
        this.cachedTestedRuleStatistics.clear();
        this.ruleList = new TextRulerRuleList();
        this.coveredExamples = new HashSet<TextRulerExample>();
        // Reset together with the rest of the per-run state so a re-run starts its status
        // messages at round 1 again instead of continuing the previous run's numbering.
        this.roundNumber = 0;
        this.sendStatusUpdateToDelegate("Creating examples...", TextRulerLearner.TextRulerLearnerState.ML_RUNNING, false);
        TextRulerTarget target = new TextRulerTarget(this.slotNames[0], (TextRulerBasicLearner)this);
        this.exampleDocuments.createExamplesForTarget(target);
        TextRulerExampleDocument[] docs = this.exampleDocuments.getSortedDocumentsInCacheOptimizedOrder();
        this.allExamplesCount = this.exampleDocuments.getAllPositiveExamples().size();
        for (TextRulerExampleDocument inst : docs) {
            List<TextRulerExample> tags = inst.getPositiveExamples();
            for (TextRulerExample tag : tags) {
                if (this.coveredExamples.contains(tag)) {
                    continue;
                }
                ++this.roundNumber;
                WhiskRule newRule = this.growRule(inst, tag);
                if (this.shouldAbort()) {
                    break;
                }
                if (newRule == null) {
                    continue;
                }
                // Accept a rule that is error-free on the training set, or whose expected
                // error (Laplacian) is within the configured threshold.
                boolean acceptable = newRule.getCoveringStatistics().getCoveredNegativesCount() == 0
                        || newRule.getLaplacian() <= this.errorThreshold;
                if (!acceptable) {
                    continue;
                }
                this.ruleList.addRule(newRule);
                this.coveredExamples.addAll(newRule.getCoveringStatistics().getCoveredPositiveExamples());
                this.sendStatusUpdateToDelegate("New Rule added...", TextRulerLearner.TextRulerLearnerState.ML_RUNNING, true);
            }
            if (this.shouldAbort()) {
                return;
            }
        }
        this.sendStatusUpdateToDelegate("Done", TextRulerLearner.TextRulerLearnerState.ML_DONE, true);
        this.cachedTestedRuleStatistics.clear();
    }

    /**
     * Grows a rule from a seed example in two phases: each slot of the example is anchored via
     * {@link #anchor}, then the rule is repeatedly extended via {@link #extendRule} while it
     * still covers negative examples.
     *
     * @param doc the document the seed example belongs to
     * @param example the seed example
     * @return the final rule, or {@code null} if anchoring failed or the run was aborted during
     *         anchoring
     */
    protected WhiskRule growRule(TextRulerExampleDocument doc, TextRulerExample example) {
        this.sendStatusUpdateToDelegate("Creating new rule from seed...", TextRulerLearner.TextRulerLearnerState.ML_RUNNING, false);
        WhiskRule current = new WhiskRule(this, example.getTarget(), example);
        final int slotCount = example.getAnnotations().length;
        // One (initially empty) slot pattern per slot annotation of the example.
        for (int slot = 0; slot < slotCount; slot++) {
            current.getPatterns().add(new TextRulerSlotPattern());
        }
        List<WhiskRuleItem> exampleTerms = this.getAllTermsOfExample(example);

        this.sendStatusUpdateToDelegate("Creating new rule: anchoring...", TextRulerLearner.TextRulerLearnerState.ML_RUNNING, false);
        for (int slot = 0; slot < slotCount; slot++) {
            current = this.anchor(current, doc, example, exampleTerms, slot);
            if (this.shouldAbort()) {
                return null;
            }
        }

        this.sendStatusUpdateToDelegate("Creating new rule: extending...", TextRulerLearner.TextRulerLearnerState.ML_RUNNING, false);
        if (current == null) {
            return null;
        }

        double previousLaplacian = current.getLaplacian();
        int subRound = 0;
        while (current.getCoveringStatistics().getCoveredNegativesCount() > 0) {
            WhiskRule extended = this.extendRule(current, doc, example, exampleTerms, subRound);
            if (extended == null) {
                break;
            }
            current = extended;
            TextRulerToolkit.log("----------------------------");
            TextRulerToolkit.log("BEST EXTENSION IS: " + current.getRuleString());
            TextRulerToolkit.log("Laplacian: " + current.getLaplacian() + "    ; " + current.getCoveringStatistics());
            subRound++;
            double laplacianNow = current.getLaplacian();
            // Stop as soon as an extension no longer lowers the Laplacian.
            if (laplacianNow >= previousLaplacian) {
                break;
            }
            previousLaplacian = laplacianNow;
        }
        TextRulerToolkit.log("----------------------------");
        TextRulerToolkit.log("FINAL RULE IS : " + current.getRuleString());
        return current;
    }

    /**
     * Tries to improve {@code rule} by adding one more term of the example to it.
     *
     * <p>For every term that is not yet part of the rule and lies within {@link #windowSize}
     * terms before the first or after the last term of the first slot, up to five candidate
     * rules are generated (plain term, regular expression hidden, and POS-tag constrained
     * variants). All candidates are tested on the training set and the one with the lowest
     * Laplacian is returned; on equal Laplacian the candidate with fewer constraint points wins.
     *
     * @param rule the rule to extend
     * @param doc the document of the seed example
     * @param example the seed example the rule was grown from
     * @param allTerms all terms of the example, as produced by {@link #getAllTermsOfExample}
     * @param subRoundNumber extension round, used only for the status message
     * @return the best candidate; {@code rule} itself if it is already within the error
     *         threshold and no candidate beats it; {@code null} if nothing qualifies or the run
     *         was aborted
     */
    protected WhiskRule extendRule(WhiskRule rule, TextRulerExampleDocument doc, TextRulerExample example, List<WhiskRuleItem> allTerms, int subRoundNumber) {
        WhiskRule bestRule = null;
        double bestL = 1.0;
        int bestRuleConstraintPoints = -1;
        if (rule.getLaplacian() <= this.errorThreshold) {
            bestRule = rule;
            bestL = rule.getLaplacian();
        }
        TextRulerAnnotation firstSlot = example.getAnnotations()[0];
        List<WhiskRuleItem> slotTerms = this.getTermsWithinBounds(allTerms, firstSlot.getBegin(), firstSlot.getEnd());
        if (slotTerms.isEmpty()) {
            // Without slot terms there is no window to pick extension terms from.
            return bestRule;
        }
        int firstSlotTermNumber = slotTerms.get(0).getTermNumberInExample();
        int lastSlotTermNumber = slotTerms.get(slotTerms.size() - 1).getTermNumberInExample();

        List<TextRulerRule> rulesToTest = new ArrayList<TextRulerRule>();
        for (WhiskRuleItem term : allTerms) {
            if (rule.containsTerm(term)) {
                continue;
            }
            int termNumber = term.getTermNumberInExample();
            // Window check: distance to the nearest slot boundary. Terms to the right are
            // measured from the LAST slot term; measuring from the first one would shrink the
            // right-hand window by the length of the slot.
            if (termNumber < firstSlotTermNumber) {
                if (firstSlotTermNumber - termNumber > this.windowSize) {
                    continue;
                }
            } else if (termNumber > lastSlotTermNumber) {
                if (termNumber - lastSlotTermNumber > this.windowSize) {
                    continue;
                }
            }

            WhiskRule proposedRule = this.createNewRuleByAddingTerm(rule, term);
            if (proposedRule == null) {
                // createNewRuleByAddingTerm signals "cannot add / nothing changed" with null.
                continue;
            }
            WhiskRuleItem t = proposedRule.searchItemWithTermNumber(termNumber);
            addCandidateIfNew(rulesToTest, proposedRule);

            // Variant 2: same rule, but with the term's regular expression hidden.
            WhiskRule regExpHiddenRule = null;
            if (t.getWordConstraint().isRegExpConstraint()) {
                regExpHiddenRule = proposedRule.copy();
                regExpHiddenRule.searchItemWithTermNumber(termNumber).setHideRegExp(true);
                regExpHiddenRule.setNeedsCompile(true);
                addCandidateIfNew(rulesToTest, regExpHiddenRule);
            }

            if (this.posTagRootTypeName == null || this.posTagRootTypeName.length() <= 0) {
                continue;
            }
            TextRulerAnnotation tokenAnnotation = term.getWordConstraint().getTokenAnnotation();
            CAS cas = example.getDocumentCAS();
            TypeSystem ts = cas.getTypeSystem();
            if (ts == null) {
                continue;
            }
            Type posTagsRootType = ts.getType(this.posTagRootTypeName);
            if (posTagsRootType == null) {
                // The configured POS-tag type is not part of this type system, so there are no
                // POS tags to use. NOTE(review): how the toolkit treats a null root type is not
                // visible from here; skipping avoids relying on it.
                continue;
            }
            List<AnnotationFS> posTagAnnotations = TextRulerToolkit.getAnnotationsWithinBounds(cas, tokenAnnotation.getBegin(), tokenAnnotation.getEnd(), null, posTagsRootType);
            if (posTagAnnotations.size() <= 0) {
                continue;
            }
            AnnotationFS posTag = posTagAnnotations.get(0);
            // Only use a POS tag that spans exactly the token.
            if (posTag.getBegin() != tokenAnnotation.getBegin() || posTag.getEnd() != tokenAnnotation.getEnd()) {
                continue;
            }
            TextRulerAnnotation posTagAnnotation = new TextRulerAnnotation(posTag, doc);

            // Variant 3: term plus POS-tag constraint.
            addCandidateIfNew(rulesToTest, copyWithPosTagConstraint(proposedRule, termNumber, tokenAnnotation, posTagAnnotation, false));
            // Variant 4: hidden regular expression plus POS-tag constraint.
            if (regExpHiddenRule != null) {
                addCandidateIfNew(rulesToTest, copyWithPosTagConstraint(regExpHiddenRule, termNumber, tokenAnnotation, posTagAnnotation, false));
            }
            // Variant 5: POS-tag constraint only, word constraint removed.
            addCandidateIfNew(rulesToTest, copyWithPosTagConstraint(proposedRule, termNumber, tokenAnnotation, posTagAnnotation, true));
        }
        if (rulesToTest.size() == 0) {
            return bestRule;
        }
        this.sendStatusUpdateToDelegate("Round " + this.roundNumber + "." + subRoundNumber + " - Testing " + rulesToTest.size() + " rules...  - uncovered examples: " + (this.allExamplesCount - this.coveredExamples.size()) + " / " + this.allExamplesCount + " ; cs=" + this.cachedTestedRuleStatistics.size(), TextRulerLearner.TextRulerLearnerState.ML_RUNNING, false);
        TextRulerToolkit.log("Testing " + rulesToTest.size() + " rules on training set...");
        for (TextRulerRule r : rulesToTest) {
            TextRulerToolkit.log(r.getRuleString());
        }
        this.testRulesIfNotCached(rulesToTest);
        if (this.shouldAbort()) {
            return null;
        }
        for (TextRulerRule r : rulesToTest) {
            WhiskRule wr = (WhiskRule)r;
            if (wr.getLaplacian() < bestL) {
                bestL = wr.getLaplacian();
                bestRule = wr;
                bestRuleConstraintPoints = bestRule.totalConstraintPoints();
                continue;
            }
            // Tie-break applies only against a best rule chosen from the candidates
            // (bestRuleConstraintPoints stays -1 while the incoming rule is still the best).
            if (wr.getLaplacian() != bestL || bestRuleConstraintPoints < 0) {
                continue;
            }
            TextRulerToolkit.log("Same Laplacian! So prefer more general rule!");
            if (wr.totalConstraintPoints() >= bestRuleConstraintPoints) {
                continue;
            }
            TextRulerToolkit.log("\tYes, prefered!");
            bestL = wr.getLaplacian();
            bestRule = wr;
            bestRuleConstraintPoints = bestRule.totalConstraintPoints();
        }
        return bestRule;
    }

    /** Appends {@code candidate} to {@code candidates} unless an equal rule is already listed. */
    private static void addCandidateIfNew(List<TextRulerRule> candidates, WhiskRule candidate) {
        if (!candidates.contains(candidate)) {
            candidates.add(candidate);
        }
    }

    /**
     * Copies {@code base} and adds a POS-tag constraint to the item with the given term number;
     * optionally removes that item's word constraint. The copy is marked as needing compilation.
     */
    private static WhiskRule copyWithPosTagConstraint(WhiskRule base, int termNumber, TextRulerAnnotation tokenAnnotation, TextRulerAnnotation posTagAnnotation, boolean dropWordConstraint) {
        WhiskRule variant = base.copy();
        WhiskRuleItem item = variant.searchItemWithTermNumber(termNumber);
        item.addOtherConstraint(new WhiskRuleItem.MLWhiskOtherConstraint(tokenAnnotation, posTagAnnotation));
        if (dropWordConstraint) {
            item.setWordConstraint(null);
        }
        variant.setNeedsCompile(true);
        return variant;
    }

    /**
     * Creates a copy of {@code baseRule} with {@code term} inserted at the position implied by
     * its term number, adding wildcard items where the new term is not directly adjacent to its
     * left or right neighbour in the rule.
     *
     * @param baseRule the rule to copy and extend; it is not modified by this method
     * @param term the term to add; a copy of it is inserted
     * @return the new rule, or {@code null} if an item with the same term number exists that is
     *         not a replaceable wildcard of the target pattern, or if the resulting rule string
     *         equals that of {@code baseRule}
     */
    protected WhiskRule createNewRuleByAddingTerm(WhiskRule baseRule, WhiskRuleItem term) {
        WhiskRule newRule = baseRule.copy();
        // NOTE(review): foundSlotNumber and foundSlotPattern are assigned below but never read
        // in this method; they look like leftovers of removed debug output.
        int foundSlotNumber = -1;
        String foundSlotPattern = "";
        int termNumber = term.getTermNumberInExample();
        ArrayList targetPattern = null;
        ArrayList previousSlotPostFillerPattern = null;
        // Phase 1: walk the slot patterns and pick the sub-pattern (pre-filler, filler or
        // post-filler) that should receive the term, based on the term numbers already present.
        // NOTE(review): there is no early exit once a pattern is found, and the pre-filler check
        // at the top of the loop body is not guarded by targetPattern == null, so a later slot
        // can overwrite an earlier choice — confirm this is intended for multi-slot rules.
        for (int i = 0; i < newRule.getPatterns().size(); ++i) {
            TextRulerSlotPattern slotPattern = newRule.getPatterns().get(i);
            WhiskRuleItem it = (WhiskRuleItem)slotPattern.preFillerPattern.lastItem();
            // Term lies at or before the last pre-filler item: it belongs to the pre-filler.
            if (it != null && termNumber <= it.getTermNumberInExample()) {
                targetPattern = slotPattern.preFillerPattern;
            }
            if (targetPattern == null && slotPattern.fillerPattern.size() > 0) {
                it = (WhiskRuleItem)slotPattern.fillerPattern.firstItem();
                if (termNumber < it.getTermNumberInExample()) {
                    // Before the first filler item: goes to the end of the pre-filler.
                    targetPattern = slotPattern.preFillerPattern;
                } else {
                    it = (WhiskRuleItem)slotPattern.fillerPattern.lastItem();
                    if (termNumber <= it.getTermNumberInExample()) {
                        targetPattern = slotPattern.fillerPattern;
                    }
                }
            }
            if (targetPattern == null && slotPattern.postFillerPattern.size() > 0) {
                it = (WhiskRuleItem)slotPattern.postFillerPattern.firstItem();
                if (termNumber < it.getTermNumberInExample()) {
                    // Between the filler and the first post-filler item: assigned to the filler.
                    targetPattern = slotPattern.fillerPattern;
                } else {
                    it = (WhiskRuleItem)slotPattern.postFillerPattern.lastItem();
                    if (termNumber <= it.getTermNumberInExample()) {
                        targetPattern = slotPattern.postFillerPattern;
                    }
                }
            }
            if (targetPattern == null) {
                // Nothing matched in this slot: fall back to the previous slot's post-filler
                // (still null for the first slot).
                targetPattern = previousSlotPostFillerPattern;
                if (i > 0) {
                    TextRulerSlotPattern prevSlotPattern = newRule.getPatterns().get(i - 1);
                    foundSlotPattern = targetPattern == prevSlotPattern.preFillerPattern ? "PRE FILLER" : (targetPattern == prevSlotPattern.fillerPattern ? "FILLER" : "POST FILLER");
                    foundSlotNumber = i - 1;
                }
            } else {
                foundSlotPattern = targetPattern == slotPattern.preFillerPattern ? "PRE FILLER" : (targetPattern == slotPattern.fillerPattern ? "FILLER" : "POST FILLER");
                foundSlotNumber = i;
            }
            previousSlotPostFillerPattern = slotPattern.postFillerPattern;
        }
        // Still nothing: the term goes to the post-filler of the last slot.
        if (targetPattern == null) {
            targetPattern = previousSlotPostFillerPattern;
            foundSlotNumber = newRule.getPatterns().size() - 1;
            foundSlotPattern = "POST FILLER";
        }
        if (targetPattern == null) {
            // Only reachable when the rule has no slot patterns at all.
            TextRulerToolkit.log("ERROR, NO TARGET PATTERN FOR NEW RULE TERM FOUND !");
        } else {
            // Phase 2: insert a copy of the term into the chosen pattern.
            WhiskRuleItem right;
            int indexInPattern = -1;
            if (targetPattern.size() == 0) {
                targetPattern.add(term.copy());
                indexInPattern = 0;
            } else {
                // An existing item with the same term number must be a star wildcard inside the
                // target pattern; it is then replaced by the term.
                WhiskRuleItem wildCard = newRule.searchItemWithTermNumber(termNumber);
                if (wildCard != null) {
                    if (!wildCard.isStarWildCard()) {
                        TextRulerToolkit.log("ERROR, FOUND A TERM WITH THE SAME NUMBER THAT IS NOT A WILDCARD! HOW IS THAT???");
                        return null;
                    }
                    if (!targetPattern.contains(wildCard)) {
                        TextRulerToolkit.log("EVEN WORSE, THAT MUST NOT BE AT ALL!");
                        return null;
                    }
                    indexInPattern = targetPattern.indexOf(wildCard);
                    targetPattern.set(indexInPattern, term.copy());
                } else {
                    // Insert before the first item with a larger term number, or append.
                    for (int i = 0; i < targetPattern.size(); ++i) {
                        WhiskRuleItem it = (WhiskRuleItem)targetPattern.get(i);
                        if (termNumber >= it.getTermNumberInExample()) continue;
                        indexInPattern = i;
                        break;
                    }
                    if (indexInPattern < 0) {
                        indexInPattern = targetPattern.size();
                        targetPattern.add(term.copy());
                    } else {
                        targetPattern.add(indexInPattern, term.copy());
                    }
                }
            }
            // Phase 3: bridge gaps to non-adjacent, non-wildcard neighbours with wildcard items.
            WhiskRuleItem newTerm = (WhiskRuleItem)targetPattern.get(indexInPattern);
            WhiskRuleItem left = newRule.searchNeighborOfItem(newTerm, true);
            if (left != null && left.getTermNumberInExample() < newTerm.getTermNumberInExample() - 1 && !left.isStarWildCard()) {
                targetPattern.add(indexInPattern, WhiskRuleItem.newWildCardItem(left.getTermNumberInExample() + 1));
                // The new term moved one position to the right.
                ++indexInPattern;
            }
            if ((right = newRule.searchNeighborOfItem(newTerm, false)) != null && right.getTermNumberInExample() > newTerm.getTermNumberInExample() + 1 && !right.isStarWildCard()) {
                WhiskRuleItem wc = WhiskRuleItem.newWildCardItem(newTerm.getTermNumberInExample() + 1);
                if (indexInPattern + 1 < targetPattern.size()) {
                    targetPattern.add(indexInPattern + 1, wc);
                } else {
                    targetPattern.add(wc);
                }
            }
            newRule.setNeedsCompile(true);
        }
        // A rule whose string did not change is reported as "no new rule".
        if (newRule.getRuleString().equals(baseRule.getRuleString())) {
            return null;
        }
        return newRule;
    }

    /**
     * Anchors one slot of {@code rule} by building two base rules, testing both on the training
     * set and keeping the one that covers more positive examples:
     * <ul>
     * <li>base1 describes the slot by its content: first slot term, a wildcard if the slot has
     * more than two terms, and the last slot term;</li>
     * <li>base2 describes the slot by its context: the term before the slot, a wildcard filler,
     * and the term after the slot.</li>
     * </ul>
     *
     * @param rule the rule to anchor; may be {@code null}
     * @param doc the document of the example (not used by this method)
     * @param example the seed example
     * @param allTerms all terms of the example
     * @param slotIndex index of the slot to anchor
     * @return base2 if it covers strictly more positives than base1, otherwise base1;
     *         {@code null} if {@code rule} is {@code null}, the slot contains no terms, or the
     *         run was aborted
     */
    protected WhiskRule anchor(WhiskRule rule, TextRulerExampleDocument doc, TextRulerExample example, List<WhiskRuleItem> allTerms, int slotIndex) {
        TextRulerAnnotation slot = example.getAnnotations()[slotIndex];
        List<WhiskRuleItem> slotTerms = this.getTermsWithinBounds(allTerms, slot.getBegin(), slot.getEnd());
        if (rule == null || slotTerms.isEmpty()) {
            return null;
        }
        final int slotTermCount = slotTerms.size();
        WhiskRuleItem firstSlotTerm = slotTerms.get(0);
        WhiskRuleItem lastSlotTerm = slotTerms.get(slotTermCount - 1);

        // base1: first term [wildcard] [last term]
        WhiskRule base1 = rule.copy();
        TextRulerSlotPattern contentPattern = base1.getPatterns().get(slotIndex);
        contentPattern.fillerPattern.add(firstSlotTerm.copy());
        if (slotTermCount > 2) {
            // A single wildcard, numbered after the second slot term, stands for the interior.
            contentPattern.fillerPattern.add(WhiskRuleItem.newWildCardItem(slotTerms.get(1).getTermNumberInExample()));
        }
        if (slotTermCount > 1) {
            contentPattern.fillerPattern.add(lastSlotTerm.copy());
        }

        // base2: [term before] wildcard [term after]
        WhiskRule base2 = rule.copy();
        TextRulerSlotPattern contextPattern = base2.getPatterns().get(slotIndex);
        int firstOfSlot = allTerms.indexOf(firstSlotTerm);
        int lastOfSlot = allTerms.indexOf(lastSlotTerm);
        // NOTE(review): unlike base1, the neighbour terms are added without copy() — confirm
        // that sharing the instances with allTerms is intended.
        if (firstOfSlot > 0) {
            contextPattern.preFillerPattern.add(allTerms.get(firstOfSlot - 1));
        }
        contextPattern.fillerPattern.add(WhiskRuleItem.newWildCardItem(firstSlotTerm.getTermNumberInExample()));
        if (lastOfSlot + 1 < allTerms.size()) {
            contextPattern.postFillerPattern.add(allTerms.get(lastOfSlot + 1));
        }

        TextRulerToolkit.log("base1: " + base1.getRuleString());
        TextRulerToolkit.log("base2: " + base2.getRuleString());

        List<TextRulerRule> candidates = new ArrayList<TextRulerRule>();
        candidates.add(base1);
        candidates.add(base2);
        this.testRulesIfNotCached(candidates);
        if (this.shouldAbort()) {
            return null;
        }

        TextRulerToolkit.log("\tbase1: " + base1.getCoveringStatistics() + " --> laplacian = " + base1.getLaplacian());
        TextRulerToolkit.log("\tbase2: " + base2.getCoveringStatistics() + " --> laplacian = " + base2.getLaplacian());

        boolean contextCoversMore = base2.getCoveringStatistics().getCoveredPositivesCount() > base1.getCoveringStatistics().getCoveredPositivesCount();
        return contextCoversMore ? base2 : base1;
    }

    /**
     * @return the file header followed by the string form of all learned rules, or a placeholder
     *         text while no rule list exists yet
     */
    @Override
    public String getResultString() {
        return this.ruleList == null
                ? "No results available yet!"
                : this.getFileHeaderString(true) + this.ruleList.getRulesString("");
    }

    /**
     * Applies learner parameters from a key/value map. Keys that are absent leave the current
     * value untouched.
     *
     * <ul>
     * <li>{@link #WINDOSIZE_KEY}: any {@link Number}, stored as {@code int}</li>
     * <li>{@link #ERROR_THRESHOLD_KEY}: any {@link Number}, stored as {@code double}</li>
     * <li>{@link #POSTAG_ROOTTYPE_KEY}: a {@link String}; an explicit {@code null} is stored
     * and disables the POS-tag rule variants</li>
     * </ul>
     *
     * Numeric entries mapped to {@code null} are ignored.
     *
     * @param params the parameter map
     * @throws ClassCastException if a value has an incompatible type
     */
    @Override
    public void setParameters(Map<String, Object> params) {
        // Accept any Number instead of exactly Integer/Float, so e.g. a Double threshold no
        // longer fails with a ClassCastException. For a Float, doubleValue() yields the same
        // widened value the previous float-to-double assignment produced.
        Object windowValue = params.get(WINDOSIZE_KEY);
        if (windowValue != null) {
            this.windowSize = ((Number)windowValue).intValue();
        }
        Object thresholdValue = params.get(ERROR_THRESHOLD_KEY);
        if (thresholdValue != null) {
            this.errorThreshold = ((Number)thresholdValue).doubleValue();
        }
        if (params.containsKey(POSTAG_ROOTTYPE_KEY)) {
            this.posTagRootTypeName = (String)params.get(POSTAG_ROOTTYPE_KEY);
        }
    }

    /**
     * Collects the terms of the example's document: every annotation of type
     * {@code org.apache.uima.ruta.type.ANY} returned by the toolkit for the whole document text
     * is wrapped in a {@link WhiskRuleItem} and numbered consecutively from 0 in the order the
     * toolkit returns them.
     *
     * @param example the example whose document CAS is scanned
     * @return the numbered terms
     */
    public List<WhiskRuleItem> getAllTermsOfExample(TextRulerExample example) {
        CAS cas = example.getDocumentCAS();
        Type tokensRootType = cas.getTypeSystem().getType("org.apache.uima.ruta.type.ANY");
        List<AnnotationFS> tokens = TextRulerToolkit.getAnnotationsWithinBounds(cas, 0, cas.getDocumentText().length() + 1, TextRulerToolkit.getFilterSetWithSlotNames(this.slotNames, this.filterSet), tokensRootType);
        List<WhiskRuleItem> terms = new ArrayList<WhiskRuleItem>();
        for (AnnotationFS token : tokens) {
            WhiskRuleItem term = new WhiskRuleItem(new TextRulerAnnotation(token, example.getDocument()));
            // The term number is the position in the result list.
            term.setTermNumberInExample(terms.size());
            terms.add(term);
        }
        return terms;
    }

    /**
     * Returns the terms whose token annotation lies completely inside
     * {@code [startPos, endPos]}. Scanning stops at the first term that ends after
     * {@code endPos}, so later terms are never inspected.
     *
     * @param allTerms the terms to scan, in list order
     * @param startPos smallest allowed begin offset
     * @param endPos largest allowed end offset
     * @return the matching terms, in their original order
     */
    public List<WhiskRuleItem> getTermsWithinBounds(List<WhiskRuleItem> allTerms, int startPos, int endPos) {
        List<WhiskRuleItem> matches = new ArrayList<WhiskRuleItem>();
        for (WhiskRuleItem candidate : allTerms) {
            TextRulerAnnotation token = candidate.getWordConstraint().getTokenAnnotation();
            if (token.getEnd() > endPos) {
                // First term reaching past the upper bound ends the scan.
                break;
            }
            if (token.getBegin() >= startPos) {
                matches.add(candidate);
            }
        }
        return matches;
    }

    /**
     * Makes sure every rule in {@code rules} carries covering statistics. Rules whose rule
     * string is already in the cache receive a copy of the cached statistics; the remaining
     * rules are tested on the example documents and their statistics are cached afterwards.
     * If the run is aborted during testing, nothing is added to the cache.
     *
     * @param rules the rules to test
     */
    protected void testRulesIfNotCached(List<TextRulerRule> rules) {
        List<TextRulerRule> uncached = new ArrayList<TextRulerRule>();
        for (TextRulerRule rule : rules) {
            String ruleString = rule.getRuleString();
            if (!this.cachedTestedRuleStatistics.containsKey(ruleString)) {
                uncached.add(rule);
                continue;
            }
            rule.setCoveringStatistics(this.cachedTestedRuleStatistics.get(ruleString).copy());
            TextRulerToolkit.log("CACHE HIT !");
        }
        if (uncached.isEmpty()) {
            return;
        }
        this.testRulesOnDocumentSet(uncached, this.exampleDocuments);
        if (this.shouldAbort()) {
            return;
        }
        for (TextRulerRule tested : uncached) {
            this.cachedTestedRuleStatistics.put(tested.getRuleString(), tested.getCoveringStatistics().copy());
        }
    }
}

