/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ctakes.gui.dictionary.umls;

import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.function.Consumer;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.ctakes.gui.dictionary.util.FileUtil;
import org.apache.ctakes.gui.dictionary.util.RareWordUtil;

/**
 * Validates, strips and expands term texts while a dictionary is being built.
 *
 * <p>An instance holds several string collections (removal triggers, unwanted prefixes/suffixes,
 * unwanted texts, modifier suffixes, abbreviations, keep-prefix triggers) that are read once in the
 * constructor through {@code FileUtil.readOneColumn}.  The instance methods only read those
 * collections.
 */
public final class UmlsTermUtil {
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");
    private static final Pattern AUTO_NOTE = Pattern.compile("@");
    /** Characters removed from an abbreviation suffix before it is used as a term of its own. */
    private static final Pattern ABBREVIATION_PUNCTUATION = Pattern.compile("[:()\\[\\]&\\-]");
    /** Characters removed from a modifier suffix before it is moved in front of the main text. */
    private static final Pattern MODIFIER_PUNCTUATION = Pattern.compile("[(),\\-]");
    /** Length limit applied to whole term texts throughout this class. */
    private static final int MAX_TEXT_LENGTH = 255;
    /** Suffix marking an optional plural, e.g. {@code "tumor ( s )"}. */
    private static final String OPTIONAL_PLURAL_SUFFIX = "( s )";

    private final Collection<String> _removalPrefixTriggers;
    private final Collection<String> _removalSuffixTriggers;
    private final Collection<String> _removalColonTriggers;
    private final Collection<String> _removalFunctionTriggers;
    private final Collection<String> _unwantedPrefixes;
    private final Collection<String> _unwantedSuffixes;
    private final Collection<String> _unwantedTexts;
    private final Collection<String> _modifierSuffixes;
    private final Collection<String> _abbreviations;
    private final Collection<String> _unwantedPosTexts;
    private final Collection<String> _keepPrefixTriggers;

    /**
     * @param dataDir directory holding the data files
     * @param dataFile one of the known data files
     * @return {@code dataDir}, a forward slash, and the file name of {@code dataFile}
     */
    private static String getDataPath(String dataDir, DATA_FILE dataFile) {
        return dataDir + '/' + dataFile.__name;
    }

    /**
     * Creates a utility whose data files all live in one directory under their default names
     * (see {@link DATA_FILE}).
     *
     * @param dataDir directory containing the data files
     */
    public UmlsTermUtil(String dataDir) {
        this(getDataPath(dataDir, DATA_FILE.REMOVAL_PREFIX_TRIGGERS),
                getDataPath(dataDir, DATA_FILE.REMOVAL_SUFFIX_TRIGGERS),
                getDataPath(dataDir, DATA_FILE.REMOVAL_COLON_TRIGGERS),
                getDataPath(dataDir, DATA_FILE.REMOVAL_FUNCTION_TRIGGERS),
                getDataPath(dataDir, DATA_FILE.UNWANTED_PREFIXES),
                getDataPath(dataDir, DATA_FILE.UNWANTED_SUFFIXES),
                getDataPath(dataDir, DATA_FILE.UNWANTED_TEXTS),
                getDataPath(dataDir, DATA_FILE.MODIFIER_SUFFIXES),
                getDataPath(dataDir, DATA_FILE.RIGHT_ABBREVIATIONS),
                getDataPath(dataDir, DATA_FILE.KEEP_PREFIX_TRIGGERS));
    }

    /**
     * Creates a utility from explicitly named data files.  Each path is handed to
     * {@code FileUtil.readOneColumn} together with a short description; how that helper reports
     * missing or unreadable files is not visible here.
     *
     * @param removalPrefixTriggersPath   texts starting with one of these are rejected
     * @param removalSuffixTriggersPath   texts ending with one of these are rejected
     * @param removalColonTriggersPath    texts containing one of these are rejected
     * @param removalFunctionTriggersPath texts containing one of these are rejected
     * @param unwantedPrefixesPath        prefixes removed by {@link #getStrippedText(String)}
     * @param unwantedSuffixesPath        suffixes removed by {@link #getStrippedText(String)}
     * @param unwantedTextsPath           texts rejected when matched exactly
     * @param modifierSuffixesPath        trailing modifiers that are also moved to the front
     * @param abbreviationsPath           trailing abbreviations that are split into their own term
     * @param keepPrefixTriggersPath      texts starting with one of these are always accepted
     */
    public UmlsTermUtil(String removalPrefixTriggersPath, String removalSuffixTriggersPath, String removalColonTriggersPath, String removalFunctionTriggersPath, String unwantedPrefixesPath, String unwantedSuffixesPath, String unwantedTextsPath, String modifierSuffixesPath, String abbreviationsPath, String keepPrefixTriggersPath) {
        this._removalPrefixTriggers = FileUtil.readOneColumn(removalPrefixTriggersPath, "term removal Prefix Triggers");
        this._removalSuffixTriggers = FileUtil.readOneColumn(removalSuffixTriggersPath, "term removal Suffix Triggers");
        this._removalColonTriggers = FileUtil.readOneColumn(removalColonTriggersPath, "term removal Colon Triggers");
        this._removalFunctionTriggers = FileUtil.readOneColumn(removalFunctionTriggersPath, "term removal Function Triggers");
        this._unwantedPrefixes = FileUtil.readOneColumn(unwantedPrefixesPath, "unwanted Prefixes");
        this._unwantedSuffixes = FileUtil.readOneColumn(unwantedSuffixesPath, "unwanted Suffixes");
        this._unwantedTexts = FileUtil.readOneColumn(unwantedTextsPath, "unwanted Texts");
        this._modifierSuffixes = FileUtil.readOneColumn(modifierSuffixesPath, "modifier Suffixes");
        this._abbreviations = FileUtil.readOneColumn(abbreviationsPath, "Abbreviations to expand");
        this._keepPrefixTriggers = FileUtil.readOneColumn(keepPrefixTriggersPath, "term keep Prefix Triggers");
        this._unwantedPosTexts = RareWordUtil.getUnwantedPosTexts();
    }

    /**
     * Decides whether a text is acceptable as a term.
     *
     * <p>Checks run in this order: the overall length limit; the keep-prefix triggers and the
     * hard-coded family-history prefixes (either accepts the text immediately, before any character
     * check); printable-ASCII-only content with at least one alphabetic character; then the various
     * unwanted-text and removal-trigger collections.
     *
     * @param text text to check, must not be {@code null}
     * @return {@code true} if the text passes every check
     */
    public boolean isTextValid(String text) {
        if (text.length() > MAX_TEXT_LENGTH) {
            return false;
        }
        if (this._keepPrefixTriggers.stream().anyMatch(text::startsWith)) {
            return true;
        }
        if (text.startsWith("fh ") || text.startsWith("no fh ") || text.startsWith("family ") || text.startsWith("history ")) {
            return true;
        }
        boolean haveChar = false;
        for (int i = 0; i < text.length(); ++i) {
            final char c = text.charAt(i);
            // Only the printable ASCII range (space through tilde) is allowed.
            if (c < ' ' || c > '~') {
                return false;
            }
            if (!haveChar && Character.isAlphabetic(c)) {
                haveChar = true;
            }
        }
        if (!haveChar) {
            return false;
        }
        // Three-character texts that open with a parenthesis are rejected.
        if (text.length() == 3 && text.charAt(0) == '(') {
            return false;
        }
        // More than two pieces around '@' (trailing empty pieces are not counted by split).
        if (AUTO_NOTE.split(text).length > 2) {
            return false;
        }
        if (this._unwantedTexts.contains(text)) {
            return false;
        }
        if (this._unwantedPosTexts.contains(text)) {
            return false;
        }
        if (this._removalPrefixTriggers.stream().anyMatch(text::startsWith)) {
            return false;
        }
        if (this._removalSuffixTriggers.stream().anyMatch(text::endsWith)) {
            return false;
        }
        if (this._removalColonTriggers.stream().anyMatch(text::contains)) {
            return false;
        }
        return this._removalFunctionTriggers.stream().noneMatch(text::contains);
    }

    /**
     * @param text          text to check
     * @param minCharLength minimum acceptable number of characters
     * @return {@code true} if the text has fewer than {@code minCharLength} characters
     */
    public static boolean isTextTooShort(String text, int minCharLength) {
        return text.length() < minCharLength;
    }

    /**
     * Checks a text against an overall length limit and per-token limits.  The text is split on
     * whitespace; tokens longer than two characters count as words, all other tokens (including an
     * empty leading token produced by leading whitespace) count as symbols.
     *
     * @param text          text to check
     * @param maxCharLength maximum number of characters allowed in any single token
     * @param maxWordCount  maximum number of word tokens
     * @param maxSymCount   maximum number of symbol tokens
     * @return {@code true} if the text exceeds 255 characters or any of the given limits
     */
    public static boolean isTextTooLong(String text, int maxCharLength, int maxWordCount, int maxSymCount) {
        if (text.length() > MAX_TEXT_LENGTH) {
            return true;
        }
        int wordCount = 0;
        int symCount = 0;
        for (String token : WHITESPACE.split(text)) {
            if (token.length() > maxCharLength) {
                return true;
            }
            if (token.length() > 2) {
                ++wordCount;
            } else {
                ++symCount;
            }
        }
        return wordCount > maxWordCount || symCount > maxSymCount;
    }

    /**
     * Produces the term variants for a text: optional abbreviation split, otherwise modifier
     * reordering, always together with the text itself.  Every variant is stripped, optional plurals
     * are expanded, and variants violating the size limits are dropped.
     *
     * @param strippedText         text to expand
     * @param extractAbbreviations whether a trailing abbreviation should be split off
     * @param minCharLength        see {@link #isTextTooShort(String, int)}
     * @param maxCharLength        see {@link #isTextTooLong(String, int, int, int)}
     * @param maxWordCount         see {@link #isTextTooLong(String, int, int, int)}
     * @param maxSymCount          see {@link #isTextTooLong(String, int, int, int)}
     * @return the surviving variants; possibly empty
     */
    public Collection<String> getFormattedTexts(String strippedText, boolean extractAbbreviations, int minCharLength, int maxCharLength, int maxWordCount, int maxSymCount) {
        Collection<String> extractedTerms = extractAbbreviations
                ? this.extractAbbreviations(strippedText)
                : Collections.<String>emptySet();
        if (extractedTerms.isEmpty()) {
            extractedTerms = this.extractModifiers(strippedText);
        }
        // Copy before adding: the extract helpers return an immutable collection when nothing matched.
        final Collection<String> texts = new HashSet<>(extractedTerms);
        texts.add(strippedText);
        return getFormattedTexts(getPluralTerms(this.getStrippedTexts(texts)), minCharLength, maxCharLength, maxWordCount, maxSymCount);
    }

    /**
     * @return the given terms without those that are too short or too long
     */
    private static Collection<String> getFormattedTexts(Collection<String> extractedTerms, int minCharLength, int maxCharLength, int maxWordCount, int maxSymCount) {
        return extractedTerms.stream()
                .filter(t -> !isTextTooShort(t, minCharLength))
                .filter(t -> !isTextTooLong(t, maxCharLength, maxWordCount, maxSymCount))
                .collect(Collectors.toList());
    }

    /**
     * Replaces every text ending in {@code "( s )"} by its singular and plural forms; other texts
     * are passed through.  The argument is never modified.
     *
     * @param texts texts to expand
     * @return {@code texts} itself when nothing needed expanding, otherwise a new set
     */
    private static Collection<String> getPluralTerms(Collection<String> texts) {
        if (texts.stream().noneMatch(t -> t.endsWith(OPTIONAL_PLURAL_SUFFIX))) {
            return texts;
        }
        final Collection<String> expanded = new HashSet<>(texts.size() * 2);
        for (String text : texts) {
            if (!text.endsWith(OPTIONAL_PLURAL_SUFFIX)) {
                expanded.add(text);
                continue;
            }
            // Trim so that "tumor ( s )" yields "tumor" / "tumors" rather than "tumor " / "tumor s".
            final String singular = text.substring(0, text.length() - OPTIONAL_PLURAL_SUFFIX.length()).trim();
            if (singular.isEmpty()) {
                continue;
            }
            expanded.add(singular);
            expanded.add(singular + "s");
        }
        return expanded;
    }

    /**
     * @return the non-empty results of {@link #getStrippedText(String)} for each text, as a set
     */
    private Collection<String> getStrippedTexts(Collection<String> texts) {
        return texts.stream()
                .map(this::getStrippedText)
                .filter(t -> !t.isEmpty())
                .collect(Collectors.toSet());
    }

    /**
     * Repeatedly removes unwanted prefixes and suffixes from a text until a pass leaves its length
     * unchanged.  After every pass the intermediate result must still satisfy
     * {@link #isTextValid(String)}.
     *
     * @param text text to strip, must not be {@code null}
     * @return the stripped text, or an empty string if it became invalid or contains both a
     *         {@code '('} and a {@code '['}
     */
    public String getStrippedText(String text) {
        String strippedText = text.trim();
        int lastLength;
        do {
            lastLength = strippedText.length();
            for (String prefix : this._unwantedPrefixes) {
                if (strippedText.startsWith(prefix)) {
                    strippedText = strippedText.substring(prefix.length()).trim();
                }
            }
            for (String suffix : this._unwantedSuffixes) {
                if (strippedText.endsWith(suffix)) {
                    strippedText = strippedText.substring(0, strippedText.length() - suffix.length()).trim();
                }
            }
            if (!this.isTextValid(strippedText)) {
                return "";
            }
        } while (lastLength != strippedText.length());
        if (strippedText.contains("(") && strippedText.contains("[")) {
            return "";
        }
        return strippedText;
    }

    /**
     * Splits a trailing abbreviation off a text.  Only the first matching abbreviation is used, and
     * texts containing {@code ":"}, {@code " of "} or {@code " for "} are never split.
     *
     * @param tokenizedText text to inspect
     * @return the text without the abbreviation and the abbreviation without punctuation (each only
     *         if shorter than 255 characters), or an immutable empty collection if nothing matched
     */
    private Collection<String> extractAbbreviations(String tokenizedText) {
        if (tokenizedText.contains(":") || tokenizedText.contains(" of ") || tokenizedText.contains(" for ")) {
            return Collections.emptyList();
        }
        for (String abbreviation : this._abbreviations) {
            if (!tokenizedText.endsWith(abbreviation)) {
                continue;
            }
            final String noAbbrTerm = tokenizedText.substring(0, tokenizedText.length() - abbreviation.length()).trim();
            final String abbrTerm = ABBREVIATION_PUNCTUATION.matcher(abbreviation).replaceAll("").trim();
            final Collection<String> extractedAbbreviations = new HashSet<>(2);
            if (noAbbrTerm.length() < MAX_TEXT_LENGTH) {
                extractedAbbreviations.add(noAbbrTerm);
            }
            if (abbrTerm.length() < MAX_TEXT_LENGTH) {
                extractedAbbreviations.add(abbrTerm);
            }
            return extractedAbbreviations;
        }
        return Collections.emptyList();
    }

    /**
     * Moves a trailing modifier to the front of a text.  Only the first matching modifier suffix is
     * used.
     *
     * @param tokenizedText text to inspect
     * @return the original text plus the "modifier main-text" variant, or an immutable empty
     *         collection if no modifier suffix matched
     */
    private Collection<String> extractModifiers(String tokenizedText) {
        for (String modifier : this._modifierSuffixes) {
            if (!tokenizedText.endsWith(modifier)) {
                continue;
            }
            final String mainText = tokenizedText.substring(0, tokenizedText.length() - modifier.length()).trim();
            final String modifierText = MODIFIER_PUNCTUATION.matcher(modifier).replaceAll("").trim();
            final Collection<String> modifiedTexts = new HashSet<>(2);
            modifiedTexts.add(tokenizedText);
            modifiedTexts.add(modifierText + " " + mainText);
            return modifiedTexts;
        }
        return Collections.emptyList();
    }

    /** Default file names of the data files expected inside a data directory. */
    private enum DATA_FILE {
        REMOVAL_PREFIX_TRIGGERS("RemovalPrefixTriggers.txt"),
        REMOVAL_SUFFIX_TRIGGERS("RemovalSuffixTriggers.txt"),
        REMOVAL_FUNCTION_TRIGGERS("RemovalFunctionTriggers.txt"),
        REMOVAL_COLON_TRIGGERS("RemovalColonTriggers.txt"),
        UNWANTED_PREFIXES("UnwantedPrefixes.txt"),
        UNWANTED_SUFFIXES("UnwantedSuffixes.txt"),
        UNWANTED_TEXTS("UnwantedTexts.txt"),
        MODIFIER_SUFFIXES("ModifierSuffixes.txt"),
        RIGHT_ABBREVIATIONS("RightAbbreviations.txt"),
        KEEP_PREFIX_TRIGGERS("KeepPrefixTriggers.txt");

        private final String __name;

        DATA_FILE(String name) {
            this.__name = name;
        }
    }
}

