/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ctakes.gui.dictionary;

import java.io.File;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.ctakes.core.util.collection.CollectionMap;
import org.apache.ctakes.core.util.collection.HashSetMap;
import org.apache.ctakes.gui.dictionary.DictionaryXmlWriter;
import org.apache.ctakes.gui.dictionary.umls.Concept;
import org.apache.ctakes.gui.dictionary.umls.ConceptMapFactory;
import org.apache.ctakes.gui.dictionary.umls.MrconsoParser;
import org.apache.ctakes.gui.dictionary.umls.Tui;
import org.apache.ctakes.gui.dictionary.umls.TuiTableModel;
import org.apache.ctakes.gui.dictionary.umls.UmlsTermUtil;
import org.apache.ctakes.gui.dictionary.util.HsqlUtil;
import org.apache.ctakes.gui.dictionary.util.JdbcUtil;
import org.apache.ctakes.gui.dictionary.util.RareWordDbWriter;
import org.apache.log4j.Logger;

/**
 * Static utility that parses UMLS content into a concept map, prunes ambiguous
 * synonyms, and writes the result out as an HSQL database plus a dictionary
 * XML descriptor.
 *
 * <p>The single entry point is
 * {@link #buildDictionary(String, String, String, Collection, Collection, Collection, Collection)}.
 */
final class DictionaryBuilder {
    private static final Logger LOGGER = Logger.getLogger("DictionaryBuilder");
    private static final String DEFAULT_DATA_DIR = "org/apache/ctakes/gui/dictionary/data/tiny";
    private static final String CTAKES_APP_DB_PATH = "resources/org/apache/ctakes/dictionary/lookup/fast";
    private static final String CTAKES_RES_MODULE = "ctakes-dictionary-lookup-fast-res";
    private static final String CTAKES_RES_DB_PATH = "ctakes-dictionary-lookup-fast-res/src/main/resources/org/apache/ctakes/dictionary/lookup/fast";
    private static final String RXNORM = "RXNORM";
    // Term filters handed to MrconsoParser.parseAllConcepts, in this order.
    private static final int MIN_CHAR_LENGTH = 2;
    private static final int MAX_CHAR_LENGTH = 48;
    private static final int MAX_WORD_COUNT = 12;
    private static final int MAX_SYM_COUNT = 7;
    // A synonym is dropped from a concept when its (weighted) count is <= maxCount / WSD_DIVISOR.
    private static final int WSD_DIVISOR = 2;
    // Weight applied to synonym counts of concepts whose TUIs are all wanted anatomy TUIs.
    private static final int ANAT_MULTIPLIER = 2;
    // Value passed to the HashSetMap constructor for the synonym -> concepts index.
    private static final int SYNONYM_MAP_SIZE = 500000;

    private DictionaryBuilder() {
    }

    /**
     * Parses the UMLS data and writes the resulting dictionary database.
     *
     * @param umlsDirPath     UMLS directory handed to the concept-map factory and the MRCONSO parser
     * @param ctakesDirPath   cTAKES root directory under which the database is created
     * @param dictionaryName  name used for the database directory, the database file and the XML descriptor
     * @param wantedLanguages languages forwarded to the MRCONSO parser
     * @param wantedSources   sources forwarded to the concept-map factory and the MRCONSO parser
     * @param wantedTargets   targets forwarded to the MRCONSO parser
     * @param wantedTuis      TUIs forwarded to the concept-map factory and used for synonym pruning
     * @return {@code false} if the cTAKES directory cannot be listed, the database cannot be created,
     *         or the XML descriptor cannot be written; otherwise the result of writing the concepts
     */
    static boolean buildDictionary(String umlsDirPath, String ctakesDirPath, String dictionaryName, Collection<String> wantedLanguages, Collection<String> wantedSources, Collection<String> wantedTargets, Collection<Tui> wantedTuis) {
        UmlsTermUtil umlsTermUtil = new UmlsTermUtil(DEFAULT_DATA_DIR);
        Map<Long, Concept> conceptMap = parseAll(umlsTermUtil, umlsDirPath, wantedLanguages, wantedSources, wantedTargets, wantedTuis);
        return writeDatabase(ctakesDirPath, dictionaryName, conceptMap);
    }

    /**
     * Creates the initial concept map, fills it from MRCONSO, then prunes rarely used ambiguous synonyms.
     *
     * @return the populated and pruned concept map
     */
    private static Map<Long, Concept> parseAll(UmlsTermUtil umlsTermUtil, String umlsDirPath, Collection<String> wantedLanguages, Collection<String> wantedSources, Collection<String> wantedTargets, Collection<Tui> wantedTuis) {
        LOGGER.info("Parsing Concepts");
        Map<Long, Concept> conceptMap = ConceptMapFactory.createInitialConceptMap(umlsDirPath, wantedSources, wantedTuis);
        MrconsoParser.parseAllConcepts(umlsDirPath, conceptMap, wantedSources, wantedTargets, umlsTermUtil, wantedLanguages, true, MIN_CHAR_LENGTH, MAX_CHAR_LENGTH, MAX_WORD_COUNT, MAX_SYM_COUNT);
        removeWsdRarities(conceptMap, wantedTuis, WSD_DIVISOR, ANAT_MULTIPLIER);
        LOGGER.info("Done Parsing Concepts");
        return conceptMap;
    }

    /**
     * "Poor man's" word sense disambiguation: for every synonym shared by more than one concept,
     * removes the synonym from each concept whose weighted count is at most
     * {@code floor(maxWeightedCount / wsdDivisor)}. Concepts left empty are removed from the map.
     *
     * @param conceptMap     concepts to prune; modified in place
     * @param wantedTuis     wanted TUIs; those also in {@code TuiTableModel.CTAKES_ANAT} get the anatomy weight
     * @param wsdDivisor     divisor applied to the highest weighted count to obtain the removal threshold
     * @param anatMultiplier weight applied to counts of concepts whose TUIs are all wanted anatomy TUIs
     */
    private static void removeWsdRarities(Map<Long, Concept> conceptMap, Collection<Tui> wantedTuis, int wsdDivisor, int anatMultiplier) {
        LOGGER.info("Performing Poor man's WSD ...");
        Collection<Tui> wantedAnatTuis = new ArrayList<>(wantedTuis);
        wantedAnatTuis.retainAll(Arrays.asList(TuiTableModel.CTAKES_ANAT));
        // Index: synonym text -> every concept that carries that text.
        HashSetMap<String, Concept> synonymCodeMap = new HashSetMap<>(SYNONYM_MAP_SIZE);
        for (Concept concept : conceptMap.values()) {
            concept.cullExtensions();
            concept.getTexts().forEach(text -> synonymCodeMap.placeValue(text, concept));
        }
        for (Map.Entry<String, ? extends Collection<Concept>> synonymConcepts : synonymCodeMap) {
            Collection<Concept> concepts = synonymConcepts.getValue();
            if (concepts.size() == 1) {
                // Unambiguous synonym, nothing to disambiguate.
                continue;
            }
            String synonym = synonymConcepts.getKey();
            int maxCount = 0;
            for (Concept concept : concepts) {
                maxCount = Math.max(maxCount, weightedCount(concept, synonym, wantedAnatTuis, anatMultiplier));
            }
            if (maxCount <= 1) {
                continue;
            }
            int threshold = (int) Math.floor((double) maxCount / (double) wsdDivisor);
            for (Concept concept : concepts) {
                if (weightedCount(concept, synonym, wantedAnatTuis, anatMultiplier) <= threshold) {
                    concept.removeText(synonym);
                }
            }
        }
        int textCount = removeEmptyConcepts(conceptMap);
        LOGGER.info("Concepts: " + conceptMap.size() + "  Texts: " + textCount);
    }

    /**
     * Returns the concept's count for the synonym, multiplied by {@code anatMultiplier}
     * when every TUI of the concept is contained in {@code anatTuis}.
     */
    private static int weightedCount(Concept concept, String synonym, Collection<Tui> anatTuis, int anatMultiplier) {
        int count = concept.getCount(synonym);
        return anatTuis.containsAll(concept.getTuis()) ? count * anatMultiplier : count;
    }

    /**
     * Removes every empty concept from the map and returns the summed synonym count of the rest.
     * Uses {@code removeIf} on the value view, which is a single pass regardless of how many
     * concepts are removed.
     */
    private static int removeEmptyConcepts(Map<Long, Concept> conceptMap) {
        conceptMap.values().removeIf(Concept::isEmpty);
        return countSynonyms(conceptMap);
    }

    /** Returns the sum of the synonym counts of all concepts in the map. */
    private static int countSynonyms(Map<Long, Concept> conceptMap) {
        int textCount = 0;
        for (Concept concept : conceptMap.values()) {
            textCount += concept.getSynonymCount();
        }
        return textCount;
    }

    /**
     * Removes, from concepts whose TUIs are all wanted non-anatomy TUIs, every text that also
     * belongs to a concept whose TUIs are all wanted anatomy TUIs. Concepts left empty are removed.
     * Not currently called from within this class.
     *
     * @param conceptMap concepts to prune; modified in place
     * @param wantedTuis wanted TUIs, split into anatomy / non-anatomy via {@code TuiTableModel.CTAKES_ANAT}
     */
    private static void removeAnatTexts(Map<Long, Concept> conceptMap, Collection<Tui> wantedTuis) {
        LOGGER.info("Removing Non-Anatomy synonyms that are also Anatomy synonyms ...");
        Collection<Tui> wantedAnatTuis = new ArrayList<>(wantedTuis);
        wantedAnatTuis.retainAll(Arrays.asList(TuiTableModel.CTAKES_ANAT));
        Collection<String> anatTexts = conceptMap.values().stream()
                .filter(c -> wantedAnatTuis.containsAll(c.getTuis()))
                .map(Concept::getTexts)
                .flatMap(Collection::stream)
                .collect(Collectors.toSet());
        Collection<Tui> nonAnatTuis = new ArrayList<>(wantedTuis);
        nonAnatTuis.removeAll(Arrays.asList(TuiTableModel.CTAKES_ANAT));
        for (Concept concept : conceptMap.values()) {
            if (nonAnatTuis.containsAll(concept.getTuis())) {
                concept.removeTexts(anatTexts);
            }
        }
        int textCount = removeEmptyConcepts(conceptMap);
        LOGGER.info("Concepts: " + conceptMap.size() + "  Texts: " + textCount);
    }

    /**
     * Keeps only concepts that are either (all wanted drug TUIs and in the RXNORM vocabulary) or
     * (all wanted non-drug TUIs and not in the RXNORM vocabulary); every other concept is removed.
     * Not currently called from within this class.
     *
     * @param conceptMap concepts to prune; modified in place
     * @param wantedTuis wanted TUIs, split into drug / non-drug via {@code TuiTableModel.CTAKES_DRUG}
     */
    private static void removeUnwantedDrugs(Map<Long, Concept> conceptMap, Collection<Tui> wantedTuis) {
        LOGGER.info("Removing Drug Concepts not in rxnorm ...");
        Collection<Tui> drugTuis = new ArrayList<>(wantedTuis);
        drugTuis.retainAll(Arrays.asList(TuiTableModel.CTAKES_DRUG));
        Collection<Tui> nonDrugTuis = new ArrayList<>(wantedTuis);
        nonDrugTuis.removeAll(Arrays.asList(TuiTableModel.CTAKES_DRUG));
        conceptMap.values().removeIf(concept -> isUnwantedDrugConcept(concept, drugTuis, nonDrugTuis));
        int textCount = countSynonyms(conceptMap);
        LOGGER.info("Concepts: " + conceptMap.size() + "  Texts: " + textCount);
    }

    /**
     * Logs the classification of the concept and returns {@code true} when it should be removed,
     * i.e. when it is neither a drug concept in RXNORM nor a non-drug concept outside RXNORM.
     */
    private static boolean isUnwantedDrugConcept(Concept concept, Collection<Tui> drugTuis, Collection<Tui> nonDrugTuis) {
        LOGGER.info(concept.getPreferredText());
        boolean allDrug = drugTuis.containsAll(concept.getTuis());
        boolean allNonDrug = nonDrugTuis.containsAll(concept.getTuis());
        boolean inRxnorm = concept.getVocabularies().contains(RXNORM);
        if (allDrug && inRxnorm) {
            LOGGER.info("drug");
            return false;
        }
        if (allNonDrug && !inRxnorm) {
            LOGGER.info("not drug");
            return false;
        }
        LOGGER.info("bad " + allDrug + " " + allNonDrug + " " + inRxnorm);
        return true;
    }

    /**
     * Creates the HSQL database and XML descriptor for the dictionary and writes the concepts.
     *
     * <p>The database goes under {@code CTAKES_RES_DB_PATH} when the cTAKES root contains the
     * {@code ctakes-dictionary-lookup-fast-res} module directory, otherwise under
     * {@code CTAKES_APP_DB_PATH}.
     *
     * @param ctakesDirPath  cTAKES root directory
     * @param dictionaryName name of the database directory / file and of the XML descriptor
     * @param conceptMap     concepts to write
     * @return {@code false} if the root cannot be listed, or database creation or XML writing fails;
     *         otherwise the result of {@code RareWordDbWriter.writeConcepts}
     */
    private static boolean writeDatabase(String ctakesDirPath, String dictionaryName, Map<Long, Concept> conceptMap) {
        File ctakesRoot = new File(ctakesDirPath);
        // File.list() returns null when the path is not a directory or an I/O error occurs.
        String[] rootEntries = ctakesRoot.list();
        if (rootEntries == null) {
            LOGGER.error("Cannot list cTAKES directory: " + ctakesDirPath);
            return false;
        }
        String databaseDirPath = ctakesDirPath + "/" + CTAKES_APP_DB_PATH;
        if (Arrays.asList(rootEntries).contains(CTAKES_RES_MODULE)) {
            databaseDirPath = ctakesDirPath + "/" + CTAKES_RES_DB_PATH;
        }
        String url = "jdbc:hsqldb:file:" + databaseDirPath.replace('\\', '/') + "/" + dictionaryName + "/" + dictionaryName;
        Connection connection = JdbcUtil.createDatabaseConnection(url, "SA", "");
        if (!HsqlUtil.createDatabase(connection)) {
            closeQuietly(connection);
            return false;
        }
        if (!DictionaryXmlWriter.writeXmlFile(databaseDirPath, dictionaryName)) {
            closeQuietly(connection);
            return false;
        }
        // NOTE(review): the connection is handed to RareWordDbWriter and is not closed here;
        // presumably the writer shuts the database down itself - confirm.
        return RareWordDbWriter.writeConcepts(connection, conceptMap);
    }

    /** Closes the connection if it is non-null, logging (not propagating) any failure. */
    private static void closeQuietly(Connection connection) {
        if (connection == null) {
            return;
        }
        try {
            connection.close();
        } catch (SQLException sqlE) {
            LOGGER.warn("Could not close database connection: " + sqlE.getMessage());
        }
    }
}

