package comirva.util;

import comirva.data.DataMatrix;
import comirva.data.EntityTermProfile;
import comirva.data.SingleTermList;
import comirva.io.filefilter.HTMLFileFilter;
import cp.net.Webpage;
import cp.util.HashtableTool;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Vector;
import javax.swing.JLabel;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import org.apache.commons.math3.optimization.direct.CMAESOptimizer;

/* loaded from: input_file:comirva/util/TermProfileUtils.class */
public class TermProfileUtils {
    /**
     * Loads an entity term profile from an XML file and converts it into a 0/1 occurrence matrix.
     *
     * <p>One matrix row is emitted per entry of the profile's term-occurrence list and one column
     * per document; a cell is {@code 1.0} when the document index is listed for that term and
     * {@code 0.0} otherwise.
     *
     * @param file XML file holding the entity term profile; also used to build the matrix name
     * @return the occurrence matrix, named {@code "term occurrences of <file name> (<rows>x<cols>)"}
     */
    public static DataMatrix getOccurrenceMatrixFromETP(File file) {
        EntityTermProfile profile = getEntityTermProfileFromXML(file);
        Vector<Vector<Integer>> occurrencesPerTerm = profile.getTermOccurrenceOnDocuments();
        int documentCount = Math.max(0, profile.getNumberDocuments().intValue());

        DataMatrix matrix = new DataMatrix();
        for (int term = 0; term < occurrencesPerTerm.size(); term++) {
            // Mark the listed documents once instead of scanning the list for every column.
            boolean[] occursIn = new boolean[documentCount];
            for (Integer documentIndex : occurrencesPerTerm.elementAt(term)) {
                if (documentIndex != null) {
                    int index = documentIndex.intValue();
                    if (index >= 0 && index < documentCount) {
                        occursIn[index] = true;
                    }
                }
            }
            for (int document = 0; document < documentCount; document++) {
                matrix.addValue(Double.valueOf(occursIn[document] ? 1.0d : 0.0d));
            }
            matrix.startNewRow();
        }
        // NOTE(review): presumably discards the empty row opened by the final startNewRow() -
        // confirm against DataMatrix.
        matrix.removeLastAddedElement();
        matrix.setName("term occurrences of " + file.getName() + " (" + matrix.getNumberOfRows() + "x" + matrix.getNumberOfColumns() + ")");
        return matrix;
    }

    /**
     * Converts an already loaded entity term profile into a 0/1 occurrence matrix.
     *
     * <p>One matrix row is emitted per entry of the profile's term-occurrence list and one column
     * per document; a cell is {@code 1.0} when the document index is listed for that term and
     * {@code 0.0} otherwise.
     *
     * @param entityTermProfile profile supplying the per-term document indices, the document
     *                          count and the entity name used in the matrix name
     * @return the occurrence matrix, named
     *         {@code "term occurrences of <entity name> (<rows>x<cols>)"}
     */
    public static DataMatrix getOccurrenceMatrixFromETP(EntityTermProfile entityTermProfile) {
        Vector<Vector<Integer>> occurrencesPerTerm = entityTermProfile.getTermOccurrenceOnDocuments();
        int documentCount = Math.max(0, entityTermProfile.getNumberDocuments().intValue());

        DataMatrix matrix = new DataMatrix();
        for (int term = 0; term < occurrencesPerTerm.size(); term++) {
            // Mark the listed documents once instead of scanning the list for every column.
            boolean[] occursIn = new boolean[documentCount];
            for (Integer documentIndex : occurrencesPerTerm.elementAt(term)) {
                if (documentIndex != null) {
                    int index = documentIndex.intValue();
                    if (index >= 0 && index < documentCount) {
                        occursIn[index] = true;
                    }
                }
            }
            for (int document = 0; document < documentCount; document++) {
                matrix.addValue(Double.valueOf(occursIn[document] ? 1.0d : 0.0d));
            }
            matrix.startNewRow();
        }
        // NOTE(review): presumably discards the empty row opened by the final startNewRow() -
        // confirm against DataMatrix.
        matrix.removeLastAddedElement();
        matrix.setName("term occurrences of " + entityTermProfile.getEntityName() + " (" + matrix.getNumberOfRows() + "x" + matrix.getNumberOfColumns() + ")");
        return matrix;
    }

    /**
     * Reads an entity term profile from a UTF-8 encoded XML file.
     *
     * <p>Failures are reported via {@code printStackTrace()} and not propagated; in that case the
     * returned profile holds whatever state {@code readXML} managed to populate before the error.
     *
     * @param file XML file to read; also passed to the {@link EntityTermProfile} constructor
     * @return the profile object (never {@code null})
     */
    public static EntityTermProfile getEntityTermProfileFromXML(File file) {
        EntityTermProfile entityTermProfile = new EntityTermProfile(file);
        // try-with-resources closes the underlying file even when parsing fails.
        try (InputStreamReader reader = new InputStreamReader(new FileInputStream(file), "UTF8")) {
            XMLStreamReader xmlReader = XMLInputFactory.newInstance().createXMLStreamReader(reader);
            try {
                entityTermProfile.readXML(xmlReader);
            } finally {
                // XMLStreamReader is not AutoCloseable and does not close the wrapped reader.
                xmlReader.close();
            }
        } catch (IOException | XMLStreamException e) {
            // FileNotFoundException and UnsupportedEncodingException are IOExceptions.
            e.printStackTrace();
        }
        return entityTermProfile;
    }

    /**
     * Creates one entity term profile per sub-directory of {@code file} and stores each profile as
     * {@code <sub-directory path>.xml} (UTF-8).
     *
     * <p>Does nothing when {@code file} is not a directory or its content cannot be listed.
     * Errors while writing a profile are printed and processing continues with the next
     * sub-directory.
     *
     * @param file   directory whose immediate sub-directories are processed
     * @param vector term list handed to {@code EntityTermProfile.calculateOccurrences}
     * @param jLabel optional label used to display progress; may be {@code null}
     */
    public static void generateEntityTermProfiles(File file, Vector<String> vector, JLabel jLabel) {
        if (!file.isDirectory()) {
            return;
        }
        File[] children = file.listFiles();
        if (children == null) {
            // listFiles() returns null on an I/O error.
            return;
        }
        for (File entityDir : children) {
            String name = entityDir.getName();
            // Compare by content; identity comparison of strings never matched.
            if (!entityDir.isDirectory() || ".".equals(name) || "..".equals(name)) {
                continue;
            }
            showGenerationStatus(jLabel, "<html>Creating ETP for <b>" + entityDir + "</b></html>");
            EntityTermProfile profile = new EntityTermProfile(entityDir);
            profile.calculateOccurrences(vector, new HTMLFileFilter());
            profile.setEntityName(entityDir.toString());

            String xmlPath = entityDir.getPath() + ".xml";
            showGenerationStatus(jLabel, "<html>Storing ETP for <b>" + entityDir + "</b> in <b>" + xmlPath + "</b></html>");
            // try-with-resources closes the file even when serialisation fails.
            try (OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(new File(xmlPath)), "UTF8")) {
                profile.writeXML(XMLOutputFactory.newInstance().createXMLStreamWriter(writer));
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /** Shows {@code html} on {@code label} and repaints it at once; no-op for a {@code null} label. */
    private static void showGenerationStatus(JLabel label, String html) {
        if (label == null) {
            return;
        }
        label.setText(html);
        label.validate();
        label.paintImmediately(0, 0, label.getWidth(), label.getHeight());
    }

    /**
     * Convenience overload of {@link #generateEntityTermProfiles(File, Vector, JLabel)} that
     * passes {@code null} as the progress label.
     *
     * @param file   directory whose sub-directories are processed
     * @param vector term list used for the occurrence calculation
     */
    public static void generateEntityTermProfiles(File file, Vector<String> vector) {
        final JLabel noProgressLabel = null;
        generateEntityTermProfiles(file, vector, noProgressLabel);
    }

    /**
     * Counts, for every term (matrix row), the number of documents it occurs in and returns up to
     * {@code i} terms together with their counts.
     *
     * <p>Terms with a count of zero are dropped. The remaining terms are ordered by
     * {@code VectorSort.sortWithMetaData} and the first {@code i} of them are returned
     * (NOTE(review): presumably highest count first - confirm against VectorSort).
     *
     * @param dataMatrix boolean term-by-document occurrence matrix
     * @param vector     term labels, one per matrix row
     * @param i          maximum number of terms to return
     * @return table mapping term ({@code String}) to occurrence count ({@code Integer}), or
     *         {@code null} when an argument is {@code null}, the row count differs from the
     *         number of terms, the matrix has no columns, or it is not a boolean matrix
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    public static Hashtable getTermsWithHighestOccurrence(DataMatrix dataMatrix, Vector<String> vector, int i) {
        // Validate before touching the matrix; previously a null matrix caused an NPE.
        if (dataMatrix == null || vector == null) {
            return null;
        }
        int rowCount = dataMatrix.getNumberOfRows();
        if (rowCount != vector.size() || dataMatrix.getNumberOfColumns() == 0 || !dataMatrix.isBooleanMatrix()) {
            return null;
        }

        // Kept raw because the parameter types of VectorSort.sortWithMetaData are not visible here.
        Vector counts = new Vector();
        Vector terms = new Vector();
        for (int row = 0; row < rowCount; row++) {
            int occurrences = 0;
            Enumeration<Double> cells = dataMatrix.getRow(row).elements();
            while (cells.hasMoreElements()) {
                occurrences += cells.nextElement().intValue();
            }
            if (occurrences != 0) {
                counts.addElement(Double.valueOf(occurrences));
                terms.addElement(vector.elementAt(row));
            }
        }
        VectorSort.sortWithMetaData(counts, terms);

        Hashtable<String, Integer> result = new Hashtable<String, Integer>();
        for (int k = 0; k < i && k < terms.size(); k++) {
            result.put((String) terms.elementAt(k), Integer.valueOf(((Double) counts.elementAt(k)).intValue()));
        }
        return result;
    }

    /**
     * Returns up to {@code i} terms with their TFxIDF values, ignoring terms whose value is
     * exactly {@code 0.0}.
     *
     * <p>The non-zero entries are ordered by {@code VectorSort.sortWithMetaData} and the first
     * {@code i} of them are returned (NOTE(review): presumably highest value first - confirm
     * against VectorSort).
     *
     * @param vector  TFxIDF value per term
     * @param vector2 term labels, parallel to {@code vector}
     * @param i       maximum number of terms to return
     * @return table mapping term ({@code String}) to TFxIDF value ({@code Double}), or
     *         {@code null} when an argument is {@code null} or the two lists differ in size
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    public static Hashtable getTermsWithHighestTFxIDF(Vector<Double> vector, Vector<String> vector2, int i) {
        if (vector == null || vector2 == null || vector.size() != vector2.size()) {
            return null;
        }

        // Kept raw because the parameter types of VectorSort.sortWithMetaData are not visible here.
        Vector values = new Vector();
        Vector terms = new Vector();
        for (int index = 0; index < vector.size(); index++) {
            Double value = vector.elementAt(index);
            if (value.doubleValue() != 0.0d) {
                values.addElement(value);
                terms.addElement(vector2.elementAt(index));
            }
        }
        VectorSort.sortWithMetaData(values, terms);

        Hashtable<String, Double> result = new Hashtable<String, Double>();
        for (int k = 0; k < i && k < terms.size(); k++) {
            result.put((String) terms.elementAt(k), (Double) values.elementAt(k));
        }
        return result;
    }

    /**
     * Returns every term that occurs at least once, by asking
     * {@code getTermsWithHighestOccurrence} for as many terms as there are labels.
     *
     * @param dataMatrix boolean term-by-document occurrence matrix
     * @param vector     term labels, one per matrix row
     * @return the table produced by {@code getTermsWithHighestOccurrence}, which may be
     *         {@code null}
     */
    public static Hashtable<String, Integer> getNonZeroOccurringTerms(DataMatrix dataMatrix, Vector<String> vector) {
        int everyTerm = vector.size();
        return getTermsWithHighestOccurrence(dataMatrix, vector, everyTerm);
    }

    /**
     * Restricts a term occurrence matrix to the documents (columns) in which all of the given
     * terms occur.
     *
     * <p>A column is selected when every row belonging to a term of {@code vector2} holds
     * {@code 1.0} in that column. Selected column indices are appended to {@code vector3}; the
     * result contains, for each of the first {@code vector.size()} rows, the cells at all indices
     * held by {@code vector3} (including any entries it already contained on entry).
     *
     * @param dataMatrix boolean term-by-document occurrence matrix
     * @param vector     term labels, one per matrix row
     * @param vector2    terms that must all occur in a document for it to be kept
     * @param vector3    receives the indices of the kept documents; may be {@code null}
     * @return the reduced matrix, or {@code null} when {@code dataMatrix}, {@code vector} or
     *         {@code vector2} is {@code null}, the matrix is not boolean or has no rows, there
     *         are more term labels than rows, or a term of {@code vector2} is not contained in
     *         {@code vector}
     */
    public static DataMatrix getSubsetOfTermOccurrenceMatrix(DataMatrix dataMatrix, Vector<String> vector, Vector<String> vector2, Vector<Integer> vector3) {
        if (dataMatrix == null || vector == null || vector2 == null || !dataMatrix.isBooleanMatrix()) {
            return null;
        }
        double[][] occurrences = dataMatrix.toDoubleArray();
        if (occurrences == null || occurrences.length == 0 || vector.size() > occurrences.length) {
            return null;
        }

        // Resolve the required terms to row indices; an unknown term used to yield index -1 and
        // an ArrayIndexOutOfBoundsException further down.
        int[] requiredRows = new int[vector2.size()];
        for (int k = 0; k < requiredRows.length; k++) {
            int row = vector.indexOf(vector2.elementAt(k));
            if (row < 0) {
                return null;
            }
            requiredRows[k] = row;
        }

        if (vector3 == null) {
            vector3 = new Vector<Integer>();
        }
        int documentCount = occurrences[0].length;
        for (int document = 0; document < documentCount; document++) {
            boolean containsAllTerms = true;
            for (int row : requiredRows) {
                if (occurrences[row][document] != 1.0d) {
                    containsAllTerms = false;
                    break;
                }
            }
            if (containsAllTerms) {
                vector3.addElement(Integer.valueOf(document));
            }
        }

        DataMatrix subset = new DataMatrix();
        for (int row = 0; row < vector.size(); row++) {
            for (int k = 0; k < vector3.size(); k++) {
                subset.addValue(Double.valueOf(occurrences[row][vector3.elementAt(k).intValue()]));
            }
            subset.startNewRow();
        }
        // NOTE(review): presumably discards the empty row opened by the final startNewRow() -
        // confirm against DataMatrix.
        subset.removeLastAddedElement();
        return subset;
    }

    /**
     * Convenience overload of
     * {@link #getSubsetOfTermOccurrenceMatrix(DataMatrix, Vector, Vector, Vector)} that passes
     * {@code null} for the list receiving the selected document indices.
     *
     * @param dataMatrix boolean term-by-document occurrence matrix
     * @param vector     term labels, one per matrix row
     * @param vector2    terms that must all occur in a document for it to be kept
     * @return the reduced matrix as produced by the four-argument overload
     */
    public static DataMatrix getSubsetOfTermOccurrenceMatrix(DataMatrix dataMatrix, Vector<String> vector, Vector<String> vector2) {
        final Vector<Integer> noIndexSink = null;
        return getSubsetOfTermOccurrenceMatrix(dataMatrix, vector, vector2, noIndexSink);
    }

    /**
     * Picks the document paths addressed by a list of indices.
     *
     * <p>Indices are processed in the order given; an index below zero or beyond the end of
     * {@code vector} is skipped.
     *
     * @param vector  all document paths
     * @param vector2 indices into {@code vector}
     * @return the selected paths in index-list order, or {@code null} if either argument is
     *         {@code null}
     */
    public static Vector<String> getMaskedDocumentPaths(Vector<String> vector, Vector<Integer> vector2) {
        if (vector == null || vector2 == null) {
            return null;
        }
        Vector<String> selected = new Vector<>();
        for (Integer boxedIndex : vector2) {
            int index = boxedIndex.intValue();
            boolean outOfRange = index < 0 || index >= vector.size();
            if (outOfRange) {
                continue;
            }
            selected.addElement(vector.elementAt(index));
        }
        return selected;
    }

    /**
     * Collects the distinct terms found in the plain text of all documents of a directory.
     *
     * <p>Each file accepted by {@code fileFilter} is wrapped in a {@code Webpage}; its plain text
     * is fed to {@code HashtableTool.updateWordsOccurrences}, and the keys of the resulting table
     * are returned. A directory that cannot be listed is treated as containing no documents.
     *
     * @param file       directory containing the documents
     * @param fileFilter selects the files to process
     * @param jLabel     optional label used to display progress; may be {@code null}
     * @return the extracted terms (in the table's key enumeration order), or {@code null} when no
     *         term was found
     */
    @SuppressWarnings("rawtypes")
    public static Vector<String> extractTermsFromDocuments(File file, FileFilter fileFilter, JLabel jLabel) {
        File[] documents = file.listFiles(fileFilter);
        if (documents == null) {
            // listFiles() returns null when 'file' is not a directory or an I/O error occurs.
            documents = new File[0];
        }

        Hashtable termOccurrences = new Hashtable();
        for (File document : documents) {
            try {
                showExtractionStatus(jLabel, "<html>Extracting terms from document <b>" + document + "</b></html>");
            } catch (Exception e) {
                // A failing progress display must not abort the extraction.
                e.printStackTrace();
            }
            HashtableTool.updateWordsOccurrences(new Webpage(document).getPlainText(), termOccurrences, (Hashtable) null, Webpage.delimiterstring);
        }

        Vector<String> terms = null;
        if (!termOccurrences.isEmpty()) {
            showExtractionStatus(jLabel, "<html>Preparing data of extracted terms.</html>");
            terms = new Vector<String>();
            Enumeration keys = termOccurrences.keys();
            while (keys.hasMoreElements()) {
                terms.addElement((String) keys.nextElement());
            }
        }
        showExtractionStatus(jLabel, "<html>Extraction of terms from <b>" + file + "</b> finished.</html>");
        return terms;
    }

    /** Shows {@code html} on {@code label} and repaints it at once; no-op for a {@code null} label. */
    private static void showExtractionStatus(JLabel label, String html) {
        if (label == null) {
            return;
        }
        label.setText(html);
        label.validate();
        label.paintImmediately(0, 0, label.getWidth(), label.getHeight());
    }

    /**
     * Convenience overload of {@link #extractTermsFromDocuments(File, FileFilter, JLabel)} that
     * passes {@code null} as the progress label.
     *
     * @param file       directory containing the documents
     * @param fileFilter selects the files to process
     * @return the extracted terms as produced by the three-argument overload
     */
    public static Vector<String> extractTermsFromDocuments(File file, FileFilter fileFilter) {
        final JLabel noProgressLabel = null;
        return extractTermsFromDocuments(file, fileFilter, noProgressLabel);
    }

    /**
     * Rewrites the directory paths stored in an entity term profile XML file so that they point
     * to the directory of the same name located next to the XML file.
     *
     * <p>The new directory is {@code <parent of file>/<last component of the stored directory>/}.
     * If that already equals the stored directory nothing is changed. Otherwise every occurrence
     * of the stored directory in the file text (with and without trailing separator, using
     * either {@code /} or {@code \}) is replaced, the file is written back, and each single term
     * list of the re-read profile is pointed at the new directory and stored as
     * {@code <local file>.xml} (UTF-8). Errors while writing a term list are printed and do not
     * stop the remaining ones.
     *
     * @param file entity term profile XML file to update in place
     */
    public static void updatePathsInETP(File file) {
        String oldDir = getEntityTermProfileFromXML(file).getDirLocal().getPath();
        if (oldDir.endsWith(File.separator)) {
            oldDir = oldDir.substring(0, oldDir.length() - 1);
        }
        if (oldDir.isEmpty()) {
            // Nothing sensible to relocate; replacing "" or a bare separator would wreck the file.
            return;
        }
        String dirName = oldDir.substring(oldDir.lastIndexOf(File.separator) + 1);

        String parent = file.getParent();
        if (parent == null) {
            // A bare relative file name has no parent; previously this produced the text "null".
            parent = file.getAbsoluteFile().getParent();
        }
        String newDir = parent + File.separator + dirName + File.separator;
        if (new File(newDir).equals(new File(oldDir))) {
            return;
        }

        // The order matters: the variants with trailing separator go first so that the bare
        // directory replacement does not leave a doubled separator behind.
        // NOTE(review): if newDir itself contains oldDir as a substring, the later passes will
        // rewrite text inserted by the earlier ones - confirm this cannot happen in practice.
        String oldDirWithSeparator = oldDir.concat(File.separator);
        String content = getFileContent(file);
        content = content.replace(oldDirWithSeparator, newDir);
        content = content.replace(withForeignSeparators(oldDirWithSeparator), newDir);
        content = content.replace(oldDir, newDir);
        content = content.replace(withForeignSeparators(oldDir), newDir);
        setFileContent(file, content);

        Vector<SingleTermList> termLists = getEntityTermProfileFromXML(file).getSingleTermLists();
        for (int index = 0; index < termLists.size(); index++) {
            SingleTermList termList = termLists.elementAt(index);
            termList.setFileLocal(new File(newDir + termList.getFileLocal().getName()));
            // try-with-resources closes the file even when serialisation fails.
            try (OutputStreamWriter writer = new OutputStreamWriter(
                    new FileOutputStream(new File(termList.getFileLocal().toString() + ".xml")), "UTF8")) {
                termList.writeXML(XMLOutputFactory.newInstance().createXMLStreamWriter(writer));
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /** Returns {@code path} with the platform separator swapped for the other one of {@code /} and {@code \}. */
    private static String withForeignSeparators(String path) {
        if (File.separator.equals("\\")) {
            return path.replace("\\", "/");
        }
        if (File.separator.equals("/")) {
            return path.replace("/", "\\");
        }
        return path;
    }

    /**
     * Reads a text file line by line and returns its content.
     *
     * <p>Every line, including the last one, is terminated with the platform line separator, so
     * the original line endings are not preserved. The file is decoded with the platform default
     * charset ({@link FileReader}). I/O errors, including a missing file, are printed and the
     * text read up to that point is returned.
     *
     * @param file file to read
     * @return the file content, or an empty string if nothing could be read
     */
    public static String getFileContent(File file) {
        StringBuilder content = new StringBuilder();
        String lineSeparator = System.getProperty("line.separator");
        // try-with-resources replaces the hand-written close logic on every exit path.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append(lineSeparator);
            }
        } catch (IOException e) {
            // Also covers FileNotFoundException, which is an IOException.
            e.printStackTrace();
        }
        return content.toString();
    }

    /**
     * Replaces the content of a file with the given text.
     *
     * <p>The text is encoded with the platform default charset ({@link FileWriter}). I/O errors
     * are printed and not propagated; the writer is closed on every exit path.
     *
     * @param file file to overwrite (created if it does not exist)
     * @param str  text to write
     */
    public static void setFileContent(File file, String str) {
        try (BufferedWriter out = new BufferedWriter(new FileWriter(file))) {
            out.write(str);
        } catch (IOException ioError) {
            ioError.printStackTrace();
        }
    }
}
