package comirva.data;

import com.sun.jna.platform.win32.COM.tlb.imp.TlbBase;
import comirva.audio.XMLSerializable;
import comirva.util.TermProfileUtils;
import comirva.util.VectorSort;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.StringTokenizer;
import java.util.Vector;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import javax.xml.stream.XMLStreamWriter;
import org.apache.commons.math3.optimization.direct.CMAESOptimizer;
import org.springframework.beans.propertyeditors.CustomBooleanEditor;

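/**
 * Term profile of one entity, aggregated over the documents stored in a local directory:
 * term frequencies, document frequencies and TFxIDF weights, readable from and writable to XML.
 *
 * <p>Decompiled source; loaded from: input_file:comirva/data/EntityTermProfile.class
 */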
public class EntityTermProfile implements XMLSerializable, Serializable {
    // One term list per processed document, in the order the documents were processed.
    Vector<SingleTermList> singleTermLists;
    // Distinct terms of the entity; the position of a term is its index in
    // termOccurrenceOnDocuments and the first index of tfDocs.
    Vector<String> terms;
    // Term -> number of occurrences summed over all documents.
    Hashtable<String, Long> termFrequency;
    // For the term at the same position in `terms`: indices of the documents containing it.
    Vector<Vector<Integer>> termOccurrenceOnDocuments;
    // Term -> number of documents containing the term.
    Hashtable<String, Integer> documentFrequency;
    // Term -> TFxIDF weight.
    Hashtable<String, Double> TFxIDF;
    // NOTE(review): never assigned anywhere in this class as far as visible here.
    Hashtable<String, Double> IDF;
    // tfDocs[termIndex][documentIndex] = occurrences of the term in that document.
    int[][] tfDocs;
    // Directory holding the entity's documents (and optionally urls.dat).
    File dirLocal;
    // Name of the entity; only stored and serialized by this class.
    String entityName;
    // Number of documents that were processed for this entity.
    Integer numberDocuments;
    // Free-text crawl description; only stored and serialized by this class.
    String crawlDetails;
    // Handed unchanged to every SingleTermList; presumably audio file extensions -- TODO confirm.
    Vector<String> extAudio;
    // Handed unchanged to every SingleTermList; presumably image file extensions -- TODO confirm.
    Vector<String> extImage;
    // Handed unchanged to every SingleTermList; presumably video file extensions -- TODO confirm.
    Vector<String> extVideo;
    // When true, calculateOccurrences prints progress information to System.out.
    private boolean isDebugMode = false;

    /**
     * Creates a profile bound to a local document directory.
     *
     * @param file directory whose documents are analysed by {@code calculateOccurrences}
     */
    public EntityTermProfile(File file) {
        super();
        dirLocal = file;
    }

    /**
     * Creates an empty profile without a local directory, e.g. as a target for {@code readXML}.
     */
    public EntityTermProfile() {
        super();
    }

    /**
     * Builds the term profile of this entity from the documents in the local directory.
     *
     * <p>Every file of the directory accepted by {@code fileFilter} is turned into a
     * {@link SingleTermList}, which is additionally written next to the document as
     * {@code <document path>.xml}. From these lists the method derives the entity-wide term
     * list, the summed term frequencies, the per-term document occurrences, the document
     * frequencies and the TFxIDF weights ({@code tf * ln(N / df)}, with {@code N} the number
     * of documents).
     *
     * <p>If a file {@code urls.dat} exists in the directory and contains exactly one line per
     * accepted document, line <i>i</i> is stored as the source URL of document <i>i</i>.
     *
     * <p>The method does nothing when the local directory is unset, is not a directory or
     * cannot be listed.
     *
     * @param vector     term list handed to {@code SingleTermList.calculateOccurrences} for
     *                   every document
     * @param fileFilter selects the document files; passed to {@link File#listFiles(FileFilter)}
     */
    public void calculateOccurrences(Vector<String> vector, FileFilter fileFilter) {
        if (this.dirLocal == null || !this.dirLocal.isDirectory()) {
            return;
        }
        // listFiles() returns null (not an empty array) when the directory cannot be read.
        File[] listFiles = this.dirLocal.listFiles(fileFilter);
        if (listFiles == null) {
            return;
        }
        Vector<String> urls = readUrlList(new File(this.dirLocal, "urls.dat"));
        // URLs are only trusted when there is exactly one per document.
        boolean urlPerDocument = urls != null && urls.size() == listFiles.length;

        if (this.isDebugMode) {
            System.out.println("extracting single term lists");
        }
        this.singleTermLists = new Vector<>();
        this.numberDocuments = Integer.valueOf(listFiles.length);
        for (int docIndex = 0; docIndex < listFiles.length; docIndex++) {
            File document = listFiles[docIndex];
            if (this.isDebugMode) {
                System.out.println("processing " + document);
            }
            SingleTermList singleTermList = new SingleTermList(document);
            singleTermList.setExtAudio(this.extAudio);
            singleTermList.setExtImage(this.extImage);
            singleTermList.setExtVideo(this.extVideo);
            if (urlPerDocument) {
                singleTermList.setUrlSource(urls.elementAt(docIndex));
            }
            singleTermList.calculateOccurrences(vector);
            writeSingleTermListXml(singleTermList, new File(document.getPath() + ".xml"));
            this.singleTermLists.add(singleTermList);
        }

        if (this.isDebugMode) {
            System.out.println("constructing entity term list");
        }
        // termFrequency doubles as the "already seen" set, which keeps term collection linear
        // while `terms` preserves the order in which terms were first encountered.
        this.terms = new Vector<>();
        this.termFrequency = new Hashtable<>();
        for (SingleTermList singleTermList : this.singleTermLists) {
            Enumeration<String> keys = singleTermList.getFrequency().keys();
            while (keys.hasMoreElements()) {
                String term = keys.nextElement();
                if (!this.termFrequency.containsKey(term)) {
                    this.terms.addElement(term);
                    this.termFrequency.put(term, Long.valueOf(0L));
                }
            }
        }
        this.termOccurrenceOnDocuments = new Vector<>();
        for (int termIndex = 0; termIndex < this.terms.size(); termIndex++) {
            this.termOccurrenceOnDocuments.addElement(new Vector<Integer>());
        }

        if (this.isDebugMode) {
            System.out.println("calculating entity's term frequencies");
        }
        // Rows are addressed by the index into `terms`, so there must be at least that many;
        // the size of the supplied term list is kept as a lower bound for existing users.
        int rows = this.terms.size();
        if (vector != null) {
            rows = Math.max(rows, vector.size());
        }
        this.tfDocs = new int[rows][listFiles.length];
        for (int docIndex = 0; docIndex < this.singleTermLists.size(); docIndex++) {
            Hashtable<String, Integer> frequency = this.singleTermLists.elementAt(docIndex).getFrequency();
            for (int termIndex = 0; termIndex < this.terms.size(); termIndex++) {
                String term = this.terms.elementAt(termIndex);
                Integer count = frequency.get(term);
                if (count == null) {
                    continue;
                }
                this.tfDocs[termIndex][docIndex] = count.intValue();
                if (count.intValue() != 0) {
                    // Accumulate in long arithmetic; the sum may exceed the int range.
                    long total = this.termFrequency.get(term).longValue() + count.intValue();
                    this.termFrequency.put(term, Long.valueOf(total));
                    this.termOccurrenceOnDocuments.elementAt(termIndex).addElement(Integer.valueOf(docIndex));
                }
            }
        }

        if (this.isDebugMode) {
            System.out.println("calculating entity's document frequencies");
        }
        this.documentFrequency = new Hashtable<>();
        for (int termIndex = 0; termIndex < this.termOccurrenceOnDocuments.size(); termIndex++) {
            int documentsWithTerm = this.termOccurrenceOnDocuments.elementAt(termIndex).size();
            this.documentFrequency.put(this.terms.elementAt(termIndex), Integer.valueOf(documentsWithTerm));
        }

        if (this.isDebugMode) {
            System.out.println("calculating entity's TFxIDFs");
        }
        this.TFxIDF = new Hashtable<>();
        for (int termIndex = 0; termIndex < this.terms.size(); termIndex++) {
            String term = this.terms.elementAt(termIndex);
            long tf = this.termFrequency.get(term).longValue();
            int df = this.documentFrequency.get(term).intValue();
            // The ratio N/df must be a floating point division; an integer division would
            // truncate it (e.g. 10/4 -> 2) before the logarithm is taken.
            double weight = df != 0 ? tf * Math.log((double) this.numberDocuments.intValue() / df) : 0.0;
            this.TFxIDF.put(term, Double.valueOf(weight));
        }

        if (this.isDebugMode) {
            printSortedWeights();
        }
    }

    /**
     * Reads the lines of the URL file.
     *
     * @param urlFile file with one URL per line
     * @return the non-empty lines in file order, or {@code null} when the file does not exist
     *         or its content could not be obtained
     */
    private static Vector<String> readUrlList(File urlFile) {
        if (!urlFile.exists()) {
            return null;
        }
        String content = TermProfileUtils.getFileContent(urlFile);
        if (content == null) {
            return null;
        }
        Vector<String> urls = new Vector<>();
        // Split on both line-break characters so the file is read the same way regardless of
        // the platform it was written on.
        StringTokenizer lines = new StringTokenizer(content, "\r\n");
        while (lines.hasMoreTokens()) {
            urls.add(lines.nextToken());
        }
        return urls;
    }

    /**
     * Writes one document's term list to an XML file; failures are reported on stderr and do
     * not abort the profile calculation.
     */
    private static void writeSingleTermListXml(SingleTermList singleTermList, File target) {
        // try-with-resources closes the file even when serialization fails.
        try (FileOutputStream fileOut = new FileOutputStream(target);
                OutputStreamWriter out = new OutputStreamWriter(fileOut, "UTF8")) {
            singleTermList.writeXML(XMLOutputFactory.newInstance().createXMLStreamWriter(out));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Debug aid: prints all terms with their TFxIDF weight, ordered by VectorSort. */
    @SuppressWarnings({"rawtypes", "unchecked"})
    private void printSortedWeights() {
        Vector sortedTerms = new Vector();
        Vector sortedWeights = new Vector();
        Enumeration<String> keys = this.TFxIDF.keys();
        while (keys.hasMoreElements()) {
            String term = keys.nextElement();
            sortedTerms.addElement(term);
            sortedWeights.addElement(this.TFxIDF.get(term));
        }
        VectorSort.sortWithMetaData(sortedWeights, sortedTerms);
        for (int k = 0; k < sortedTerms.size(); k++) {
            System.out.println(sortedTerms.elementAt(k) + "\t" + sortedWeights.elementAt(k));
        }
    }

    /**
     * Serializes this profile as an {@code <EntityTermProfile>} XML document.
     *
     * <p>Written in this order, each part only if the corresponding data is present:
     * {@code EntityName}, {@code LocalDirectory}, {@code NumberDocuments},
     * {@code CrawlDetails}, one {@code TF} and {@code DF} element per term, one
     * {@code TermOccurrenceOnDocuments} element per term with nested {@code DocumentIndex}
     * elements, one {@code TFxIDF} element per term, one {@code SingleTermListFile} per
     * document and one {@code DocumentInfo} (with nested {@code Content} elements) per
     * document that has a source URL.
     *
     * <p>The {@code TF} attribute of a {@code DocumentIndex} is omitted when no per-document
     * frequency is available for that term/document pair.
     *
     * <p>The writer is flushed and closed on success. Any failure is printed to stderr and
     * ends serialization early.
     *
     * @param xMLStreamWriter destination of the XML document
     */
    @Override // comirva.audio.XMLSerializable
    public void writeXML(XMLStreamWriter xMLStreamWriter) {
        try {
            xMLStreamWriter.writeStartDocument();
            xMLStreamWriter.writeStartElement("EntityTermProfile");

            writeTextElement(xMLStreamWriter, "EntityName", getEntityName());
            File directory = getDirLocal();
            writeTextElement(xMLStreamWriter, "LocalDirectory", directory == null ? null : directory.toString());
            Integer documents = getNumberDocuments();
            writeTextElement(xMLStreamWriter, "NumberDocuments", documents == null ? null : documents.toString());
            writeTextElement(xMLStreamWriter, "CrawlDetails", getCrawlDetails());

            writeTermValues(xMLStreamWriter, "TF", getTermFrequency());
            writeTermValues(xMLStreamWriter, "DF", getDocumentFrequency());
            writeTermOccurrences(xMLStreamWriter);
            writeTermValues(xMLStreamWriter, "TFxIDF", getTFxIDF());
            writeSingleTermListFiles(xMLStreamWriter);
            writeDocumentInfos(xMLStreamWriter);

            xMLStreamWriter.writeEndElement();
            xMLStreamWriter.writeEndDocument();
            xMLStreamWriter.flush();
            xMLStreamWriter.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Writes {@code <name>text</name>}; does nothing when {@code text} is null. */
    private static void writeTextElement(XMLStreamWriter writer, String name, String text) throws XMLStreamException {
        if (text == null) {
            return;
        }
        writer.writeStartElement(name);
        writer.writeCharacters(text);
        writer.writeEndElement();
    }

    /** Writes one {@code <elementName term="...">value</elementName>} per table entry. */
    private static void writeTermValues(XMLStreamWriter writer, String elementName, Hashtable<String, ?> values) throws XMLStreamException {
        if (values == null) {
            return;
        }
        Enumeration<String> keys = values.keys();
        while (keys.hasMoreElements()) {
            String term = keys.nextElement();
            Object value = values.get(term);
            if (value != null) {
                writer.writeStartElement(elementName);
                writer.writeAttribute("term", term);
                writer.writeCharacters(value.toString());
                writer.writeEndElement();
            }
        }
    }

    /** Writes the document indices (and, where known, the in-document frequency) of every term. */
    private void writeTermOccurrences(XMLStreamWriter writer) throws XMLStreamException {
        Vector<Vector<Integer>> occurrences = getTermOccurrenceOnDocuments();
        if (occurrences == null) {
            return;
        }
        for (int termIndex = 0; termIndex < occurrences.size(); termIndex++) {
            Vector<Integer> documents = occurrences.elementAt(termIndex);
            if (documents == null) {
                continue;
            }
            writer.writeStartElement("TermOccurrenceOnDocuments");
            if (this.terms != null && termIndex < this.terms.size()) {
                writer.writeAttribute("term", this.terms.elementAt(termIndex));
            }
            for (Integer document : documents) {
                if (document == null) {
                    continue;
                }
                writer.writeStartElement("DocumentIndex");
                int docIndex = document.intValue();
                // The matrix may be absent or smaller than the occurrence lists (e.g. for a
                // profile that was not calculated in this instance); skip the attribute then
                // instead of failing half-way through the document.
                if (this.tfDocs != null && termIndex < this.tfDocs.length && this.tfDocs[termIndex] != null
                        && docIndex >= 0 && docIndex < this.tfDocs[termIndex].length) {
                    writer.writeAttribute("TF", Integer.toString(this.tfDocs[termIndex][docIndex]));
                }
                writer.writeCharacters(document.toString());
                writer.writeEndElement();
            }
            writer.writeEndElement();
        }
    }

    /** Writes the location of each document's own XML term list. */
    private void writeSingleTermListFiles(XMLStreamWriter writer) throws XMLStreamException {
        if (this.singleTermLists == null) {
            return;
        }
        for (SingleTermList singleTermList : this.singleTermLists) {
            // Test the location itself: once ".xml" is appended the string is never null.
            Object fileLocal = singleTermList.getFileLocal();
            if (fileLocal != null) {
                writeTextElement(writer, "SingleTermListFile", fileLocal + ".xml");
            }
        }
    }

    /** Writes source URL and media content of every document that has a source URL. */
    private void writeDocumentInfos(XMLStreamWriter writer) throws XMLStreamException {
        if (this.singleTermLists == null) {
            return;
        }
        int documentIndex = 0;
        for (SingleTermList singleTermList : this.singleTermLists) {
            String urlSource = singleTermList.getUrlSource();
            if (urlSource != null) {
                writer.writeStartElement("DocumentInfo");
                writer.writeAttribute("no", Integer.toString(documentIndex));
                writer.writeAttribute("url", urlSource);
                writeContentElements(writer, "audio", singleTermList.getAudioContent());
                writeContentElements(writer, "image", singleTermList.getImageContent());
                writeContentElements(writer, "video", singleTermList.getVideoContent());
                writer.writeEndElement();
            }
            // The index counts all documents, including those without URL.
            documentIndex++;
        }
    }

    /** Writes one {@code <Content type="..." url="..."/>} element per URL. */
    private static void writeContentElements(XMLStreamWriter writer, String type, Vector<String> urls) throws XMLStreamException {
        if (urls == null) {
            return;
        }
        for (int k = 0; k < urls.size(); k++) {
            writer.writeStartElement("Content");
            writer.writeAttribute("type", type);
            writer.writeAttribute("url", urls.elementAt(k));
            writer.writeEndElement();
        }
    }

    /**
     * Replaces the state of this profile with the content of an {@code <EntityTermProfile>}
     * XML document as produced by {@code writeXML}.
     *
     * <p>All profile data is cleared first. The reader must be positioned at the start of the
     * document. For every {@code SingleTermListFile} element the referenced file is parsed
     * into a {@link SingleTermList}; if that file cannot be read, the failure is printed and
     * an empty list is added instead. {@code DocumentInfo} elements are skipped including
     * their children. The per-document frequency matrix is restored when every
     * {@code DocumentIndex} carries a valid {@code TF} attribute and all document indices lie
     * below {@code NumberDocuments}; otherwise it is left unset.
     *
     * <p>Any parse failure (including an unknown element) is printed to stderr and ends
     * reading early, leaving the data read so far in place. On success the reader is closed.
     *
     * @param xMLStreamReader source of the XML document
     */
    @Override // comirva.audio.XMLSerializable
    public void readXML(XMLStreamReader xMLStreamReader) {
        this.singleTermLists = new Vector<>();
        this.terms = new Vector<>();
        this.termFrequency = new Hashtable<>();
        this.termOccurrenceOnDocuments = new Vector<>();
        this.documentFrequency = new Hashtable<>();
        this.TFxIDF = new Hashtable<>();
        this.dirLocal = null;
        this.entityName = null;
        this.numberDocuments = null;
        this.crawlDetails = null;
        // A matrix left over from an earlier calculation would not match the data read below.
        this.tfDocs = null;

        // {termIndex, documentIndex, frequency} triples collected from <DocumentIndex TF="..">.
        Vector<int[]> tfCells = new Vector<>();
        boolean tfComplete = true;
        try {
            xMLStreamReader.require(XMLStreamReader.START_DOCUMENT, null, null);
            xMLStreamReader.next();
            xMLStreamReader.require(XMLStreamReader.START_ELEMENT, null, "EntityTermProfile");
            xMLStreamReader.nextTag();
            while (xMLStreamReader.isStartElement()) {
                String localName = xMLStreamReader.getLocalName();
                switch (localName) {
                    case "EntityName":
                        this.entityName = xMLStreamReader.getElementText();
                        break;
                    case "LocalDirectory":
                        this.dirLocal = new File(xMLStreamReader.getElementText());
                        break;
                    case "NumberDocuments":
                        this.numberDocuments = Integer.valueOf(xMLStreamReader.getElementText());
                        break;
                    case "CrawlDetails":
                        this.crawlDetails = xMLStreamReader.getElementText();
                        break;
                    case "TF": {
                        // The attribute has to be fetched before getElementText() moves on.
                        String term = readTermAttribute(xMLStreamReader);
                        this.termFrequency.put(term, Long.valueOf(xMLStreamReader.getElementText()));
                        break;
                    }
                    case "DF": {
                        String term = readTermAttribute(xMLStreamReader);
                        this.documentFrequency.put(term, Integer.valueOf(xMLStreamReader.getElementText()));
                        break;
                    }
                    case "TFxIDF": {
                        String term = readTermAttribute(xMLStreamReader);
                        this.TFxIDF.put(term, Double.valueOf(xMLStreamReader.getElementText()));
                        break;
                    }
                    case "TermOccurrenceOnDocuments":
                        if (!readTermOccurrences(xMLStreamReader, tfCells)) {
                            tfComplete = false;
                        }
                        break;
                    case "SingleTermListFile":
                        this.singleTermLists.add(loadSingleTermList(new File(xMLStreamReader.getElementText())));
                        break;
                    case "DocumentInfo":
                    case "Content":
                        // Not restored from here; skip the whole subtree so that nested
                        // <Content> elements do not derail the top-level loop.
                        skipElement(xMLStreamReader);
                        break;
                    default:
                        throw new XMLStreamException("found unknown tag");
                }
                // Every branch must leave the reader on the end tag of the element it handled.
                xMLStreamReader.require(XMLStreamReader.END_ELEMENT, null, localName);
                xMLStreamReader.nextTag();
            }
            xMLStreamReader.require(XMLStreamReader.END_ELEMENT, null, "EntityTermProfile");
            if (tfComplete) {
                this.tfDocs = buildTfMatrix(tfCells);
            }
            xMLStreamReader.next();
            xMLStreamReader.require(XMLStreamReader.END_DOCUMENT, null, null);
            xMLStreamReader.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the term of the current start element: the {@code term} attribute, or the first
     * attribute when none is named {@code term}.
     */
    private static String readTermAttribute(XMLStreamReader reader) throws XMLStreamException {
        String term = reader.getAttributeValue(null, "term");
        if (term == null && reader.getAttributeCount() > 0) {
            term = reader.getAttributeValue(0);
        }
        if (term == null) {
            throw new XMLStreamException("element <" + reader.getLocalName() + "> has no term attribute");
        }
        return term;
    }

    /**
     * Reads one {@code TermOccurrenceOnDocuments} element, registering the term and its
     * document indices; leaves the reader on the element's end tag.
     *
     * @return {@code true} if every {@code DocumentIndex} had a parseable {@code TF} attribute
     */
    private boolean readTermOccurrences(XMLStreamReader reader, Vector<int[]> tfCells) throws XMLStreamException {
        String term = readTermAttribute(reader);
        int termIndex = this.terms.size();
        this.terms.addElement(term);
        Vector<Integer> documents = new Vector<>();
        boolean everyTfPresent = true;
        // Read children until the end tag instead of trusting a count from elsewhere.
        while (reader.nextTag() == XMLStreamReader.START_ELEMENT) {
            reader.require(XMLStreamReader.START_ELEMENT, null, "DocumentIndex");
            String tf = reader.getAttributeValue(null, "TF");
            Integer document = Integer.valueOf(reader.getElementText());
            documents.addElement(document);
            try {
                if (tf == null) {
                    everyTfPresent = false;
                } else {
                    tfCells.addElement(new int[] {termIndex, document.intValue(), Integer.parseInt(tf)});
                }
            } catch (NumberFormatException ignored) {
                // The attribute is optional information; a malformed value only means the
                // frequency matrix cannot be restored.
                everyTfPresent = false;
            }
        }
        Integer expected = this.documentFrequency.get(term);
        if (expected != null && expected.intValue() != documents.size()) {
            throw new XMLStreamException("number of <DocumentIndex>-tags does not equal the required one according to document frequency of term " + term);
        }
        this.termOccurrenceOnDocuments.addElement(documents);
        return everyTfPresent;
    }

    /**
     * Parses a document's term list from its XML file. Failures are printed to stderr; the
     * (then possibly empty or partial) list is returned in any case.
     */
    private static SingleTermList loadSingleTermList(File file) {
        SingleTermList singleTermList = new SingleTermList();
        // try-with-resources closes the file even when parsing fails.
        try (FileInputStream fileIn = new FileInputStream(file);
                InputStreamReader in = new InputStreamReader(fileIn, "UTF8")) {
            XMLInputFactory factory = XMLInputFactory.newInstance();
            // The file name comes from the XML being read; do not resolve external entities.
            factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
            XMLStreamReader nestedReader = factory.createXMLStreamReader(in);
            singleTermList.readXML(nestedReader);
            nestedReader.close();
        } catch (IOException | XMLStreamException e) {
            e.printStackTrace();
        }
        return singleTermList;
    }

    /** Advances from a start element to its matching end element, ignoring the content. */
    private static void skipElement(XMLStreamReader reader) throws XMLStreamException {
        int depth = 1;
        while (depth > 0) {
            int event = reader.next();
            if (event == XMLStreamReader.START_ELEMENT) {
                depth++;
            } else if (event == XMLStreamReader.END_ELEMENT) {
                depth--;
            }
        }
    }

    /**
     * Builds the term-by-document frequency matrix from the collected cells.
     *
     * @return the matrix, or {@code null} when the number of documents is unknown or a cell
     *         refers to a document outside {@code [0, numberDocuments)}
     */
    private int[][] buildTfMatrix(Vector<int[]> cells) {
        if (this.numberDocuments == null || this.numberDocuments.intValue() < 0) {
            return null;
        }
        int documents = this.numberDocuments.intValue();
        for (int[] cell : cells) {
            if (cell[1] < 0 || cell[1] >= documents) {
                return null;
            }
        }
        int[][] matrix = new int[this.terms.size()][documents];
        for (int[] cell : cells) {
            matrix[cell[0]][cell[1]] = cell[2];
        }
        return matrix;
    }

    /**
     * Selects the terms with the highest weights.
     *
     * <p>Weights are converted to {@code float} via their string representation. Terms are
     * returned in descending weight order; among equal weights the one enumerated first by
     * the table comes first. Negative weights are handled like any other value.
     *
     * @param i         maximum number of terms to return; fewer are returned when the table
     *                  is smaller
     * @param hashtable maps terms ({@code String}) to weights (any object whose
     *                  {@code toString()} is parseable as a float); must not be {@code null}
     * @return the selected terms with their weights
     * @throws NumberFormatException if a weight cannot be parsed as a float
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    public TermsWeights getMostImportantTerms(int i, Hashtable hashtable) {
        ArrayList<String> candidates = new ArrayList<>();
        ArrayList<Float> weights = new ArrayList<>();
        Enumeration keys = hashtable.keys();
        while (keys.hasMoreElements()) {
            String term = (String) keys.nextElement();
            candidates.add(term);
            weights.add(Float.valueOf(hashtable.get(term).toString()));
        }
        TermsWeights termsWeights = new TermsWeights(new ArrayList(), new ArrayList());
        for (int picked = 0; picked < i && !weights.isEmpty(); picked++) {
            // Track the position of the maximum directly. Seeding the search with 0 and
            // looking the value up afterwards fails when every weight is below zero.
            int best = 0;
            for (int k = 1; k < weights.size(); k++) {
                if (Float.compare(weights.get(k).floatValue(), weights.get(best).floatValue()) > 0) {
                    best = k;
                }
            }
            String term = candidates.remove(best);
            Float weight = weights.remove(best);
            termsWeights.add(term, weight);
        }
        return termsWeights;
    }

    /**
     * @return the crawl details text, or {@code null} if none was set or read
     */
    public String getCrawlDetails() {
        return crawlDetails;
    }

    /**
     * Stores the crawl details text that is serialized with the profile.
     *
     * @param str the crawl details; may be {@code null}
     */
    public void setCrawlDetails(String str) {
        crawlDetails = str;
    }

    /**
     * @return the local document directory, or {@code null} if none is set
     */
    public File getDirLocal() {
        return dirLocal;
    }

    /**
     * Sets the local directory whose documents are analysed.
     *
     * @param file the document directory
     */
    public void setDirLocal(File file) {
        dirLocal = file;
    }

    /**
     * @return the name of the entity, or {@code null} if none was set or read
     */
    public String getEntityName() {
        return entityName;
    }

    /**
     * Sets the name of the entity this profile describes.
     *
     * @param str the entity name; may be {@code null}
     */
    public void setEntityName(String str) {
        entityName = str;
    }

    /**
     * @return the internal table mapping each term to its total number of occurrences
     *         (not a copy), or {@code null} before any calculation or read
     */
    public Hashtable<String, Long> getTermFrequency() {
        return termFrequency;
    }

    /**
     * @return the internal table mapping each term to the number of documents containing it
     *         (not a copy), or {@code null} before any calculation or read
     */
    public Hashtable<String, Integer> getDocumentFrequency() {
        return documentFrequency;
    }

    /**
     * @return the internal list (not a copy) that holds, for the term at the same position in
     *         {@link #getTerms()}, the indices of the documents containing that term;
     *         {@code null} before any calculation or read
     */
    public Vector<Vector<Integer>> getTermOccurrenceOnDocuments() {
        return termOccurrenceOnDocuments;
    }

    /**
     * @return the internal list of distinct terms (not a copy), or {@code null} before any
     *         calculation or read
     */
    public Vector<String> getTerms() {
        return terms;
    }

    /**
     * @return the number of documents of the entity, or {@code null} if unknown
     */
    public Integer getNumberDocuments() {
        return numberDocuments;
    }

    /**
     * @return the internal table mapping each term to its TFxIDF weight (not a copy), or
     *         {@code null} before any calculation or read
     */
    public Hashtable<String, Double> getTFxIDF() {
        return TFxIDF;
    }

    /**
     * @return the internal list of per-document term lists (not a copy), or {@code null}
     *         before any calculation or read
     */
    public Vector<SingleTermList> getSingleTermLists() {
        return singleTermLists;
    }

    /**
     * Sets the audio extension list that is handed to every {@link SingleTermList} created by
     * {@code calculateOccurrences}.
     *
     * @param vector the list to pass on; stored by reference
     */
    public void setExtAudio(Vector<String> vector) {
        extAudio = vector;
    }

    /**
     * Sets the image extension list that is handed to every {@link SingleTermList} created by
     * {@code calculateOccurrences}.
     *
     * @param vector the list to pass on; stored by reference
     */
    public void setExtImage(Vector<String> vector) {
        extImage = vector;
    }

    /**
     * Sets the video extension list that is handed to every {@link SingleTermList} created by
     * {@code calculateOccurrences}.
     *
     * @param vector the list to pass on; stored by reference
     */
    public void setExtVideo(Vector<String> vector) {
        extVideo = vector;
    }
}
