package comirva.web.indexing;

import cp.util.TextTool;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.search.FieldCacheTermsFilter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.store.SimpleFSDirectory;

/* loaded from: input_file:comirva/web/indexing/TermWeighting_Lucene.class */
/**
 * Computes term weights for a list of artists from a Lucene index.
 *
 * <p>For every artist read from {@link #ARTIST_FILE}, the documents whose
 * {@code artist} field matches the artist are looked up in the index at
 * {@link #INDEX_DIR}, and the stored term vectors of their {@code contents}
 * field are aggregated. The following files are written to {@link #OUTPUT_DIR}:
 * <ul>
 *   <li>{@code Global_DF.txt} - per term, the number of artists having the term,</li>
 *   <li>{@code Global_IDF.txt} - per term, {@code log1p(numberOfArtists / df)},</li>
 *   <li>{@code TF_<artist>.txt} - per term, the summed term frequency for that artist,</li>
 *   <li>{@code TFIDF_<artist>.txt} - per term, {@code log1p(tf) * idf}.</li>
 * </ul>
 * Every line has the form {@code term:value}.
 */
public class TermWeighting_Lucene {
    protected static BufferedWriter BW;
    private static IndexReader reader;
    private static Searcher searcher;
    protected static final File INDEX_DIR = new File("C:/Research/Data/web_crawls/C412a/Google_2010-01-08/M_index/");
    protected static final File OUTPUT_DIR = new File("C:/Research/Data/web_crawls/C412a/Google_2010-01-08/M_weights");
    protected static final File ARTIST_FILE = new File("C:/Research/Data/web_crawls/C412a/C412a.txt");
    private static final List<String> artists = new ArrayList<>();
    private static Analyzer analyzer = new HTMLAnalyzer("C:/Research/Data/web_crawls/music_terms.txt");

    /** Upper bound on the number of documents retrieved per artist. */
    private static final int MAX_DOCS_PER_ARTIST = 10000;

    /**
     * Aggregates term statistics for all loaded artists and writes the DF, IDF,
     * TF and TF-IDF files.
     *
     * <p>Expects the static index reader/searcher and the artist list to have been
     * initialised (as {@link #main(String[])} does). Any failure is reported via
     * its stack trace and aborts the run; no exception is propagated to the caller.
     */
    public void run() {
        try {
            Map<String, Integer> documentFrequencies = new HashMap<>();
            List<Map<String, Integer>> termFrequenciesPerArtist = new ArrayList<>(artists.size());
            for (String artist : artists) {
                String artistName = artist.toLowerCase();
                System.out.println("artist: " + artistName);
                termFrequenciesPerArtist.add(collectTermFrequencies(artistName, documentFrequencies));
            }

            Map<String, Float> inverseDocumentFrequencies = writeGlobalStatistics(documentFrequencies);

            for (int i = 0; i < artists.size(); i++) {
                writeArtistWeights(artists.get(i).toLowerCase(), termFrequenciesPerArtist.get(i),
                        inverseDocumentFrequencies);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Sums the term frequencies over all indexed documents of one artist and
     * increments the document frequency of every term the artist uses.
     *
     * @param artistName          lower-cased artist name
     * @param documentFrequencies global term -> artist-count map, updated in place
     * @return term -> summed frequency for this artist
     * @throws IOException if the index cannot be searched or read
     */
    private static Map<String, Integer> collectTermFrequencies(String artistName,
            Map<String, Integer> documentFrequencies) throws IOException {
        Map<String, Integer> artistTermFrequencies = new HashMap<>();
        // Terms already counted towards the DF for this artist; a term counts once per artist.
        Set<String> countedTerms = new HashSet<>();

        FieldCacheTermsFilter artistFilter =
                new FieldCacheTermsFilter("artist", new String[]{TextTool.removeUnwantedChars(artistName)});
        ScoreDoc[] hits = searcher.search(new MatchAllDocsQuery(), artistFilter, MAX_DOCS_PER_ARTIST).scoreDocs;

        for (ScoreDoc hit : hits) {
            TermFreqVector termVector = reader.getTermFreqVector(hit.doc, "contents");
            if (termVector == null) {
                // No term vector stored for this document's "contents" field.
                continue;
            }
            String[] terms = termVector.getTerms();
            int[] frequencies = termVector.getTermFrequencies();
            for (int t = 0; t < terms.length; t++) {
                String term = terms[t];
                Integer previous = artistTermFrequencies.get(term);
                artistTermFrequencies.put(term,
                        Integer.valueOf(previous == null ? frequencies[t] : previous.intValue() + frequencies[t]));

                if (frequencies[t] > 0 && countedTerms.add(term)) {
                    Integer df = documentFrequencies.get(term);
                    documentFrequencies.put(term, Integer.valueOf(df == null ? 1 : df.intValue() + 1));
                }
            }
        }
        return artistTermFrequencies;
    }

    /**
     * Writes {@code Global_DF.txt} and {@code Global_IDF.txt} and returns the
     * computed IDF values.
     *
     * @param documentFrequencies term -> number of artists having the term
     * @return term -> {@code log1p(numberOfArtists / df)}
     * @throws IOException if an output file cannot be written
     */
    private static Map<String, Float> writeGlobalStatistics(Map<String, Integer> documentFrequencies)
            throws IOException {
        Map<String, Float> inverseDocumentFrequencies = new HashMap<>();
        try (BufferedWriter dfWriter = new BufferedWriter(new FileWriter(new File(OUTPUT_DIR, "Global_DF.txt")));
             BufferedWriter idfWriter = new BufferedWriter(new FileWriter(new File(OUTPUT_DIR, "Global_IDF.txt")))) {
            for (Map.Entry<String, Integer> entry : documentFrequencies.entrySet()) {
                String term = entry.getKey();
                Integer df = entry.getValue();
                // Divide in floating point: integer division would truncate the ratio
                // (e.g. 412/300 and 412/400 would both become 1).
                Float idf = Float.valueOf((float) Math.log1p((double) artists.size() / df.intValue()));
                inverseDocumentFrequencies.put(term, idf);
                dfWriter.append(term + ":" + df + "\n");
                idfWriter.append(term + ":" + idf + "\n");
            }
        }
        return inverseDocumentFrequencies;
    }

    /**
     * Writes the {@code TF_<artist>.txt} and {@code TFIDF_<artist>.txt} files for
     * one artist. Every term of the global vocabulary gets a line; terms the
     * artist does not use are written with value {@code 0}.
     *
     * @param artistName                 lower-cased artist name
     * @param artistTermFrequencies      term -> summed frequency for this artist
     * @param inverseDocumentFrequencies term -> IDF over the global vocabulary
     * @throws IOException if an output file cannot be written
     */
    private static void writeArtistWeights(String artistName, Map<String, Integer> artistTermFrequencies,
            Map<String, Float> inverseDocumentFrequencies) throws IOException {
        String fileStem = TextTool.removeUnwantedChars(artistName);
        System.out.println("storing term weights of artist: " + artistName);
        try (BufferedWriter tfWriter =
                     new BufferedWriter(new FileWriter(new File(OUTPUT_DIR, "TF_" + fileStem + ".txt")));
             BufferedWriter tfIdfWriter =
                     new BufferedWriter(new FileWriter(new File(OUTPUT_DIR, "TFIDF_" + fileStem + ".txt")))) {
            for (Map.Entry<String, Float> entry : inverseDocumentFrequencies.entrySet()) {
                String term = entry.getKey();
                Integer tf = artistTermFrequencies.get(term);
                if (tf == null) {
                    tfWriter.append(term + ":0\n");
                    tfIdfWriter.append(term + ":0\n");
                } else {
                    float weight = (float) Math.log1p(tf.intValue()) * entry.getValue().floatValue();
                    tfWriter.append(term + ":" + tf + "\n");
                    tfIdfWriter.append(term + ":" + weight + "\n");
                }
            }
        }
    }

    /**
     * Loads the artist list (one artist per line of {@link #ARTIST_FILE}), opens
     * the index, runs the term weighting and prints the elapsed time in
     * milliseconds.
     *
     * @param strArr command-line arguments (unused)
     * @throws Exception if the artist file or the index cannot be opened or closed
     */
    public static void main(String[] strArr) throws Exception {
        try (BufferedReader artistReader = new BufferedReader(new FileReader(ARTIST_FILE))) {
            String line;
            while ((line = artistReader.readLine()) != null) {
                artists.add(line);
            }
        }
        System.out.println("number of artists: " + artists.size());

        reader = IndexReader.open(new SimpleFSDirectory(INDEX_DIR));
        long start;
        try {
            searcher = new IndexSearcher(reader);
            start = System.currentTimeMillis();
            new TermWeighting_Lucene().run();
        } finally {
            // Release index resources even when searcher creation or the run fails.
            try {
                if (searcher != null) {
                    searcher.close();
                }
            } finally {
                reader.close();
            }
        }
        System.out.print(System.currentTimeMillis() - start);
        System.out.println(" total milliseconds");
    }
}
