package comirva.web.indexing;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.Reader;
import java.util.Set;
import java.util.TreeSet;
import java.util.Vector;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;

/* loaded from: input_file:comirva/web/indexing/HTMLAnalyzer.class */
/**
 * Lucene {@link Analyzer} that tokenizes text with a {@link StandardTokenizer}, lower-cases the
 * tokens, removes English stop words and then builds shingles from the result.
 *
 * <p>When a dictionary has been loaded (see {@link #HTMLAnalyzer(String)}), the shingle stream is
 * additionally wrapped in an {@code IncludeTermsFilter} that is handed the dictionary terms.
 *
 * <p><b>Note:</b> the dictionary is kept in the static field {@link #includeWords}. Constructing an
 * analyzer with a dictionary path therefore changes the behaviour of <em>every</em>
 * {@code HTMLAnalyzer} instance, including ones created with the no-argument constructor.
 */
public class HTMLAnalyzer extends Analyzer {
    /** Default value for the maximum token length passed to the tokenizer. */
    public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;

    /** Maximum token length handed to the {@link StandardTokenizer} of each new token stream. */
    private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;

    /** Dictionary file most recently passed to {@link #HTMLAnalyzer(String)}; shared by all instances. */
    private static File dictFile;

    /**
     * Terms read from the dictionary file, one per line, or {@code null} if no dictionary has been
     * loaded (or the last load attempt failed). Shared by all instances.
     */
    public static Set<String> includeWords;

    /**
     * Creates an analyzer without touching the shared dictionary. Whether the include filter is
     * applied depends on the current value of {@link #includeWords}.
     */
    public HTMLAnalyzer() {
    }

    /**
     * Creates an analyzer and (re)loads the shared dictionary from the given file.
     *
     * <p>Every line of the file becomes one entry of {@link #includeWords}. Loading is best-effort:
     * if the file cannot be read, the stack trace is printed and {@link #includeWords} is left as
     * {@code null}, so no include filter is applied.
     *
     * @param str path of the dictionary file, one term per line (read with the platform default
     *            charset, as {@link FileReader} does)
     */
    public HTMLAnalyzer(String str) {
        dictFile = new File(str);
        // Reset first so that a failed load does not leave a stale dictionary from an earlier call.
        includeWords = null;
        try {
            BufferedReader dictReader = new BufferedReader(new FileReader(dictFile));
            try {
                Set<String> words = new TreeSet<String>();
                String line;
                while ((line = dictReader.readLine()) != null) {
                    words.add(line);
                }
                // Publish only after the whole file was read successfully.
                includeWords = words;
            } finally {
                // Always release the file handle, even when reading fails part-way.
                dictReader.close();
            }
        } catch (Exception e) {
            // Deliberately best-effort: report the problem and continue without a dictionary.
            e.printStackTrace();
        }
    }

    /**
     * Builds the analysis chain for one field: standard tokenizer, standard filter, lower-casing,
     * English stop-word removal, shingling (constructed with the value 4) and, if a dictionary is
     * loaded, the {@code IncludeTermsFilter}.
     *
     * @param str    name of the field being analyzed (not used by this implementation)
     * @param reader source of the text to tokenize
     * @return the assembled token stream
     */
    public TokenStream tokenStream(String str, Reader reader) {
        StandardTokenizer tokenizer = new StandardTokenizer(Version.LUCENE_30, reader);
        tokenizer.setMaxTokenLength(this.maxTokenLength);

        TokenStream normalized = new LowerCaseFilter(new StandardFilter(tokenizer));
        TokenStream withoutStopWords = new StopFilter(true, normalized, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
        TokenStream result = new ShingleFilter(withoutStopWords, 4);

        if (includeWords != null) {
            // NOTE(review): the meaning of the two boolean flags is defined by IncludeTermsFilter,
            // which is not visible here; they are passed through unchanged.
            result = new IncludeTermsFilter(true, result, includeWords, true);
        }
        return result;
    }

    /**
     * Sets the maximum token length applied to tokenizers created by subsequent
     * {@link #tokenStream(String, Reader)} calls.
     *
     * @param i the new maximum token length
     */
    public void setMaxTokenLength(int i) {
        this.maxTokenLength = i;
    }

    /**
     * Returns the maximum token length currently configured for this analyzer.
     *
     * @return the maximum token length
     */
    public int getMaxTokenLength() {
        return this.maxTokenLength;
    }
}
