package comirva.web.crawling;

import comirva.config.WebCrawlingConfig;
import comirva.util.external.TextFormatTool;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.net.URL;
import java.util.Enumeration;
import java.util.Vector;
import javax.swing.JLabel;

/* loaded from: input_file:comirva/web/crawling/WebCrawling.class */
/**
 * Background thread that queries a search engine for a list of search terms
 * and hands every returned URL to an external crawler program for download.
 *
 * <p>For each search term a sub-directory (named after the sanitised term) is
 * created below the configured storage path. Every distinct result URL is
 * passed to the external crawler together with a numbered output file
 * ({@code <index>.html}). Optionally the list of URLs found for a term is
 * written to {@code urls.dat} in the term's directory.
 *
 * <p>Progress messages are written to the optional status bar label.
 * NOTE(review): the label is updated directly from this thread rather than
 * from the Swing event dispatch thread - confirm this is acceptable.
 */
public class WebCrawling extends Thread {
    /** Number of result URLs requested from the search engine per query batch. */
    private static final int MAX_RETURNED_PAGES = 100;

    /** Directory below which retrieved pages are stored; normalised in {@link #run()}. */
    private String storagePath;
    /** Per search term: index used for the next output file name. */
    private int[] lastIndex;
    /** Per search term: URLs already handed to the crawler (used for de-duplication). */
    private Vector[] searchQueryUrls;
    /** Optional label receiving progress messages; may be {@code null}. */
    private final JLabel statusBar;
    private final WebCrawlingConfig wcCfg;
    /** Search terms (expected to be {@code String}s). */
    private final Vector searchWords;

    /**
     * Creates a crawl job. Nothing is executed until the thread is started.
     *
     * @param webCrawlingConfig crawl settings; its storage path is read immediately
     * @param vector            search terms to process
     * @param jLabel            status label for progress messages, or {@code null}
     */
    public WebCrawling(WebCrawlingConfig webCrawlingConfig, Vector vector, JLabel jLabel) {
        this.storagePath = webCrawlingConfig.getPathStoreRetrievedPages();
        this.statusBar = jLabel;
        this.wcCfg = webCrawlingConfig;
        this.searchWords = vector;
    }

    /**
     * Runs the complete crawl: prepares the storage directory, processes all
     * search terms once per batch of {@link #MAX_RETURNED_PAGES} requested
     * pages, optionally writes the URL lists and finally reports completion.
     */
    @Override // java.lang.Thread, java.lang.Runnable
    public void run() {
        // Allocate the per-term bookkeeping first so that a problem with the
        // storage path can no longer leave these arrays null.
        int termCount = this.searchWords.size();
        this.lastIndex = new int[termCount];
        this.searchQueryUrls = new Vector[termCount];
        for (int term = 0; term < termCount; term++) {
            this.searchQueryUrls[term] = new Vector();
        }
        prepareStorageDirectory();

        // Divide as double: an integer division would truncate before
        // Math.ceil could round up (e.g. 150 requested pages -> 1 batch).
        int batches = (int) Math.ceil(this.wcCfg.getNumberOfRequestedPages() / (double) MAX_RETURNED_PAGES);
        for (int batch = 0; batch < batches; batch++) {
            WebCrawlingConfig batchCfg = new WebCrawlingConfig(
                    this.wcCfg.getSearchEngineURL(),
                    this.wcCfg.getNumberOfRetries(),
                    this.wcCfg.getIntervalBetweenRetries(),
                    batch * MAX_RETURNED_PAGES,
                    this.wcCfg.getAdditionalKeywords(),
                    this.wcCfg.getAdditionalKeywordsAfterSearchString(),
                    this.wcCfg.getNumberOfRequestedPages(),
                    this.storagePath,
                    this.wcCfg.getPathExternalCrawler(),
                    this.wcCfg.isStoreURLList(),
                    this.wcCfg.isQuoteSearchTerms());
            // NOTE(review): the configured first requested page number is used
            // here as the index of the first search term to process, while the
            // per-batch page offset above ignores it. This looks unintended but
            // is kept as-is - confirm the desired semantics.
            startCrawl(this.searchWords, batchCfg, this.wcCfg.getFirstRequestedPageNumber());
        }

        if (this.wcCfg.isStoreURLList()) {
            for (int term = 0; term < this.searchQueryUrls.length; term++) {
                writeUrlList(term);
            }
        }
        setStatus("Web Crawl finished.");
    }

    /**
     * Processes the search terms from index {@code i} to the end of the list.
     * A failure while handling one term is printed and the next term is tried.
     *
     * @param vector            search terms
     * @param webCrawlingConfig settings for this batch
     * @param i                 index of the first term to process
     */
    private void startCrawl(Vector vector, WebCrawlingConfig webCrawlingConfig, int i) {
        for (int termIndex = i; termIndex < vector.size(); termIndex++) {
            try {
                crawlTerm(vector, termIndex, webCrawlingConfig);
            } catch (Exception e) {
                // Skip the failed term and carry on with the next one
                // (iteratively, instead of recursing once per failure).
                e.printStackTrace();
            }
        }
    }

    /** Queries the search engine for one term and launches the crawler for each new result URL. */
    private void crawlTerm(Vector terms, int termIndex, WebCrawlingConfig cfg) throws Exception {
        String term = (String) terms.elementAt(termIndex);
        String query = buildQuery(term, cfg);

        AnySearch anySearch = new AnySearch(cfg, cfg.getSearchEngineURL(), query);
        setStatus("<html>Processing query <b>" + query + "</b>, pages " + (cfg.getFirstRequestedPageNumber() + 1) + " to " + (cfg.getFirstRequestedPageNumber() + MAX_RETURNED_PAGES) + "</html>");

        URL[] resultURLs = anySearch.getResultURLs(MAX_RETURNED_PAGES);
        if (resultURLs == null) {
            // Treat "no result array" like "no results" instead of failing with an NPE.
            resultURLs = new URL[0];
        }
        setStatus("<html>Retrieving approximately " + resultURLs.length + " pages for query <b>" + query + "</b></html>");

        if (resultURLs.length != 0) {
            String termDirectory = this.storagePath + TextFormatTool.removeUnwantedChars(term);
            new File(termDirectory).mkdir();

            // The configured crawler command is still split on whitespace, as
            // Runtime.exec(String) did, so settings that carry extra options
            // keep working. All other arguments are passed verbatim.
            String[] crawlerCommand = String.valueOf(cfg.getPathExternalCrawler()).trim().split("\\s+");

            for (int k = 0; k < resultURLs.length; k++) {
                URL url = resultURLs[k];
                if (this.searchQueryUrls[termIndex].indexOf(url) != -1) {
                    continue; // already handed to the crawler
                }
                this.searchQueryUrls[termIndex].addElement(url);
                String outputFile = termDirectory + "/" + TextFormatTool.leadingDoubleZero(Integer.toString(this.lastIndex[termIndex])) + ".html";
                // The crawler process is started but not waited for.
                Runtime.getRuntime().exec(buildCrawlerCommand(crawlerCommand, cfg, outputFile, url.toString()));
                this.lastIndex[termIndex]++;
            }
        }
        setStatus("<html>" + this.lastIndex[termIndex] + " pages for query <b>" + query + "</b> retrieved</html>");
    }

    /** Builds the query string: the (optionally quoted) term plus the additional keywords before or after it. */
    private static String buildQuery(String term, WebCrawlingConfig cfg) {
        String quote = cfg.isQuoteSearchTerms() ? "\"" : "";
        String query = quote + term + quote;

        String additionalKeywords = cfg.getAdditionalKeywords();
        // Compare by content; a reference comparison with "" is unreliable.
        if (additionalKeywords == null || additionalKeywords.length() == 0) {
            return query;
        }
        if (cfg.getAdditionalKeywordsAfterSearchString()) {
            // NOTE(review): no separator is inserted here, unlike the
            // "keywords first" case below - kept as in the original; confirm.
            return query + additionalKeywords;
        }
        return additionalKeywords + " " + query;
    }

    /** Assembles the crawler argument vector: command, -t retries, -T interval, -O output file, URL. */
    private static String[] buildCrawlerCommand(String[] crawlerCommand, WebCrawlingConfig cfg, String outputFile, String url) {
        String[] options = {
            "-t", String.valueOf(cfg.getNumberOfRetries()),
            "-T", String.valueOf(cfg.getIntervalBetweenRetries()),
            "-O", outputFile,
            url
        };
        String[] command = new String[crawlerCommand.length + options.length];
        System.arraycopy(crawlerCommand, 0, command, 0, crawlerCommand.length);
        System.arraycopy(options, 0, command, crawlerCommand.length, options.length);
        return command;
    }

    /** Ensures the storage path ends with a separator and that the directory exists. */
    private void prepareStorageDirectory() {
        if (this.storagePath == null || this.storagePath.length() == 0) {
            // Previously this case failed silently and broke the whole crawl.
            System.err.println("WebCrawling: no storage path configured; storing pages relative to the working directory.");
            this.storagePath = "";
            return;
        }
        char last = this.storagePath.charAt(this.storagePath.length() - 1);
        if (last != '/' && last != '\\') {
            this.storagePath = this.storagePath + "/";
        }
        try {
            File directory = new File(this.storagePath);
            if (!directory.isDirectory() && !directory.mkdirs()) {
                System.err.println("WebCrawling: could not create storage directory " + this.storagePath);
            }
        } catch (SecurityException e) {
            e.printStackTrace();
        }
    }

    /** Writes the URLs collected for the given term to urls.dat in the term's directory, one per line. */
    private void writeUrlList(int termIndex) {
        Enumeration urls = this.searchQueryUrls[termIndex].elements();
        BufferedWriter writer = null;
        try {
            writer = new BufferedWriter(new FileWriter(new File(this.storagePath + TextFormatTool.removeUnwantedChars((String) this.searchWords.elementAt(termIndex)) + "/urls.dat")));
            while (urls.hasMoreElements()) {
                writer.write(((URL) urls.nextElement()).toString() + "\n");
            }
            writer.flush();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close even when writing failed so the file handle is not leaked.
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /** Shows a message in the status bar, if one was supplied. */
    private void setStatus(String message) {
        if (this.statusBar != null) {
            this.statusBar.setText(message);
        }
    }
}
