package comirva.web.crawling;

import comirva.config.AnySearchConfig;
import comirva.exception.WebCrawlException;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.Socket;
import java.net.SocketException;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.net.URLEncoder;
import java.net.UnknownHostException;
import java.security.AccessControlException;
import java.security.Permission;
import java.util.Hashtable;
import java.util.StringTokenizer;
import java.util.Vector;
import org.apache.activemq.transport.stomp.Stomp;
import org.apache.log4j.spi.LocationInfo;
import org.slf4j.Marker;

/* loaded from: input_file:comirva/web/crawling/AnySearch.class */
public class AnySearch {
    /**
     * Number of results asked for per query; sent as the {@code num} parameter of the
     * search URL. Overwritten from the configuration every time an instance is constructed.
     */
    public static int RESULTS_TO_REQUEST = 50;
    /**
     * Bound on the download loop in the constructor. Overwritten from the configuration
     * every time an instance is constructed.
     */
    public static int MAX_RETRIES = 5;
    /**
     * Pause between two download attempts, in milliseconds (passed to {@code Thread.sleep}).
     * The constructor sets it to the configured interval multiplied by 1000.
     */
    public static int RETRY_INTERVAL = 10000;
    /** Longest time, in milliseconds, the constructor waits for one download thread to finish. */
    public static int MAX_WAITTIME = 60000;
    /** Address currently being requested; replaced by the redirect target when the server redirects. */
    private URL url;
    /** Download thread of the most recent attempt. */
    private ContentReceiver cr;
    /** Limit on the number of redirects followed for one download. */
    private final int MAX_REDIRECTS = 7;
    /** Body of the downloaded page; stays empty until a download stores something. */
    private String content = "";
    /** NOTE(review): neither read nor written anywhere in the visible source. */
    private String plaintext = "";
    /** Addresses already visited while following redirects; used to detect redirect cycles. */
    private Vector redirectStations = new Vector();
    /** Number of redirects followed so far. */
    private int redirects = 0;
    /** Accumulates the response body while it is read, so a read timeout can still use the part received. */
    private StringBuffer contentBuffer = new StringBuffer();
    /** Set when a read timed out but some content had already been received. */
    private boolean timeout = false;

    /* loaded from: input_file:comirva/web/crawling/AnySearch$ContentReceiver.class */
    /**
     * Background thread that downloads the page behind {@code AnySearch.this.url} over a
     * plain socket and stores the body in {@code AnySearch.this.content}.
     *
     * <p>Redirects are followed in a loop (not recursively), so a failure after a redirect is
     * reported as a failure. The socket is closed on every path. The outcome is exposed
     * through {@link #getStatus()} and {@link #getMessage()}.
     *
     * <p>NOTE(review): the enclosing constructor stops waiting after {@code MAX_WAITTIME} but
     * does not stop this thread, so an abandoned receiver can still write to the enclosing
     * instance while a newer one is running.
     */
    private class ContentReceiver extends Thread {
        /** Outcome of one exchange: the body was stored in the enclosing instance. */
        private static final int FETCH_DONE = 0;
        /** Outcome of one exchange: the server redirected and {@code AnySearch.this.url} holds the next target. */
        private static final int FETCH_REDIRECT = 1;
        /** Outcome of one exchange: it failed and {@link #message} says why. */
        private static final int FETCH_FAILED = 2;
        /** Port used when the URL does not name one. */
        private static final int DEFAULT_HTTP_PORT = 80;
        /** Socket read timeout in milliseconds. */
        private static final int READ_TIMEOUT_MILLIS = 10000;

        private boolean success = false;
        private String message = "retrieving page takes too long - skipping.";

        public ContentReceiver() {
        }

        /** @return {@code true} once a page body has been stored in the enclosing instance */
        public boolean getStatus() {
            return this.success;
        }

        /** @return description of the last failure, or the initial "takes too long" text if none was recorded */
        public String getMessage() {
            return this.message;
        }

        /**
         * Requests the current URL and keeps following redirects until a page has been
         * stored or an error occurred. The redirect counter bounds the loop.
         */
        @Override // java.lang.Thread, java.lang.Runnable
        public void run() {
            int outcome = fetchOnce();
            while (outcome == FETCH_REDIRECT) {
                outcome = fetchOnce();
            }
            this.success = outcome == FETCH_DONE;
        }

        /** Opens a connection to the current URL, performs one exchange and always closes the socket. */
        private int fetchOnce() {
            URL target = AnySearch.this.url;
            Socket socket;
            try {
                socket = new Socket(target.getHost(), effectivePort(target));
            } catch (UnknownHostException e) {
                this.message = e.getMessage();
                return FETCH_FAILED;
            } catch (IOException e) {
                this.message = "failed to connect.";
                return FETCH_FAILED;
            }
            try {
                return exchange(socket, target);
            } finally {
                try {
                    // Closing the socket also closes both of its streams.
                    socket.close();
                } catch (IOException ignored) {
                    // The outcome of the exchange is already decided; a failing close cannot change it.
                }
            }
        }

        /** Sends the request over {@code socket} and interprets the response. */
        private int exchange(Socket socket, URL target) {
            OutputStream out;
            try {
                socket.setSoTimeout(READ_TIMEOUT_MILLIS);
                out = socket.getOutputStream();
            } catch (IOException e) {
                this.message = "failed to connect.";
                return FETCH_FAILED;
            }
            try {
                out.write(buildRequest(target).getBytes());
                out.flush();
            } catch (IOException e) {
                this.message = "error while sending request.";
                return FETCH_FAILED;
            }
            BufferedReader reader;
            try {
                reader = new BufferedReader(new InputStreamReader(socket.getInputStream()));
            } catch (IOException e) {
                this.message = "error while trying to access stream.";
                return FETCH_FAILED;
            }
            try {
                return readResponse(reader, target);
            } catch (SocketTimeoutException e) {
                // Salvage whatever part of the body arrived before the read timed out.
                // NOTE(review): the salvaged text is lower-cased, unlike a completely read
                // body; kept as it was.
                if (AnySearch.this.content.equals("")) {
                    AnySearch.this.content = AnySearch.this.contentBuffer.toString().toLowerCase();
                }
                if (AnySearch.this.content.equals("")) {
                    this.message = String.valueOf(e.getMessage()) + " - no content retrieved";
                    return FETCH_FAILED;
                }
                AnySearch.this.timeout = true;
                return FETCH_DONE;
            } catch (IOException e) {
                this.message = "couldn't open URL " + target.toString();
                return FETCH_FAILED;
            } catch (AccessControlException e) {
                Permission permission = e.getPermission();
                this.message = permission == null
                        ? String.valueOf(e.getMessage())
                        : String.valueOf(permission.getName()) + " - " + permission.getActions();
                return FETCH_FAILED;
            }
        }

        /** Port named by the URL, or the default HTTP port when the URL names none. */
        private int effectivePort(URL target) {
            return target.getPort() == -1 ? DEFAULT_HTTP_PORT : target.getPort();
        }

        /**
         * Builds the GET request. An empty path is sent as "/" so the request line stays
         * valid. "connection: close" asks the server to end the stream after the body,
         * which lets the body readers stop at end of stream instead of waiting for the
         * read timeout; the connection is never reused anyway.
         */
        private String buildRequest(URL target) {
            String path = target.getPath();
            if (path == null || path.length() == 0) {
                path = "/";
            }
            String query = target.getQuery();
            String host = target.getHost();
            int port = effectivePort(target);
            if (port != DEFAULT_HTTP_PORT) {
                host = host + ":" + port;
            }
            return "GET " + path + (query != null ? "?" + query : "")
                    + " HTTP/1.1\r\nhost: " + host + "\r\nconnection: close\r\n\r\n";
        }

        /** Reads the status line and headers, then either the body (2xx) or the redirect target (3xx). */
        private int readResponse(BufferedReader reader, URL target) throws IOException {
            String statusLine = reader.readLine();
            if (statusLine == null) {
                this.message = "malformed response.";
                return FETCH_FAILED;
            }
            boolean ok = statusLine.matches("HTTP/1.[01] 20[0-9].*");
            boolean redirect = !ok && statusLine.matches("HTTP/1.[01] 30[0-9].*");
            if (!ok && !redirect) {
                // Any other status is reported verbatim.
                this.message = statusLine;
                return FETCH_FAILED;
            }
            if (redirect && AnySearch.this.redirects > AnySearch.this.MAX_REDIRECTS) {
                this.message = "too many redirects ... skipping";
                return FETCH_FAILED;
            }
            Hashtable<String, String> headers = readHeaders(reader);
            if (headers == null) {
                this.message = "malformed response.";
                return FETCH_FAILED;
            }
            return ok ? readBody(reader, headers) : followRedirect(headers, target);
        }

        /**
         * Reads header lines up to the empty separator line. Names are lower-cased for
         * lookup; values are kept verbatim because a Location path is case-sensitive.
         *
         * @return the headers, or {@code null} if the stream ended before the separator line
         */
        private Hashtable<String, String> readHeaders(BufferedReader reader) throws IOException {
            Hashtable<String, String> headers = new Hashtable<String, String>();
            while (true) {
                String line = reader.readLine();
                if (line == null) {
                    return null;
                }
                if (line.equals("")) {
                    return headers;
                }
                int colon = line.indexOf(':');
                if (colon != -1) {
                    headers.put(line.substring(0, colon).trim().toLowerCase(), line.substring(colon + 1).trim());
                }
            }
        }

        /** Reads the response body into {@code AnySearch.this.content}. */
        private int readBody(BufferedReader reader, Hashtable<String, String> headers) throws IOException {
            AnySearch.this.contentBuffer = new StringBuffer();
            String declaredLength = headers.get("content-length");
            int length = -1;
            if (declaredLength != null) {
                try {
                    length = Integer.parseInt(declaredLength);
                } catch (NumberFormatException e) {
                    length = -1;
                }
                if (length < 0) {
                    this.message = "malformed response.";
                    return FETCH_FAILED;
                }
            }
            try {
                if (declaredLength == null) {
                    String encoding = headers.get("transfer-encoding");
                    readLines(reader, encoding != null && encoding.toLowerCase().indexOf("chunked") != -1);
                } else {
                    readSized(reader, length);
                }
                AnySearch.this.content = AnySearch.this.contentBuffer.toString();
            } catch (OutOfMemoryError e) {
                // The page is too large to hold: drop it and carry on with an empty page.
                AnySearch.this.contentBuffer = new StringBuffer();
                AnySearch.this.content = "";
            }
            return FETCH_DONE;
        }

        /**
         * Reads a body of declared length through the same reader that consumed the
         * headers, so nothing the reader has already buffered is lost. The declared length
         * counts bytes, so it is only an upper bound on the characters to read; end of
         * stream ends the body earlier.
         */
        private void readSized(BufferedReader reader, int length) throws IOException {
            char[] chunk = new char[4096];
            int remaining = length;
            while (remaining > 0) {
                int count = reader.read(chunk, 0, Math.min(chunk.length, remaining));
                if (count == -1) {
                    break;
                }
                AnySearch.this.contentBuffer.append(chunk, 0, count);
                remaining -= count;
            }
        }

        /**
         * Reads a body without declared length line by line, dropping the line breaks.
         * For a chunked body the first chunk-size line is skipped and reading stops at the
         * terminating zero-size chunk ("0" next to an empty line). Reading always stops at
         * end of stream. Chunk-size lines between chunks are not removed (unchanged from
         * the previous behaviour).
         */
        private void readLines(BufferedReader reader, boolean chunked) throws IOException {
            if (chunked) {
                reader.readLine();
            }
            boolean previousEmpty = false;
            boolean previousZero = false;
            String line;
            while ((line = reader.readLine()) != null) {
                boolean empty = line.equals("");
                boolean zero = line.equals("0");
                if (chunked && ((previousEmpty && zero) || (previousZero && empty))) {
                    break;
                }
                previousEmpty = empty;
                previousZero = zero;
                AnySearch.this.contentBuffer.append(line);
            }
        }

        /** Records the current URL as visited and switches the enclosing instance to the redirect target. */
        private int followRedirect(Hashtable<String, String> headers, URL target) {
            String location = headers.get("location");
            if (location == null) {
                this.message = "redirection, but no alternative location.";
                return FETCH_FAILED;
            }
            AnySearch.this.redirectStations.addElement(target);
            URL next;
            try {
                next = new URL(target, location);
            } catch (MalformedURLException e) {
                this.message = "malformed redirection URL - " + e.getMessage();
                return FETCH_FAILED;
            }
            if (AnySearch.this.redirectStations.contains(next)) {
                this.message = "cyclic redirection.";
                return FETCH_FAILED;
            }
            AnySearch.this.url = next;
            AnySearch.this.redirects++;
            return FETCH_REDIRECT;
        }
    }

    /**
     * Builds the search URL and downloads the result page, retrying on failure.
     *
     * <p>The static settings {@code RESULTS_TO_REQUEST}, {@code MAX_RETRIES} and
     * {@code RETRY_INTERVAL} are overwritten from the configuration. At least one download
     * attempt is always made, even when the configured number is zero or negative. Each
     * attempt runs in its own thread and is waited for at most {@code MAX_WAITTIME}
     * milliseconds; failed attempts are separated by {@code RETRY_INTERVAL} milliseconds.
     *
     * @param anySearchConfig source of the request settings
     * @param str prefix of the search URL; "/search?..." is appended to it
     * @param str2 query text
     * @throws WebCrawlException if the URL is malformed, if every attempt failed (the message
     *         is the one reported by the last attempt), or if the calling thread was interrupted
     */
    public AnySearch(AnySearchConfig anySearchConfig, String str, String str2) throws WebCrawlException {
        RESULTS_TO_REQUEST = anySearchConfig.getNumberOfRequestedPages();
        MAX_RETRIES = anySearchConfig.getNumberOfRetries();
        RETRY_INTERVAL = anySearchConfig.getIntervalBetweenRetries() * 1000;
        this.url = generateSearchURL(str, str2, anySearchConfig.getFirstRequestedPageNumber());
        Runtime runtime = Runtime.getRuntime();
        // A non-positive setting used to end in a NullPointerException or in no download at all.
        int attempts = Math.max(1, MAX_RETRIES);
        boolean retrieved = false;
        for (int attempt = 0; attempt < attempts && !retrieved; attempt++) {
            try {
                if (attempt > 0) {
                    // Pause only between attempts, not after the last one.
                    Thread.sleep(RETRY_INTERVAL);
                }
                runtime.gc();
                this.cr = new ContentReceiver();
                this.cr.start();
                this.cr.join(MAX_WAITTIME);
            } catch (InterruptedException e) {
                // Keep the interrupt visible to the caller and stop retrying.
                Thread.currentThread().interrupt();
                throw new WebCrawlException("interrupted while retrieving " + this.url);
            }
            retrieved = this.cr.getStatus();
        }
        if (!retrieved) {
            throw new WebCrawlException(this.cr.getMessage());
        }
        runtime.gc();
    }

    /**
     * Extracts the estimated number of hits from the downloaded page.
     *
     * <p>Looks for the first "of about" (case-insensitive), then for the first bold number
     * ({@code <b>...</b>}) between it and the following "for". Commas in the number are ignored.
     *
     * @return the number found, capped at {@code Integer.MAX_VALUE}; 0 if the phrase or a
     *         well-formed number is missing
     */
    public int getPageCount() {
        String page = this.content;
        int phraseStart = page.toLowerCase().indexOf("of about");
        // The second test guards the rare case where lower-casing changed the text length.
        if (phraseStart == -1 || phraseStart > page.length()) {
            return 0;
        }
        String tail = page.substring(phraseStart);
        int phraseEnd = tail.indexOf("for");
        if (phraseEnd == -1) {
            return 0;
        }
        String phrase = tail.substring(0, phraseEnd);
        int open = phrase.indexOf("<b>");
        if (open == -1) {
            return 0;
        }
        // Search the closing tag after the opening one; a stray earlier "</b>" used to
        // cause a StringIndexOutOfBoundsException.
        int close = phrase.indexOf("</b>", open + 3);
        if (close == -1) {
            return 0;
        }
        StringBuilder digits = new StringBuilder();
        for (int i = open + 3; i < close; i++) {
            char c = phrase.charAt(i);
            if (c == ',') {
                continue;
            }
            if (c < '0' || c > '9') {
                // Not a plain number: treat it like a missing count.
                return 0;
            }
            digits.append(c);
        }
        if (digits.length() == 0) {
            return 0;
        }
        if (digits.length() > 18) {
            // Too long even for a long.
            return Integer.MAX_VALUE;
        }
        long count = Long.parseLong(digits.toString());
        return count > Integer.MAX_VALUE ? Integer.MAX_VALUE : (int) count;
    }

    /**
     * Collects the distinct link targets of the downloaded page that pass
     * {@code inspectURLString}, in order of appearance.
     *
     * <p>Duplicates are detected by comparing the textual form of the addresses.
     * {@code URL.equals} is avoided because it resolves host names, which blocks and treats
     * different hosts on one IP address as equal.
     *
     * @param i maximum number of addresses to return; zero or negative means no limit
     * @return the addresses found, or {@code null} if there are none
     */
    public URL[] getResultURLs(int i) {
        String page = this.content;
        // Lower-case once for the case-insensitive tag search instead of three times per link.
        String lowered = page.toLowerCase();
        Vector<URL> found = new Vector<URL>();
        Vector<String> seen = new Vector<String>();
        int from = 0;
        while (i <= 0 || found.size() < i) {
            int anchor = lowered.indexOf("<a", from);
            if (anchor == -1) {
                break;
            }
            int href = lowered.indexOf("href", anchor);
            if (href == -1) {
                break;
            }
            int equalsSign = lowered.indexOf("=", href);
            if (equalsSign == -1) {
                break;
            }
            from = equalsSign + 1;
            String target = firstLinkToken(page, from);
            if (target == null) {
                // Nothing usable follows the '='; this used to throw NoSuchElementException.
                continue;
            }
            try {
                URL absolute = new URL(this.url, target);
                String accepted = inspectURLString(absolute.toString());
                if (accepted == null) {
                    continue;
                }
                URL result = new URL(absolute, accepted);
                String key = result.toExternalForm();
                if (!seen.contains(key)) {
                    seen.addElement(key);
                    found.addElement(result);
                }
            } catch (MalformedURLException ignored) {
                // Not a usable address: skip this link and go on with the next one.
            }
        }
        if (found.isEmpty()) {
            return null;
        }
        URL[] urls = new URL[found.size()];
        found.copyInto(urls);
        return urls;
    }

    /**
     * Returns the first run of characters at or after {@code from} that contains none of
     * the delimiters tab, line feed, carriage return, double quote, '&gt;' and '#'; leading
     * delimiters are skipped.
     *
     * @return the token, or {@code null} if only delimiters (or nothing) follow
     */
    private static String firstLinkToken(String text, int from) {
        final String delimiters = "\t\n\r\">#";
        int start = from;
        while (start < text.length() && delimiters.indexOf(text.charAt(start)) != -1) {
            start++;
        }
        if (start >= text.length()) {
            return null;
        }
        int end = start;
        while (end < text.length() && delimiters.indexOf(text.charAt(end)) == -1) {
            end++;
        }
        return text.substring(start, end);
    }

    /**
     * Tells whether the timeout flag of this instance is set.
     *
     * @return the current value of the {@code timeout} field
     */
    public boolean timedOut() {
        return timeout;
    }

    /**
     * Builds the address of a result page.
     *
     * <p>The query is form-encoded as UTF-8 with {@code URLEncoder}, so every reserved or
     * non-ASCII character is escaped. Previously only a handful of characters were escaped,
     * and a '#' in the query cut off the rest of the URL, including the {@code num} and
     * {@code start} parameters.
     *
     * @param str prefix of the search URL; "/search?..." is appended to it
     * @param str2 query text
     * @param i value of the {@code start} parameter
     * @return the search URL
     * @throws WebCrawlException if the resulting text is not a valid URL
     */
    private URL generateSearchURL(String str, String str2, int i) throws WebCrawlException {
        String encodedQuery;
        try {
            encodedQuery = URLEncoder.encode(str2, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            // Every Java platform is required to support UTF-8.
            throw new WebCrawlException("UTF-8 encoding is not supported: " + e.getMessage());
        }
        String str3 = String.valueOf(str) + "/search?hl=en&safe=off&client=firefox-a&q=" + encodedQuery + "&num=" + RESULTS_TO_REQUEST + "&start=" + Integer.toString(i);
        // Trace of the requested address on standard output, kept from the previous behaviour.
        System.out.println(str3);
        try {
            return new URL(str3);
        } catch (MalformedURLException e) {
            throw new WebCrawlException("malformed URL: " + str3);
        }
    }

    /**
     * Filters a link target.
     *
     * <p>Rejected are addresses containing "google" or "search?q=cache", addresses ending
     * in one of the listed document extensions (case-sensitive), addresses that cannot be
     * parsed, and addresses whose protocol is not exactly "http".
     *
     * @param str address to check
     * @return {@code str} itself if it is accepted, otherwise {@code null}
     */
    private String inspectURLString(String str) {
        if (str.contains("google") || str.contains("search?q=cache")) {
            return null;
        }
        String[] skippedExtensions = {".pdf", ".ps", ".doc", ".rtf", ".ppt", ".pps", ".xls"};
        for (int k = 0; k < skippedExtensions.length; k++) {
            if (str.endsWith(skippedExtensions[k])) {
                return null;
            }
        }
        try {
            String protocol = new URL(str).getProtocol();
            return "http".equals(protocol) ? str : null;
        } catch (MalformedURLException e) {
            return null;
        }
    }
}
