package de.uniol.inf.is.odysseus.wrapper.webcrawler.physicaloperator.access;

import de.uniol.inf.is.odysseus.core.collection.OptionMap;
import de.uniol.inf.is.odysseus.core.collection.Tuple;
import de.uniol.inf.is.odysseus.core.physicaloperator.access.protocol.IProtocolHandler;
import de.uniol.inf.is.odysseus.core.physicaloperator.access.transport.AbstractSimplePullTransportHandler;
import de.uniol.inf.is.odysseus.core.physicaloperator.access.transport.ITransportHandler;
import de.uniol.inf.is.odysseus.wrapper.webcrawler.util.WebCrawler;
import edu.uci.ics.crawler4j.crawler.CrawlConfig;
import edu.uci.ics.crawler4j.crawler.CrawlController;
import edu.uci.ics.crawler4j.fetcher.PageFetcher;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtServer;
import java.util.ArrayList;
import java.util.Iterator;

/* loaded from: input_file:de/uniol/inf/is/odysseus/wrapper/webcrawler/physicaloperator/access/WebCrawlerTransportHandler.class */
/**
 * Pull transport handler that runs a crawler4j crawl over the configured seed
 * sites and exposes the collected page text as a two-attribute {@link Tuple}.
 *
 * <p>Configuration is read from the {@link OptionMap} in {@link #init(OptionMap)}:
 * <ul>
 *   <li>{@value #DEPTH} - maximum crawl depth (integer, required for crawling)</li>
 *   <li>{@value #WEBSITES} - seed URLs joined by the delimiter used in {@code init}</li>
 *   <li>{@value #FETCH} - maximum number of pages to fetch (integer, required for crawling)</li>
 * </ul>
 *
 * <p>Note that every call to {@link #hasNext()} performs a complete crawl.
 */
public class WebCrawlerTransportHandler extends AbstractSimplePullTransportHandler<Tuple<?>> {
    /** Handler name returned by {@link #getName()}. */
    public static final String NAME = "WebCrawler";
    /** Option key holding the maximum crawl depth. */
    public static final String DEPTH = "depth";
    /** Option key holding the delimiter-separated list of seed URLs. */
    public static final String WEBSITES = "site";
    /** Option key holding the maximum number of pages to fetch. */
    public static final String FETCH = "fetch";

    /** Raw value of the {@link #DEPTH} option; {@code null} if the option was not supplied. */
    public String depth;
    /** Raw value of the {@link #WEBSITES} option; {@code null} if the option was not supplied. */
    public String sites;
    /** Raw value of the {@link #FETCH} option; {@code null} if the option was not supplied. */
    public String fetch;

    /** Seed URLs obtained by splitting {@link #sites}. */
    private final ArrayList<String> urls = new ArrayList<>();
    /** Text collected by the most recent crawl, with line breaks and tabs removed. */
    private ArrayList<String> crawledText = new ArrayList<>();
    private WebCrawler crawler;
    private CrawlController controller;
    private CrawlConfig config;
    private PageFetcher pageFetcher;
    private RobotstxtConfig robotstxtConfig;
    private RobotstxtServer robotstxtServer;

    /** Creates an unconfigured handler; options must be supplied via {@link #init(OptionMap)}. */
    public WebCrawlerTransportHandler() {
    }

    /**
     * Creates a handler bound to the given protocol handler and configures it from the options.
     *
     * @param iProtocolHandler protocol handler passed on to the superclass
     * @param optionMap        options passed on to the superclass and to {@link #init(OptionMap)}
     */
    public WebCrawlerTransportHandler(IProtocolHandler<?> iProtocolHandler, OptionMap optionMap) {
        super(iProtocolHandler, optionMap);
        init(optionMap);
    }

    /**
     * Reads the {@link #DEPTH}, {@link #WEBSITES} and {@link #FETCH} options.
     * Options that are absent leave the corresponding field untouched.
     *
     * @param optionMap options to read from
     */
    public void init(OptionMap optionMap) {
        if (optionMap.containsKey(DEPTH)) {
            this.depth = optionMap.get(DEPTH);
        }
        if (optionMap.containsKey(WEBSITES)) {
            this.sites = optionMap.get(WEBSITES);
            // Rebuild the seed list so that it always mirrors the current value of
            // 'sites' instead of accumulating entries across repeated init calls.
            this.urls.clear();
            // NOTE(review): the delimiter literal looks like a mis-decoded character;
            // confirm the intended separator against the original source.
            for (String url : this.sites.split("�")) {
                this.urls.add(url);
            }
        }
        if (optionMap.containsKey(FETCH)) {
            this.fetch = optionMap.get(FETCH);
        }
    }

    /**
     * Creates a new, configured instance of this handler.
     *
     * @param iProtocolHandler protocol handler for the new instance
     * @param optionMap        options for the new instance
     * @return a new {@code WebCrawlerTransportHandler}
     */
    public ITransportHandler createInstance(IProtocolHandler<?> iProtocolHandler, OptionMap optionMap) {
        return new WebCrawlerTransportHandler(iProtocolHandler, optionMap);
    }

    /** @return {@link #NAME} */
    public String getName() {
        return NAME;
    }

    /**
     * Two handlers of this type are never reported as semantically equal.
     *
     * @param iTransportHandler the handler to compare with (ignored)
     * @return always {@code false}
     */
    public boolean isSemanticallyEqualImpl(ITransportHandler iTransportHandler) {
        return false;
    }

    /**
     * Runs a complete crawl (see {@link #initializeCrawler()}) and reports whether
     * a text list is available afterwards.
     *
     * @return {@code true} if the crawled text list is non-null after the crawl
     */
    public boolean hasNext() {
        initializeCrawler();
        return this.crawledText != null;
    }

    /**
     * Builds the result tuple of the last crawl: attribute 0 is the raw
     * {@link #sites} option value, attribute 1 is the string form of the
     * collected text list.
     *
     * @return a tuple with two attributes
     */
    /* renamed from: getNext, reason: merged with bridge method [inline-methods] */
    public Tuple<?> m377getNext() {
        Tuple<?> tuple = new Tuple<>(2, false);
        tuple.setAttribute(0, this.sites);
        tuple.setAttribute(1, this.crawledText.toString());
        return tuple;
    }

    /**
     * Configures crawler4j from the stored options, seeds it with all configured
     * URLs, runs the crawl with a single crawler, stores the cleaned-up text in
     * this handler and shuts the controller down.
     *
     * <p>The user's home directory ({@code user.home}) is used as crawl storage folder.
     *
     * @throws NumberFormatException if the depth or fetch option is missing or not an integer
     * @throws IllegalStateException if the crawl controller cannot be created
     */
    public void initializeCrawler() {
        // Validate the numeric options first so that a configuration error is
        // reported with the offending option name before any crawler resources exist.
        int maxDepth = parseIntOption(DEPTH, this.depth);
        int maxPages = parseIntOption(FETCH, this.fetch);

        this.config = new CrawlConfig();
        this.config.setCrawlStorageFolder(System.getProperty("user.home"));
        this.config.setMaxDepthOfCrawling(maxDepth);
        this.config.setMaxPagesToFetch(maxPages);
        this.pageFetcher = new PageFetcher(this.config);
        this.robotstxtConfig = new RobotstxtConfig();
        this.robotstxtServer = new RobotstxtServer(this.robotstxtConfig, this.pageFetcher);
        try {
            this.controller = new CrawlController(this.config, this.pageFetcher, this.robotstxtServer);
        } catch (Exception e) {
            // Previously the stack trace was printed and the code continued into a
            // NullPointerException on the controller; fail with the real cause instead.
            throw new IllegalStateException("Could not create crawl controller", e);
        }
        for (String seed : this.urls) {
            this.controller.addSeed(seed);
        }
        // NOTE(review): this instance is not handed to the controller, which is given
        // the WebCrawler class instead; presumably WebCrawler shares its collected
        // text across instances - confirm against WebCrawler.getText().
        this.crawler = new WebCrawler();
        this.controller.start(WebCrawler.class, 1);
        if (this.crawledText != null) {
            this.crawledText.clear();
        }
        this.crawledText = this.crawler.getText();
        trimCrawledText();
        this.controller.shutdown();
        this.controller.waitUntilFinish();
    }

    /**
     * Parses a required integer option.
     *
     * @param optionName option key, used in the error message only
     * @param value      raw option value, may be {@code null}
     * @return the parsed integer
     * @throws NumberFormatException if {@code value} is {@code null} or not an integer
     */
    private static int parseIntOption(String optionName, String value) {
        if (value == null) {
            throw new NumberFormatException("Missing required option '" + optionName + "'");
        }
        try {
            return Integer.parseInt(value);
        } catch (NumberFormatException e) {
            NumberFormatException detailed = new NumberFormatException(
                    "Option '" + optionName + "' must be an integer but was '" + value + "'");
            detailed.initCause(e);
            throw detailed;
        }
    }

    /**
     * Replaces the crawled text list by a copy in which every entry has all
     * line feeds, carriage returns and tabs removed and surrounding whitespace
     * trimmed. Does nothing if no text list is available; {@code null} entries
     * are skipped.
     */
    private void trimCrawledText() {
        if (this.crawledText == null) {
            return;
        }
        ArrayList<String> cleaned = new ArrayList<>(this.crawledText.size());
        for (String raw : this.crawledText) {
            if (raw == null) {
                continue;
            }
            // Single filtering pass: the former delete-in-place loop read charAt(i)
            // again after a deletion without re-checking the length and failed on
            // texts ending in '\n', '\t' or '\r'.
            StringBuilder kept = new StringBuilder(raw.length());
            for (int i = 0; i < raw.length(); i++) {
                char c = raw.charAt(i);
                if (c != '\n' && c != '\t' && c != '\r') {
                    kept.append(c);
                }
            }
            cleaned.add(kept.toString().trim());
        }
        this.crawledText = cleaned;
    }
}
