package org.archive.wayback.resourcestore.indexer;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Iterator;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.archive.wayback.Shutdownable;
import org.archive.wayback.UrlCanonicalizer;
import org.archive.wayback.core.CaptureSearchResult;
import org.archive.wayback.resourceindex.cdx.CDXFormatIndex;
import org.archive.wayback.resourceindex.cdx.SearchResultToCDXFormatAdapter;
import org.archive.wayback.resourceindex.cdx.format.CDXFormat;
import org.archive.wayback.resourceindex.cdx.format.CDXFormatException;
import org.archive.wayback.resourceindex.updater.IndexClient;
import org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDB;
import org.archive.wayback.util.CloseableIterator;
import org.archive.wayback.util.url.AggressiveUrlCanonicalizer;
import org.archive.wayback.util.url.IdentityUrlCanonicalizer;

/* loaded from: input_file:WEB-INF/lib/wayback-core-1.7.0.jar:org/archive/wayback/resourcestore/indexer/IndexWorker.class */
/**
 * Pulls file names off an {@link IndexQueue}, resolves each name to a location
 * through a {@link ResourceFileLocationDB}, indexes the ARC/WARC file found
 * there and hands the resulting captures to an {@link IndexClient}.
 *
 * <p>When {@link #init()} is called with a positive interval a background
 * {@link WorkerThread} is started that repeatedly calls {@link #doWork()}.
 * The class also provides a command-line entry point ({@link #main}) that
 * writes a CDX index for a single file.
 */
public class IndexWorker implements Shutdownable {
    private static final Logger LOGGER = Logger.getLogger(IndexWorker.class.getName());
    public static final String ARC_EXTENSION = ".arc";
    public static final String ARC_GZ_EXTENSION = ".arc.gz";
    public static final String WARC_EXTENSION = ".warc";
    public static final String WARC_GZ_EXTENSION = ".warc.gz";
    private ArcIndexer arcIndexer = new ArcIndexer();
    private WarcIndexer warcIndexer = new WarcIndexer();
    private UrlCanonicalizer canonicalizer = new IdentityUrlCanonicalizer();
    /** Base polling interval in milliseconds; a value &lt;= 0 disables the background thread. */
    private long interval = 120000;
    private IndexQueue queue = null;
    private ResourceFileLocationDB db = null;
    private IndexClient target = null;
    private WorkerThread thread = null;

    /**
     * Background thread that keeps calling {@link IndexWorker#doWork()} until
     * it is interrupted.
     *
     * <p>After a pass that did work the next pass starts immediately; after an
     * idle pass the delay grows by one {@code runInterval} (it is not capped).
     */
    public class WorkerThread extends Thread {
        private long runInterval;
        private IndexWorker worker;

        /**
         * @param indexWorker worker whose {@code doWork()} is polled
         * @param j           base sleep interval in milliseconds
         */
        public WorkerThread(IndexWorker indexWorker, long j) {
            this.worker = indexWorker;
            this.runInterval = j;
        }

        @Override // java.lang.Thread, java.lang.Runnable
        public void run() {
            LOGGER.info("alive.");
            long sleepMillis = this.runInterval;
            // Check the interrupt flag explicitly: while the queue is busy the
            // delay is 0 and sleep() is never reached, so without this check a
            // shutdown request would go unnoticed.
            while (!isInterrupted()) {
                try {
                    if (this.worker.doWork()) {
                        sleepMillis = 0L;
                    } else {
                        sleepMillis += this.runInterval;
                    }
                } catch (IOException e) {
                    LOGGER.log(Level.SEVERE, "Index pass failed", e);
                    // Wait at least one interval before retrying so that a
                    // persistent failure does not turn into a hot loop.
                    sleepMillis = Math.max(sleepMillis, this.runInterval);
                }
                if (sleepMillis > 0) {
                    try {
                        sleep(sleepMillis);
                    } catch (InterruptedException e) {
                        break;
                    }
                }
            }
            LOGGER.info("Shutting Down.");
        }
    }

    /**
     * Pushes the configured canonicalizer into both indexers and, if the
     * interval is positive, starts the background worker thread.
     */
    public void init() {
        this.arcIndexer.setCanonicalizer(this.canonicalizer);
        this.warcIndexer.setCanonicalizer(this.canonicalizer);
        if (this.interval > 0) {
            this.thread = new WorkerThread(this, this.interval);
            this.thread.start();
        }
    }

    /**
     * Interrupts the background thread (if one was started) and waits up to
     * one second for it to finish.
     */
    @Override // org.archive.wayback.Shutdownable
    public void shutdown() {
        WorkerThread running = this.thread;
        if (running == null) {
            return;
        }
        running.interrupt();
        try {
            running.join(1000L);
        } catch (InterruptedException e) {
            // Preserve the caller's interrupt status instead of swallowing it.
            Thread.currentThread().interrupt();
            LOGGER.warning("Interrupted while waiting for the index worker thread to stop");
        }
    }

    /**
     * Processes at most one entry from the queue.
     *
     * @return {@code true} if an entry was dequeued and its location lookup
     *         did not fail (even if indexing that entry then failed or was
     *         skipped); {@code false} if the queue was empty or the location
     *         lookup threw
     * @throws IOException if dequeuing fails
     */
    public boolean doWork() throws IOException {
        String name = this.queue.dequeue();
        if (name == null) {
            return false;
        }

        String[] urls;
        try {
            urls = this.db.nameToUrls(name);
        } catch (IOException e) {
            LOGGER.severe("FAILED TO LOOKUP(" + name + ")" + e.getLocalizedMessage());
            return false;
        }
        if (urls == null || urls.length == 0) {
            LOGGER.warning("No known location for (" + name + "), not indexed");
            return true;
        }

        String url = urls[0];
        LOGGER.info("Indexing " + name + " from " + url);
        try {
            CloseableIterator<CaptureSearchResult> captures = indexFile(url);
            if (captures == null) {
                // indexFile() signals an unrecognised extension with null;
                // passing that on would fail with an unchecked exception.
                LOGGER.severe("FAILED to index (" + name + "): unsupported file type " + url);
                return true;
            }
            try {
                this.target.addSearchResults(name, captures);
            } finally {
                // Always release the underlying file, also when the upload fails.
                captures.close();
            }
        } catch (IOException e) {
            LOGGER.log(Level.SEVERE, "FAILED to index or upload (" + name + ")", e);
        }
        return true;
    }

    /**
     * Opens a capture iterator over the given ARC or WARC file, choosing the
     * indexer by file extension.
     *
     * @param str path or URL of the file to index
     * @return an iterator over the file's captures, or {@code null} if
     *         {@code str} is {@code null} or does not end in one of the
     *         supported extensions
     * @throws IOException if the indexer fails to open the file
     */
    public CloseableIterator<CaptureSearchResult> indexFile(String str) throws IOException {
        if (str == null) {
            return null;
        }
        if (str.endsWith(ARC_EXTENSION) || str.endsWith(ARC_GZ_EXTENSION)) {
            return this.arcIndexer.iterator(str);
        }
        if (str.endsWith(WARC_EXTENSION) || str.endsWith(WARC_GZ_EXTENSION)) {
            return this.warcIndexer.iterator(str);
        }
        return null;
    }

    /** Prints command-line help to stderr and terminates the JVM with status 1. */
    private static void USAGE() {
        System.err.println("USAGE:");
        System.err.println("");
        System.err.println("cdx-indexer [-format FORMAT|-identity] FILE");
        System.err.println("cdx-indexer [-format FORMAT|-identity] FILE CDXFILE");
        System.err.println("");
        System.err.println("Create a CDX format index from ARC or WARC file");
        System.err.println("FILE at CDXFILE or to STDOUT.");
        System.err.println("With -identity, perform no url canonicalization.");
        System.err.println("With -format, output CDX in format FORMAT.");
        System.exit(1);
    }

    /**
     * Command-line entry point: indexes one ARC/WARC file and writes the CDX
     * lines to a file or to standard output. Exits with status 1 on bad
     * arguments, an unsupported input file, or an I/O or format error.
     *
     * @param strArr command-line arguments, see {@link #USAGE()}
     */
    public static void main(String[] strArr) {
        String cdxSpec = CDXFormatIndex.CDX_HEADER_MAGIC;
        UrlCanonicalizer urlCanonicalizer = new AggressiveUrlCanonicalizer();
        boolean explicitFormat = false;
        boolean identity = false;
        String inputPath = null;
        String outputPath = null;

        if (strArr.length == 0) {
            USAGE();
        }
        for (int i = 0; i < strArr.length; i++) {
            String arg = strArr[i];
            if ("-identity".equals(arg)) {
                urlCanonicalizer = new IdentityUrlCanonicalizer();
                identity = true;
            } else if ("-format".equals(arg)) {
                i++;
                if (i >= strArr.length) {
                    USAGE();
                }
                cdxSpec = strArr[i];
                explicitFormat = true;
            } else if (inputPath == null) {
                inputPath = arg;
            } else {
                // The output file, when given, must be the last argument.
                if (i + 1 != strArr.length) {
                    USAGE();
                }
                outputPath = arg;
            }
        }
        if (inputPath == null) {
            // Only options were given; there is nothing to index.
            USAGE();
        }
        if (!explicitFormat && identity) {
            // Without canonicalization, swap the " N " field of the default
            // format for " a ".
            cdxSpec = cdxSpec.replace(" N ", " a ");
        }

        IndexWorker indexWorker = new IndexWorker();
        indexWorker.canonicalizer = urlCanonicalizer;
        indexWorker.interval = 0L;
        indexWorker.init();

        int exitCode = 0;
        PrintWriter out = null;
        try {
            CDXFormat format = new CDXFormat(cdxSpec);
            CloseableIterator<CaptureSearchResult> captures = indexWorker.indexFile(inputPath);
            if (captures == null) {
                System.err.println("Unsupported file type (expected .arc, .arc.gz, .warc or .warc.gz): "
                        + inputPath);
                exitCode = 1;
            } else {
                try {
                    // Open the output only once the input and format are known
                    // to be usable, so a bad invocation does not truncate CDXFILE.
                    out = (outputPath == null) ? new PrintWriter(System.out) : new PrintWriter(outputPath);
                    Iterator<String> lines = SearchResultToCDXFormatAdapter.adapt(captures, format);
                    out.println(cdxSpec);
                    while (lines.hasNext()) {
                        out.println(lines.next());
                    }
                } finally {
                    captures.close();
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
            exitCode = 1;
        } catch (CDXFormatException e) {
            e.printStackTrace();
            exitCode = 1;
        } finally {
            if (out != null) {
                // PrintWriter swallows write errors; checkError() flushes and
                // reports whether any occurred.
                if (out.checkError()) {
                    System.err.println("Error writing CDX output");
                    exitCode = 1;
                }
                out.close();
            }
        }
        // Exit only after cleanup: System.exit() inside the try/catch would
        // skip the finally block.
        if (exitCode != 0) {
            System.exit(exitCode);
        }
    }

    /** @return the base polling interval in milliseconds */
    public long getInterval() {
        return this.interval;
    }

    /** @param j base polling interval in milliseconds; &lt;= 0 means {@link #init()} starts no thread */
    public void setInterval(long j) {
        this.interval = j;
    }

    /** @return the queue that file names are dequeued from */
    public IndexQueue getQueue() {
        return this.queue;
    }

    /** @param indexQueue the queue that file names are dequeued from */
    public void setQueue(IndexQueue indexQueue) {
        this.queue = indexQueue;
    }

    /** @return the location database used to map file names to URLs */
    public ResourceFileLocationDB getDb() {
        return this.db;
    }

    /** @param resourceFileLocationDB the location database used to map file names to URLs */
    public void setDb(ResourceFileLocationDB resourceFileLocationDB) {
        this.db = resourceFileLocationDB;
    }

    /** @return the client that receives the indexed captures */
    public IndexClient getTarget() {
        return this.target;
    }

    /** @param indexClient the client that receives the indexed captures */
    public void setTarget(IndexClient indexClient) {
        this.target = indexClient;
    }

    /** @return the canonicalizer handed to the indexers by {@link #init()} */
    public UrlCanonicalizer getCanonicalizer() {
        return this.canonicalizer;
    }

    /** @param urlCanonicalizer the canonicalizer handed to the indexers by {@link #init()} */
    public void setCanonicalizer(UrlCanonicalizer urlCanonicalizer) {
        this.canonicalizer = urlCanonicalizer;
    }
}
