package org.archive.wayback.resourcestore.indexer;

import java.io.File;
import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.httpclient.HttpParser;
import org.apache.commons.httpclient.StatusLine;
import org.apache.commons.httpclient.URIException;
import org.apache.commons.httpclient.util.EncodingUtil;
import org.archive.io.ArchiveRecordHeader;
import org.archive.io.RecoverableIOException;
import org.archive.io.warc.WARCConstants;
import org.archive.io.warc.WARCRecord;
import org.archive.net.UURIFactory;
import org.archive.wayback.UrlCanonicalizer;
import org.archive.wayback.core.CaptureSearchResult;
import org.archive.wayback.util.Adapter;
import org.archive.wayback.util.url.IdentityUrlCanonicalizer;

/* loaded from: input_file:WEB-INF/lib/wayback-core-1.7.0.jar:org/archive/wayback/resourcestore/indexer/WARCRecordToSearchResultAdapter.class */
/**
 * Adapts a {@link WARCRecord} into a {@link CaptureSearchResult}.
 *
 * <p>Every result starts from the record header (capture timestamp, file name,
 * offset, payload digest, URL and URL key). Depending on the WARC record type
 * the result then receives a synthetic mime type, or, for HTTP responses, the
 * HTTP status code plus whatever the configured {@link HTTPRecordAnnotater}
 * adds.
 *
 * <p>{@link #adapt(WARCRecord)} returns {@code null} when the record cannot be
 * adapted, or when it is a request/metadata record and {@code processAll} is
 * off.
 */
public class WARCRecordToSearchResultAdapter implements Adapter<WARCRecord, CaptureSearchResult> {
    private static final Logger LOGGER = Logger.getLogger(WARCRecordToSearchResultAdapter.class.getName());
    private static final String VERSION = "0.1.0";
    private static final String WARC_FILEDESC_VERSION = "warc/warcinfo" + VERSION;
    /** Placeholder stored in fields for which the record provides no value. */
    private static final String DEFAULT_VALUE = "-";
    /** Charset used to decode the HTTP status line and headers. */
    private static final String HTTP_HEADER_CHARSET = "ISO-8859-1";
    private static final String DNS_MIME_TYPE = "text/dns";
    private static final String SHA1_PREFIX = "sha1:";
    private static final String FILEDESC_PREFIX = "filedesc:";
    /** Longest URL prefix echoed into the canonicalization warning. */
    private static final int MAX_LOGGED_URL_LENGTH = 100;
    /** Number of leading characters of the header date that are read. */
    private static final int MIN_WARC_DATE_LENGTH = 19;

    private UrlCanonicalizer canonicalizer;
    private HTTPRecordAnnotater annotater;
    private boolean processAll = false;

    /**
     * Creates an adapter using an {@link IdentityUrlCanonicalizer} and a
     * default {@link HTTPRecordAnnotater}; both can be replaced via setters.
     */
    public WARCRecordToSearchResultAdapter() {
        this.canonicalizer = new IdentityUrlCanonicalizer();
        this.annotater = new HTTPRecordAnnotater();
    }

    /**
     * Converts one WARC record into a search result.
     *
     * @param wARCRecord the record to convert
     * @return the populated result, or {@code null} if the record was skipped
     *         or an {@link IOException} / {@link OutOfMemoryError} occurred
     */
    @Override // org.archive.wayback.util.Adapter
    public CaptureSearchResult adapt(WARCRecord wARCRecord) {
        try {
            return adaptInner(wARCRecord);
        } catch (IOException e) {
            LOGGER.log(Level.WARNING, "Failed to adapt WARC record", e);
            return null;
        } catch (OutOfMemoryError e) {
            // Kept from the original best-effort contract: one oversized
            // record yields null instead of propagating the error.
            LOGGER.log(Level.SEVERE, "Out of memory while adapting WARC record", e);
            return null;
        }
    }

    /**
     * Builds the generic result and then specialises it by WARC record type.
     *
     * @throws IOException if the record has no type header, has a malformed
     *         date, or its HTTP status line / headers cannot be read
     */
    private CaptureSearchResult adaptInner(WARCRecord record) throws IOException {
        ArchiveRecordHeader header = record.getHeader();
        Object typeValue = header.getHeaderValue(WARCConstants.HEADER_KEY_TYPE);
        if (typeValue == null) {
            // Report as a recoverable failure so adapt() returns null rather
            // than letting a NullPointerException escape.
            throw new RecoverableIOException("WARC record has no " + WARCConstants.HEADER_KEY_TYPE + " header");
        }
        String type = typeValue.toString();
        CaptureSearchResult result = genericResult(record, type);

        if (type.equals(WARCConstants.RESPONSE)) {
            String mime = this.annotater.transformHTTPMime(header.getMimetype());
            if (DNS_MIME_TYPE.equals(mime)) {
                // The record is closed before its digest string is requested.
                record.close();
                result.setDigest(transformWARCDigest(record.getDigestStr()));
                result.setMimeType(mime);
            } else {
                result = adaptWARCHTTPResponse(result, record);
            }
        } else if (type.equals(WARCConstants.REVISIT)) {
            result.setMimeType("warc/revisit");
        } else if (type.equals(WARCConstants.REQUEST)) {
            if (this.processAll) {
                result.setMimeType("warc/request");
            } else {
                result = null;
            }
        } else if (type.equals(WARCConstants.METADATA)) {
            if (this.processAll) {
                result.setMimeType("warc/metadata");
            } else {
                result = null;
            }
        } else if (type.equals(WARCConstants.WARCINFO)) {
            result.setMimeType(WARC_FILEDESC_VERSION);
        } else {
            // Despite the wording, the generic result is still returned for
            // unrecognised record types (existing behaviour, preserved).
            LOGGER.info("Skipping record type : " + type);
        }
        return result;
    }

    /**
     * Creates a result carrying the values available from the record header
     * alone; fields without a source are set to {@link #DEFAULT_VALUE}.
     *
     * @param record the record whose header is read
     * @param type the record's WARC type, already extracted by the caller
     * @throws IOException if the header date is missing or too short
     */
    private CaptureSearchResult genericResult(WARCRecord record, String type) throws IOException {
        CaptureSearchResult result = new CaptureSearchResult();
        result.setMimeType(DEFAULT_VALUE);
        result.setHttpCode(DEFAULT_VALUE);
        result.setRedirectUrl(DEFAULT_VALUE);

        ArchiveRecordHeader header = record.getHeader();
        String fileName = transformWARCFilename(header.getReaderIdentifier());
        long offset = header.getOffset();
        result.setCaptureTimestamp(transformWARCDate(header.getDate()));
        result.setFile(fileName);
        result.setOffset(offset);
        result.setDigest(transformWARCDigest(header.getHeaderValue(WARCConstants.HEADER_KEY_PAYLOAD_DIGEST)));

        String url = header.getUrl();
        if (url != null) {
            result.setOriginalUrl(url);
            try {
                result.setUrlKey(this.canonicalizer.urlStringToKey(url));
            } catch (URIException e) {
                String shownUrl = url.length() < MAX_LOGGED_URL_LENGTH ? url : url.substring(0, MAX_LOGGED_URL_LENGTH);
                LOGGER.warning("FAILED canonicalize(" + shownUrl + "):" + fileName + UURIFactory.SPACE + offset);
                // Fall back to the raw URL so the record still gets a key.
                result.setUrlKey(url);
            }
        } else if (type.equals(WARCConstants.WARCINFO)) {
            // If the filename header is absent, fall back to the name derived
            // from the reader identifier instead of dereferencing null.
            Object declaredName = header.getHeaderValue(WARCConstants.HEADER_KEY_FILENAME);
            String name = declaredName != null ? declaredName.toString() : fileName;
            String fileDescUrl = FILEDESC_PREFIX + name;
            result.setOriginalUrl(fileDescUrl);
            result.setUrlKey(fileDescUrl);
        } else {
            result.setOriginalUrl(DEFAULT_VALUE);
            result.setUrlKey(DEFAULT_VALUE);
        }
        return result;
    }

    /**
     * Counts the line-terminator bytes at the end of a raw line.
     *
     * @return 2 for a trailing CR LF, 1 for a bare LF, otherwise 0
     */
    private int getEolCharsCount(byte[] line) {
        if (line == null || line.length < 1 || line[line.length - 1] != '\n') {
            return 0;
        }
        if (line.length >= 2 && line[line.length - 2] == '\r') {
            return 2;
        }
        return 1;
    }

    /**
     * Strips any leading directory portion, using {@link File#separator},
     * from a reader identifier. An identifier that ends with the separator is
     * returned unchanged.
     */
    private String transformWARCFilename(String readerIdentifier) {
        int lastSeparator = readerIdentifier.lastIndexOf(File.separator);
        // ">= 0" so a separator in the very first position is stripped too.
        if (lastSeparator >= 0 && lastSeparator + 1 < readerIdentifier.length()) {
            return readerIdentifier.substring(lastSeparator + 1);
        }
        return readerIdentifier;
    }

    /**
     * Renders a digest header value as a string without its {@code sha1:}
     * prefix; a {@code null} value becomes {@link #DEFAULT_VALUE}.
     */
    private String transformWARCDigest(Object digestValue) {
        if (digestValue == null) {
            return DEFAULT_VALUE;
        }
        String digest = digestValue.toString();
        return digest.startsWith(SHA1_PREFIX) ? digest.substring(SHA1_PREFIX.length()) : digest;
    }

    /**
     * Compacts a header date into 14 characters by copying the character
     * ranges 0-3, 5-6, 8-9, 11-12, 14-15 and 17-18 and dropping the single
     * characters between them.
     *
     * @throws IOException if the date is {@code null} or shorter than 19
     *         characters
     */
    private static String transformWARCDate(String warcDate) throws IOException {
        if (warcDate == null || warcDate.length() < MIN_WARC_DATE_LENGTH) {
            throw new RecoverableIOException("Malformed WARC record date: " + warcDate);
        }
        StringBuilder sb = new StringBuilder(14);
        sb.append(warcDate, 0, 4);
        sb.append(warcDate, 5, 7);
        sb.append(warcDate, 8, 10);
        sb.append(warcDate, 11, 13);
        sb.append(warcDate, 14, 16);
        sb.append(warcDate, 17, 19);
        return sb.toString();
    }

    /**
     * Reads the HTTP status line and headers from the record, stores the
     * status code on the result and hands the parsed headers to the annotater.
     *
     * @throws IOException if the status line is missing, is not terminated by
     *         a line feed, or does not start like an HTTP status line
     */
    private CaptureSearchResult adaptWARCHTTPResponse(CaptureSearchResult result, WARCRecord record) throws IOException {
        ArchiveRecordHeader header = record.getHeader();
        byte[] rawLine = HttpParser.readRawLine(record);
        int eolCharsCount = getEolCharsCount(rawLine);
        if (eolCharsCount <= 0) {
            // Decode with the same charset as the headers rather than the
            // platform default, so the message is stable across hosts.
            String shownLine = rawLine == null ? "(null)" : EncodingUtil.getString(rawLine, HTTP_HEADER_CHARSET);
            throw new RecoverableIOException("Failed to read http status where one  was expected: " + shownLine);
        }
        String statusLine = EncodingUtil.getString(rawLine, 0, rawLine.length - eolCharsCount, HTTP_HEADER_CHARSET);
        if (statusLine == null || !StatusLine.startsWithHTTP(statusLine)) {
            throw new RecoverableIOException("Failed parse of http status line.");
        }
        result.setHttpCode(String.valueOf(new StatusLine(statusLine).getStatusCode()));
        this.annotater.annotateHTTPContent(result, record, HttpParser.parseHeaders(record, HTTP_HEADER_CHARSET), header.getMimetype());
        return result;
    }

    /** @return the canonicalizer used to derive URL keys */
    public UrlCanonicalizer getCanonicalizer() {
        return this.canonicalizer;
    }

    /** @param urlCanonicalizer the canonicalizer used to derive URL keys */
    public void setCanonicalizer(UrlCanonicalizer urlCanonicalizer) {
        this.canonicalizer = urlCanonicalizer;
    }

    /** @return whether request and metadata records produce results */
    public boolean isProcessAll() {
        return this.processAll;
    }

    /** @param z {@code true} to produce results for request and metadata records */
    public void setProcessAll(boolean z) {
        this.processAll = z;
    }

    /** @return the annotater applied to HTTP response records */
    public HTTPRecordAnnotater getAnnotater() {
        return this.annotater;
    }

    /** @param hTTPRecordAnnotater the annotater applied to HTTP response records */
    public void setAnnotater(HTTPRecordAnnotater hTTPRecordAnnotater) {
        this.annotater = hTTPRecordAnnotater;
    }
}
