package gov.lanl.archive.unload;

import com.hp.hpl.jena.sparql.sse.Tags;
import com.sleepycat.je.rep.utilint.HostPortPair;
import gov.lanl.archive.Memento;
import gov.lanl.archive.index.bdb.IndexImplB;
import gov.lanl.archive.location.PairReader;
import it.unimi.dsi.mg4j.index.IndexProperties;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.SequenceInputStream;
import java.net.URI;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.TimeZone;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicInteger;
import javax.ws.rs.core.HttpHeaders;
import net.htmlparser.jericho.HTMLElementName;
import org.archive.io.ArchiveRecord;
import org.archive.io.ArchiveRecordHeader;
import org.archive.io.warc.WARCConstants;
import org.archive.io.warc.WARCReader;
import org.archive.io.warc.WARCReaderFactory;
import org.archive.io.warc.WARCRecord;
import org.archive.io.warc.WARCWriter;
import org.archive.io.warc.WARCWriterPool;
import org.archive.io.warc.WARCWriterPoolSettingsData;
import org.archive.net.UURIFactory;
import org.archive.uid.UUIDGenerator;
import org.archive.util.ArchiveUtils;
import org.archive.util.anvl.ANVLRecord;
import org.archive.wayback.core.CaptureSearchResult;

/* loaded from: input_file:WEB-INF/lib/sitestory-core-1.0.1.jar:gov/lanl/archive/unload/DbWarcWriter.class */
/**
 * Writes {@link Memento} entries to WARC files through a Heritrix {@link WARCWriter}.
 *
 * <p>For each memento whose id equals its duplicate id, {@link #writeRecords} emits up to three
 * WARC records: a metadata record (only when a "next" date is known), a response record and a
 * request record (only when request headers are present).
 *
 * <p>NOTE(review): {@link #formatterout} is a shared, mutable {@code SimpleDateFormat};
 * {@code SimpleDateFormat} is not thread-safe, so concurrent calls to {@link #writeRecords}
 * should be confirmed safe by the caller.
 */
public class DbWarcWriter {
    /** File-name prefix handed to the WARC writer pool settings. */
    private static final String PREFIX = "MEM";
    /** Maximum number of active writers passed to the pool in {@link #test()}. */
    public final int MAX_ACTIVE = 5;
    /** Maximum wait, in milliseconds, passed to the pool in {@link #test()}. */
    final int MAX_WAIT_MILLISECONDS = 20000;
    /** Serial-number source handed to the WARC writer pool. */
    private static final AtomicInteger SERIAL_NO = new AtomicInteger();
    /** 14-digit timestamp formatter; its time zone is set to {@link #tz} on every write. */
    static SimpleDateFormat formatterout = new SimpleDateFormat("yyyyMMddHHmmss");
    /** Time zone applied to {@link #formatterout}. */
    static TimeZone tz = TimeZone.getTimeZone("GMT");

    /**
     * Command-line entry point; runs the read smoke test.
     *
     * @param strArr ignored
     * @throws IOException declared for compatibility
     * @throws ParseException declared for compatibility
     */
    public static void main(String[] strArr) throws IOException, ParseException {
        new DbWarcWriter().testread();
    }

    /**
     * Manual smoke test: pulls mementos from the index up to a fixed date and writes at most
     * three of them to a WARC file borrowed from a writer pool.
     *
     * @throws IOException if borrowing, writing or closing the WARC writer fails
     * @throws ParseException if the hard-coded cut-off date cannot be parsed
     */
    private void test() throws IOException, ParseException {
        final int maxRecords = 3;
        IndexImplB index = new IndexImplB();
        try {
            DbWarcWriter exporter = new DbWarcWriter();
            // NOTE(review): this directory is declared but never handed to the pool settings
            // (null is passed for the output directories below) - confirm whether that is intended.
            File[] fileArr = {new File("/Users/ludab/projects/warcfiles")};
            UUIDGenerator idGenerator = new UUIDGenerator();
            WARCWriterPoolSettingsData settings =
                    new WARCWriterPoolSettingsData(PREFIX, "", -1L, false, null, getMetadata(), idGenerator);
            WARCWriterPool pool =
                    new WARCWriterPool(SERIAL_NO, settings, exporter.MAX_ACTIVE, exporter.MAX_WAIT_MILLISECONDS);
            System.out.println("Num Active:" + pool.getNumActive());
            WARCWriter writer = (WARCWriter) pool.borrowFile();

            long cutoffMillis =
                    new SimpleDateFormat("E, dd MMM yyyy HH:mm:ss z").parse("Mon, 21 Jun 2010 21:13:25 MDT").getTime();
            List until = index.getUntil(Long.toString(cutoffMillis), null);
            System.out.println("got the list");

            Iterator mementos = until.iterator();
            int written = 0;
            while (written < maxRecords && mementos.hasNext()) {
                exporter.writeRecords(writer, (Memento) mementos.next());
                writer.checkSize();
                written++;
                System.out.println("here");
            }
            writer.checkSize();
            writer.close();
            pool.returnFile(writer);
        } finally {
            // Release the index whether or not the export succeeded.
            index.close();
        }
    }

    /**
     * Manual smoke test: iterates over every record of a hard-coded WARC file, printing its
     * mimetype, declared length and the number of bytes actually read from it.
     */
    private void testread() {
        try {
            WARCReader reader = WARCReaderFactory.get(
                    new File("/Users/Lyudimila/projects/tmp/warcs/MEM-20120322195643622-00000-~~.warc.gz"));
            try {
                Iterator<ArchiveRecord> records = reader.iterator();
                while (records.hasNext()) {
                    WARCRecord record = (WARCRecord) records.next();
                    ArchiveRecordHeader header = record.getHeader();
                    System.out.println("mimetype" + header.getMimetype());
                    System.out.println("length" + header.getLength());

                    ByteArrayOutputStream content = new ByteArrayOutputStream();
                    byte[] buffer = new byte[512];
                    BufferedInputStream in = new BufferedInputStream(record);
                    int read;
                    // Stop at end of record; read() returns -1 once the stream is exhausted.
                    while ((read = in.read(buffer)) != -1) {
                        content.write(buffer, 0, read);
                    }
                    System.out.println(content.size());
                }
            } finally {
                // Close the reader even if a record fails part-way through.
                reader.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes a warcinfo record describing this archive to the given writer.
     *
     * @param wARCWriter destination writer
     * @throws IOException if the record cannot be written
     */
    private void writeWarcinfoRecord(WARCWriter wARCWriter) throws IOException {
        ANVLRecord info = new ANVLRecord();
        info.addLabelValue(IndexProperties.SIZE, "1G");
        info.addLabelValue("operator", "tr-achive");
        info.addLabelValue("format", "WARC File Format 1.0");
        info.addLabelValue("conformsTo", "http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf");
        // Keep the encoded bytes so the declared length matches exactly what is streamed.
        byte[] infoBytes = info.getUTF8Bytes();
        wARCWriter.writeWarcinfoRecord(ANVLRecord.MIMETYPE, null, new ByteArrayInputStream(infoBytes), infoBytes.length);
    }

    /**
     * Returns the metadata lines for the first record of a WARC file.
     *
     * @return a new mutable list with the same content as {@link #getMetadata()}
     */
    public List MakeFirstRecord() {
        return getMetadata();
    }

    /**
     * Extracts the request verb, URL and host from raw HTTP request headers and stores them in
     * {@code map} under the keys {@code "verb"}, {@code "url"} and
     * {@code CaptureSearchResult.CAPTURE_ORIGINAL_HOST}.
     *
     * <p>A line with no separator (or with the separator at position 0) is treated as the request
     * line; such a line with fewer than two space-separated tokens is skipped. The host value is
     * stored exactly as it follows the separator, without trimming.
     *
     * @param str raw request headers, lines separated by CR and/or LF
     * @param map destination map, modified in place
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    public void setInfo(String str, Map map) {
        System.out.println("from geturlInfo:" + str);
        StringTokenizer lines = new StringTokenizer(str, "\r\n");
        while (lines.hasMoreTokens()) {
            String line = lines.nextToken();
            int separatorAt = line.indexOf(HostPortPair.SEPARATOR);
            if (separatorAt <= 0) {
                StringTokenizer parts = new StringTokenizer(line, UURIFactory.SPACE);
                if (parts.countTokens() < 2) {
                    // Malformed request line: nothing usable to record.
                    continue;
                }
                String verb = parts.nextToken();
                String url = parts.nextToken();
                System.out.println("from geturlInfo url" + url);
                map.put("verb", verb);
                map.put("url", url);
            } else if (line.substring(0, separatorAt).equals(HttpHeaders.HOST)) {
                String host = line.substring(separatorAt + 1);
                System.out.println("from geturlInfo host" + host);
                map.put(CaptureSearchResult.CAPTURE_ORIGINAL_HOST, host);
            }
        }
    }

    /**
     * Writes the WARC records for one memento.
     *
     * <p>Records are written only when the memento's id equals its duplicate id. In that case a
     * metadata record is written if the memento has a next date, then a response record, then a
     * request record if the request-header length is positive. For response codes 302 and 303
     * the response record contains the headers only; otherwise the body is read through
     * {@link PairReader} and appended to the headers.
     *
     * <p>Any exception is printed to standard error and not propagated; the body stream is
     * always closed.
     *
     * @param wARCWriter destination writer
     * @param memento memento to export
     */
    public void writeRecords(WARCWriter wARCWriter, Memento memento) {
        InputStream bodyStream = null;
        try {
            formatterout.setTimeZone(tz);
            String id = memento.getId();
            String code = memento.getCode();
            String resheaders = memento.getResheaders();
            // Drop anything that precedes the first 'H' of the stored response headers.
            int firstH = resheaders.indexOf("H");
            if (firstH > 0) {
                resheaders = resheaders.substring(firstH);
            }
            String url = memento.getUrl();
            // Measure the encoded bytes, not the character count, so the declared record
            // length agrees with what is actually streamed.
            byte[] headerBytes = resheaders.getBytes("UTF-8");
            boolean redirect = code.equals("302") || code.equals("303");

            InputStream responseStream;
            long responseLength;
            if (redirect) {
                responseStream = new ByteArrayInputStream(headerBytes);
                responseLength = headerBytes.length;
            } else {
                bodyStream = new PairReader().read(id, HTMLElementName.BODY);
                responseLength = headerBytes.length + memento.getLength();
                System.out.println("size of stream:" + responseLength + "for" + url);
                responseStream = new SequenceInputStream(new ByteArrayInputStream(headerBytes), bodyStream);
            }

            Date accessdate = memento.getAccessdate();
            System.out.println("mmemento:" + formatterout.format(accessdate));
            System.out.println("dipid" + memento.getDupId());

            if (id.equals(memento.getDupId())) {
                String accessStamp = ArchiveUtils.getLog14Date(accessdate);
                String concurrentTo = "<urn:uuid:" + id + Tags.symGT;
                ANVLRecord namedFields = new ANVLRecord();

                // Metadata record: only when the end of the memento's validity interval is known.
                namedFields.addLabelValue(WARCConstants.HEADER_KEY_CONCURRENT_TO, concurrentTo);
                Date nextdate = memento.getNextdate();
                if (nextdate != null) {
                    String metadata = "start:" + accessStamp + "\n"
                            + "end:" + ArchiveUtils.getLog14Date(nextdate) + "\n"
                            + "digest:" + memento.getDigest() + "\n"
                            + "numberOfHits:" + memento.getCounter() + "\n";
                    byte[] metadataBytes = metadata.getBytes("UTF-8");
                    wARCWriter.writeMetadataRecord(url, accessStamp, ANVLRecord.MIMETYPE,
                            new URI("urn:uuid:" + UUID.randomUUID()), namedFields,
                            new ByteArrayInputStream(metadataBytes), metadataBytes.length);
                } else {
                    System.out.println("date untill null" + url + "," + accessStamp);
                }

                // Response record.
                namedFields.clear();
                if (!redirect) {
                    namedFields.addLabelValue(WARCConstants.HEADER_KEY_PAYLOAD_DIGEST, "sha1:" + memento.getDigest());
                }
                wARCWriter.writeResponseRecord(url, accessStamp, WARCConstants.HTTP_RESPONSE_MIMETYPE,
                        new URI("urn:uuid:" + id), namedFields, responseStream, responseLength);
                responseStream.close();

                // Request record: only when request headers were captured.
                namedFields.clear();
                URI requestId = new URI("urn:uuid:" + UUID.randomUUID().toString());
                namedFields.addLabelValue(WARCConstants.HEADER_KEY_CONCURRENT_TO, concurrentTo);
                long reqheaderslength = memento.getReqheaderslength();
                if (reqheaderslength > 0) {
                    // NOTE(review): the declared length comes from the memento rather than from
                    // the encoded bytes - confirm the two always agree.
                    ByteArrayInputStream requestStream =
                            new ByteArrayInputStream(memento.getReqheaders().getBytes("UTF-8"));
                    wARCWriter.writeRequestRecord(url, accessStamp, WARCConstants.HTTP_REQUEST_MIMETYPE,
                            requestId, namedFields, requestStream, reqheaderslength);
                    requestStream.close();
                }
            }
        } catch (Exception e) {
            // Best effort: a failing memento is reported and skipped, not propagated.
            e.printStackTrace();
        } finally {
            if (bodyStream != null) {
                try {
                    bodyStream.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Returns the metadata lines (format, conformance URL, operator) describing the WARC files
     * produced by this writer.
     *
     * @return a new mutable list of three strings
     */
    public static List getMetadata() {
        List<String> metadata = new ArrayList<String>();
        metadata.add("format:WARC File Format 1.0\n");
        metadata.add("conformsTo:http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf\n");
        metadata.add("operator:tr-achive");
        return metadata;
    }
}
