001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.zip;
020
021import java.io.ByteArrayInputStream;
022import java.io.ByteArrayOutputStream;
023import java.io.EOFException;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.PushbackInputStream;
027import java.nio.ByteBuffer;
028import java.util.zip.CRC32;
029import java.util.zip.DataFormatException;
030import java.util.zip.Inflater;
031import java.util.zip.ZipEntry;
032import java.util.zip.ZipException;
033
034import org.apache.commons.compress.archivers.ArchiveEntry;
035import org.apache.commons.compress.archivers.ArchiveInputStream;
036import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
037import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
038import org.apache.commons.compress.utils.ArchiveUtils;
039import org.apache.commons.compress.utils.IOUtils;
040import org.apache.commons.compress.utils.InputStreamStatistics;
041
042import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
043import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
044import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
045import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
046
047/**
048 * Implements an input stream that can read Zip archives.
049 *
 * <p>It transparently supports Zip64 extensions and thus
 * individual entries and archives larger than 4 GB or with more
 * than 65536 entries.</p>
053 *
054 * <p>The {@link ZipFile} class is preferred when reading from files
055 * as {@link ZipArchiveInputStream} is limited by not being able to
056 * read the central directory header before returning entries.  In
057 * particular {@link ZipArchiveInputStream}</p>
058 *
059 * <ul>
060 *
061 *  <li>may return entries that are not part of the central directory
062 *  at all and shouldn't be considered part of the archive.</li>
063 *
064 *  <li>may return several entries with the same name.</li>
065 *
066 *  <li>will not return internal or external attributes.</li>
067 *
068 *  <li>may return incomplete extra field data.</li>
069 *
070 *  <li>may return unknown sizes and CRC values for entries until the
071 *  next entry has been reached if the archive uses the data
072 *  descriptor feature.</li>
073 *
074 * </ul>
075 *
076 * @see ZipFile
077 * @NotThreadSafe
078 */
079public class ZipArchiveInputStream extends ArchiveInputStream implements InputStreamStatistics {
080
081    /** The zip encoding to use for filenames and the file comment. */
082    private final ZipEncoding zipEncoding;
083
084    // the provided encoding (for unit tests)
085    final String encoding;
086
087    /** Whether to look for and use Unicode extra fields. */
088    private final boolean useUnicodeExtraFields;
089
090    /** Wrapped stream, will always be a PushbackInputStream. */
091    private final InputStream in;
092
093    /** Inflater used for all deflated entries. */
094    private final Inflater inf = new Inflater(true);
095
096    /** Buffer used to read from the wrapped stream. */
097    private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);
098
099    /** The entry that is currently being read. */
100    private CurrentEntry current = null;
101
102    /** Whether the stream has been closed. */
103    private boolean closed = false;
104
105    /** Whether the stream has reached the central directory - and thus found all entries. */
106    private boolean hitCentralDirectory = false;
107
108    /**
109     * When reading a stored entry that uses the data descriptor this
110     * stream has to read the full entry and caches it.  This is the
111     * cache.
112     */
113    private ByteArrayInputStream lastStoredEntry = null;
114
115    /** Whether the stream will try to read STORED entries that use a data descriptor. */
116    private boolean allowStoredEntriesWithDataDescriptor = false;
117
118    /** Count decompressed bytes for current entry */
119    private long uncompressedCount = 0;
120
121    private static final int LFH_LEN = 30;
122    /*
123      local file header signature     WORD
124      version needed to extract       SHORT
125      general purpose bit flag        SHORT
126      compression method              SHORT
127      last mod file time              SHORT
128      last mod file date              SHORT
129      crc-32                          WORD
130      compressed size                 WORD
131      uncompressed size               WORD
132      file name length                SHORT
133      extra field length              SHORT
134    */
135
136    private static final int CFH_LEN = 46;
137    /*
138        central file header signature   WORD
139        version made by                 SHORT
140        version needed to extract       SHORT
141        general purpose bit flag        SHORT
142        compression method              SHORT
143        last mod file time              SHORT
144        last mod file date              SHORT
145        crc-32                          WORD
146        compressed size                 WORD
147        uncompressed size               WORD
148        file name length                SHORT
149        extra field length              SHORT
150        file comment length             SHORT
151        disk number start               SHORT
152        internal file attributes        SHORT
153        external file attributes        WORD
154        relative offset of local header WORD
155    */
156
157    private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;
158
159    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
160    private final byte[] lfhBuf = new byte[LFH_LEN];
161    private final byte[] skipBuf = new byte[1024];
162    private final byte[] shortBuf = new byte[SHORT];
163    private final byte[] wordBuf = new byte[WORD];
164    private final byte[] twoDwordBuf = new byte[2 * DWORD];
165
166    private int entriesRead = 0;
167
168    /**
169     * Create an instance using UTF-8 encoding
170     * @param inputStream the stream to wrap
171     */
172    public ZipArchiveInputStream(final InputStream inputStream) {
173        this(inputStream, ZipEncodingHelper.UTF8);
174    }
175
176    /**
177     * Create an instance using the specified encoding
178     * @param inputStream the stream to wrap
179     * @param encoding the encoding to use for file names, use null
180     * for the platform's default encoding
181     * @since 1.5
182     */
183    public ZipArchiveInputStream(final InputStream inputStream, final String encoding) {
184        this(inputStream, encoding, true);
185    }
186
187    /**
188     * Create an instance using the specified encoding
189     * @param inputStream the stream to wrap
190     * @param encoding the encoding to use for file names, use null
191     * for the platform's default encoding
192     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
193     * Extra Fields (if present) to set the file names.
194     */
195    public ZipArchiveInputStream(final InputStream inputStream, final String encoding, final boolean useUnicodeExtraFields) {
196        this(inputStream, encoding, useUnicodeExtraFields, false);
197    }
198
199    /**
200     * Create an instance using the specified encoding
201     * @param inputStream the stream to wrap
202     * @param encoding the encoding to use for file names, use null
203     * for the platform's default encoding
204     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
205     * Extra Fields (if present) to set the file names.
206     * @param allowStoredEntriesWithDataDescriptor whether the stream
207     * will try to read STORED entries that use a data descriptor
208     * @since 1.1
209     */
210    public ZipArchiveInputStream(final InputStream inputStream,
211                                 final String encoding,
212                                 final boolean useUnicodeExtraFields,
213                                 final boolean allowStoredEntriesWithDataDescriptor) {
214        this.encoding = encoding;
215        zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
216        this.useUnicodeExtraFields = useUnicodeExtraFields;
217        in = new PushbackInputStream(inputStream, buf.capacity());
218        this.allowStoredEntriesWithDataDescriptor =
219            allowStoredEntriesWithDataDescriptor;
220        // haven't read anything so far
221        buf.limit(0);
222    }
223
224    public ZipArchiveEntry getNextZipEntry() throws IOException {
225        uncompressedCount = 0;
226
227        boolean firstEntry = true;
228        if (closed || hitCentralDirectory) {
229            return null;
230        }
231        if (current != null) {
232            closeEntry();
233            firstEntry = false;
234        }
235
236        long currentHeaderOffset = getBytesRead();
237        try {
238            if (firstEntry) {
239                // split archives have a special signature before the
240                // first local file header - look for it and fail with
241                // the appropriate error message if this is a split
242                // archive.
243                readFirstLocalFileHeader(lfhBuf);
244            } else {
245                readFully(lfhBuf);
246            }
247        } catch (final EOFException e) {
248            return null;
249        }
250
251        final ZipLong sig = new ZipLong(lfhBuf);
252        if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG)) {
253            hitCentralDirectory = true;
254            skipRemainderOfArchive();
255            return null;
256        }
257        if (!sig.equals(ZipLong.LFH_SIG)) {
258            throw new ZipException(String.format("Unexpected record signature: 0X%X", sig.getValue()));
259        }
260
261        int off = WORD;
262        current = new CurrentEntry();
263
264        final int versionMadeBy = ZipShort.getValue(lfhBuf, off);
265        off += SHORT;
266        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);
267
268        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfhBuf, off);
269        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
270        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
271        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
272        current.entry.setGeneralPurposeBit(gpFlag);
273
274        off += SHORT;
275
276        current.entry.setMethod(ZipShort.getValue(lfhBuf, off));
277        off += SHORT;
278
279        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfhBuf, off));
280        current.entry.setTime(time);
281        off += WORD;
282
283        ZipLong size = null, cSize = null;
284        if (!current.hasDataDescriptor) {
285            current.entry.setCrc(ZipLong.getValue(lfhBuf, off));
286            off += WORD;
287
288            cSize = new ZipLong(lfhBuf, off);
289            off += WORD;
290
291            size = new ZipLong(lfhBuf, off);
292            off += WORD;
293        } else {
294            off += 3 * WORD;
295        }
296
297        final int fileNameLen = ZipShort.getValue(lfhBuf, off);
298
299        off += SHORT;
300
301        final int extraLen = ZipShort.getValue(lfhBuf, off);
302        off += SHORT; // NOSONAR - assignment as documentation
303
304        final byte[] fileName = new byte[fileNameLen];
305        readFully(fileName);
306        current.entry.setName(entryEncoding.decode(fileName), fileName);
307        if (hasUTF8Flag) {
308            current.entry.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
309        }
310
311        final byte[] extraData = new byte[extraLen];
312        readFully(extraData);
313        current.entry.setExtra(extraData);
314
315        if (!hasUTF8Flag && useUnicodeExtraFields) {
316            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
317        }
318
319        processZip64Extra(size, cSize);
320
321        current.entry.setLocalHeaderOffset(currentHeaderOffset);
322        current.entry.setDataOffset(getBytesRead());
323        current.entry.setStreamContiguous(true);
324
325        ZipMethod m = ZipMethod.getMethodByCode(current.entry.getMethod());
326        if (current.entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN) {
327            if (ZipUtil.canHandleEntryData(current.entry) && m != ZipMethod.STORED && m != ZipMethod.DEFLATED) {
328                InputStream bis = new BoundedInputStream(in, current.entry.getCompressedSize());
329                switch (m) {
330                case UNSHRINKING:
331                    current.in = new UnshrinkingInputStream(bis);
332                    break;
333                case IMPLODING:
334                    current.in = new ExplodingInputStream(
335                        current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
336                        current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
337                        bis);
338                    break;
339                case BZIP2:
340                    current.in = new BZip2CompressorInputStream(bis);
341                    break;
342                case ENHANCED_DEFLATED:
343                    current.in = new Deflate64CompressorInputStream(bis);
344                    break;
345                default:
346                    // we should never get here as all supported methods have been covered
347                    // will cause an error when read is invoked, don't throw an exception here so people can
348                    // skip unsupported entries
349                    break;
350                }
351            }
352        } else if (m == ZipMethod.ENHANCED_DEFLATED) {
353            current.in = new Deflate64CompressorInputStream(in);
354        }
355
356        entriesRead++;
357        return current.entry;
358    }
359
360    /**
361     * Fills the given array with the first local file header and
362     * deals with splitting/spanning markers that may prefix the first
363     * LFH.
364     */
365    private void readFirstLocalFileHeader(final byte[] lfh) throws IOException {
366        readFully(lfh);
367        final ZipLong sig = new ZipLong(lfh);
368        if (sig.equals(ZipLong.DD_SIG)) {
369            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
370        }
371
372        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
373            // The archive is not really split as only one segment was
374            // needed in the end.  Just skip over the marker.
375            final byte[] missedLfhBytes = new byte[4];
376            readFully(missedLfhBytes);
377            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
378            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
379        }
380    }
381
382    /**
383     * Records whether a Zip64 extra is present and sets the size
384     * information from it if sizes are 0xFFFFFFFF and the entry
385     * doesn't use a data descriptor.
386     */
387    private void processZip64Extra(final ZipLong size, final ZipLong cSize) {
388        final Zip64ExtendedInformationExtraField z64 =
389            (Zip64ExtendedInformationExtraField)
390            current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
391        current.usesZip64 = z64 != null;
392        if (!current.hasDataDescriptor) {
393            if (z64 != null // same as current.usesZip64 but avoids NPE warning
394                    && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) {
395                current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
396                current.entry.setSize(z64.getSize().getLongValue());
397            } else {
398                current.entry.setCompressedSize(cSize.getValue());
399                current.entry.setSize(size.getValue());
400            }
401        }
402    }
403
404    @Override
405    public ArchiveEntry getNextEntry() throws IOException {
406        return getNextZipEntry();
407    }
408
409    /**
410     * Whether this class is able to read the given entry.
411     *
412     * <p>May return false if it is set up to use encryption or a
413     * compression method that hasn't been implemented yet.</p>
414     * @since 1.1
415     */
416    @Override
417    public boolean canReadEntryData(final ArchiveEntry ae) {
418        if (ae instanceof ZipArchiveEntry) {
419            final ZipArchiveEntry ze = (ZipArchiveEntry) ae;
420            return ZipUtil.canHandleEntryData(ze)
421                && supportsDataDescriptorFor(ze)
422                && supportsCompressedSizeFor(ze);
423        }
424        return false;
425    }
426
427    @Override
428    public int read(final byte[] buffer, final int offset, final int length) throws IOException {
429        if (closed) {
430            throw new IOException("The stream is closed");
431        }
432
433        if (current == null) {
434            return -1;
435        }
436
437        // avoid int overflow, check null buffer
438        if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
439            throw new ArrayIndexOutOfBoundsException();
440        }
441
442        ZipUtil.checkRequestedFeatures(current.entry);
443        if (!supportsDataDescriptorFor(current.entry)) {
444            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
445                    current.entry);
446        }
447        if (!supportsCompressedSizeFor(current.entry)) {
448            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.UNKNOWN_COMPRESSED_SIZE,
449                    current.entry);
450        }
451
452        int read;
453        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
454            read = readStored(buffer, offset, length);
455        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
456            read = readDeflated(buffer, offset, length);
457        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
458                || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()
459                || current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
460                || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
461            read = current.in.read(buffer, offset, length);
462        } else {
463            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
464                    current.entry);
465        }
466
467        if (read >= 0) {
468            current.crc.update(buffer, offset, read);
469            uncompressedCount += read;
470        }
471
472        return read;
473    }
474
475    /**
476     * @since 1.17
477     */
478    @Override
479    public long getCompressedCount() {
480        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
481            return current.bytesRead;
482        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
483            return getBytesInflated();
484        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) {
485            return ((UnshrinkingInputStream) current.in).getCompressedCount();
486        } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
487            return ((ExplodingInputStream) current.in).getCompressedCount();
488        } else if (current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()) {
489            return ((Deflate64CompressorInputStream) current.in).getCompressedCount();
490        } else if (current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
491            return ((BZip2CompressorInputStream) current.in).getCompressedCount();
492        } else {
493            return -1;
494        }
495    }
496
497    /**
498     * @since 1.17
499     */
500    @Override
501    public long getUncompressedCount() {
502        return uncompressedCount;
503    }
504
505    /**
506     * Implementation of read for STORED entries.
507     */
508    private int readStored(final byte[] buffer, final int offset, final int length) throws IOException {
509
510        if (current.hasDataDescriptor) {
511            if (lastStoredEntry == null) {
512                readStoredEntry();
513            }
514            return lastStoredEntry.read(buffer, offset, length);
515        }
516
517        final long csize = current.entry.getSize();
518        if (current.bytesRead >= csize) {
519            return -1;
520        }
521
522        if (buf.position() >= buf.limit()) {
523            buf.position(0);
524            final int l = in.read(buf.array());
525            if (l == -1) {
526                buf.limit(0);
527                throw new IOException("Truncated ZIP file");
528            }
529            buf.limit(l);
530
531            count(l);
532            current.bytesReadFromStream += l;
533        }
534
535        int toRead = Math.min(buf.remaining(), length);
536        if ((csize - current.bytesRead) < toRead) {
537            // if it is smaller than toRead then it fits into an int
538            toRead = (int) (csize - current.bytesRead);
539        }
540        buf.get(buffer, offset, toRead);
541        current.bytesRead += toRead;
542        return toRead;
543    }
544
545    /**
546     * Implementation of read for DEFLATED entries.
547     */
548    private int readDeflated(final byte[] buffer, final int offset, final int length) throws IOException {
549        final int read = readFromInflater(buffer, offset, length);
550        if (read <= 0) {
551            if (inf.finished()) {
552                return -1;
553            } else if (inf.needsDictionary()) {
554                throw new ZipException("This archive needs a preset dictionary"
555                                       + " which is not supported by Commons"
556                                       + " Compress.");
557            } else if (read == -1) {
558                throw new IOException("Truncated ZIP file");
559            }
560        }
561        return read;
562    }
563
564    /**
565     * Potentially reads more bytes to fill the inflater's buffer and
566     * reads from it.
567     */
568    private int readFromInflater(final byte[] buffer, final int offset, final int length) throws IOException {
569        int read = 0;
570        do {
571            if (inf.needsInput()) {
572                final int l = fill();
573                if (l > 0) {
574                    current.bytesReadFromStream += buf.limit();
575                } else if (l == -1) {
576                    return -1;
577                } else {
578                    break;
579                }
580            }
581            try {
582                read = inf.inflate(buffer, offset, length);
583            } catch (final DataFormatException e) {
584                throw (IOException) new ZipException(e.getMessage()).initCause(e);
585            }
586        } while (read == 0 && inf.needsInput());
587        return read;
588    }
589
590    @Override
591    public void close() throws IOException {
592        if (!closed) {
593            closed = true;
594            try {
595                in.close();
596            } finally {
597                inf.end();
598            }
599        }
600    }
601
602    /**
603     * Skips over and discards value bytes of data from this input
604     * stream.
605     *
606     * <p>This implementation may end up skipping over some smaller
607     * number of bytes, possibly 0, if and only if it reaches the end
608     * of the underlying stream.</p>
609     *
610     * <p>The actual number of bytes skipped is returned.</p>
611     *
612     * @param value the number of bytes to be skipped.
613     * @return the actual number of bytes skipped.
614     * @throws IOException - if an I/O error occurs.
615     * @throws IllegalArgumentException - if value is negative.
616     */
617    @Override
618    public long skip(final long value) throws IOException {
619        if (value >= 0) {
620            long skipped = 0;
621            while (skipped < value) {
622                final long rem = value - skipped;
623                final int x = read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
624                if (x == -1) {
625                    return skipped;
626                }
627                skipped += x;
628            }
629            return skipped;
630        }
631        throw new IllegalArgumentException();
632    }
633
634    /**
635     * Checks if the signature matches what is expected for a zip file.
636     * Does not currently handle self-extracting zips which may have arbitrary
637     * leading content.
638     *
639     * @param signature the bytes to check
640     * @param length    the number of bytes to check
641     * @return true, if this stream is a zip archive stream, false otherwise
642     */
643    public static boolean matches(final byte[] signature, final int length) {
644        if (length < ZipArchiveOutputStream.LFH_SIG.length) {
645            return false;
646        }
647
648        return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
649            || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip
650            || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip
651            || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes());
652    }
653
654    private static boolean checksig(final byte[] signature, final byte[] expected) {
655        for (int i = 0; i < expected.length; i++) {
656            if (signature[i] != expected[i]) {
657                return false;
658            }
659        }
660        return true;
661    }
662
663    /**
664     * Closes the current ZIP archive entry and positions the underlying
665     * stream to the beginning of the next entry. All per-entry variables
666     * and data structures are cleared.
667     * <p>
668     * If the compressed size of this entry is included in the entry header,
669     * then any outstanding bytes are simply skipped from the underlying
670     * stream without uncompressing them. This allows an entry to be safely
671     * closed even if the compression method is unsupported.
672     * <p>
673     * In case we don't know the compressed size of this entry or have
674     * already buffered too much data from the underlying stream to support
675     * uncompression, then the uncompression process is completed and the
676     * end position of the stream is adjusted based on the result of that
677     * process.
678     *
679     * @throws IOException if an error occurs
680     */
681    private void closeEntry() throws IOException {
682        if (closed) {
683            throw new IOException("The stream is closed");
684        }
685        if (current == null) {
686            return;
687        }
688
689        // Ensure all entry bytes are read
690        if (currentEntryHasOutstandingBytes()) {
691            drainCurrentEntryData();
692        } else {
693            // this is guaranteed to exhaust the stream
694            skip(Long.MAX_VALUE); //NOSONAR
695
696            final long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
697                       ? getBytesInflated() : current.bytesRead;
698
699            // this is at most a single read() operation and can't
700            // exceed the range of int
701            final int diff = (int) (current.bytesReadFromStream - inB);
702
703            // Pushback any required bytes
704            if (diff > 0) {
705                pushback(buf.array(), buf.limit() - diff, diff);
706                current.bytesReadFromStream -= diff;
707            }
708
709            // Drain remainder of entry if not all data bytes were required
710            if (currentEntryHasOutstandingBytes()) {
711                drainCurrentEntryData();
712            }
713        }
714
715        if (lastStoredEntry == null && current.hasDataDescriptor) {
716            readDataDescriptor();
717        }
718
719        inf.reset();
720        buf.clear().flip();
721        current = null;
722        lastStoredEntry = null;
723    }
724
725    /**
726     * If the compressed size of the current entry is included in the entry header
727     * and there are any outstanding bytes in the underlying stream, then
728     * this returns true.
729     *
730     * @return true, if current entry is determined to have outstanding bytes, false otherwise
731     */
732    private boolean currentEntryHasOutstandingBytes() {
733        return current.bytesReadFromStream <= current.entry.getCompressedSize()
734                && !current.hasDataDescriptor;
735    }
736
737    /**
738     * Read all data of the current entry from the underlying stream
739     * that hasn't been read, yet.
740     */
741    private void drainCurrentEntryData() throws IOException {
742        long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
743        while (remaining > 0) {
744            final long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
745            if (n < 0) {
746                throw new EOFException("Truncated ZIP entry: "
747                                       + ArchiveUtils.sanitize(current.entry.getName()));
748            }
749            count(n);
750            remaining -= n;
751        }
752    }
753
754    /**
755     * Get the number of bytes Inflater has actually processed.
756     *
757     * <p>for Java &lt; Java7 the getBytes* methods in
758     * Inflater/Deflater seem to return unsigned ints rather than
759     * longs that start over with 0 at 2^32.</p>
760     *
761     * <p>The stream knows how many bytes it has read, but not how
762     * many the Inflater actually consumed - it should be between the
763     * total number of bytes read for the entry and the total number
764     * minus the last read operation.  Here we just try to make the
765     * value close enough to the bytes we've read by assuming the
766     * number of bytes consumed must be smaller than (or equal to) the
767     * number of bytes read but not smaller by more than 2^32.</p>
768     */
769    private long getBytesInflated() {
770        long inB = inf.getBytesRead();
771        if (current.bytesReadFromStream >= TWO_EXP_32) {
772            while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
773                inB += TWO_EXP_32;
774            }
775        }
776        return inB;
777    }
778
779    private int fill() throws IOException {
780        if (closed) {
781            throw new IOException("The stream is closed");
782        }
783        final int length = in.read(buf.array());
784        if (length > 0) {
785            buf.limit(length);
786            count(buf.limit());
787            inf.setInput(buf.array(), 0, buf.limit());
788        }
789        return length;
790    }
791
792    private void readFully(final byte[] b) throws IOException {
793        final int count = IOUtils.readFully(in, b);
794        count(count);
795        if (count < b.length) {
796            throw new EOFException();
797        }
798    }
799
800    private void readDataDescriptor() throws IOException {
801        readFully(wordBuf);
802        ZipLong val = new ZipLong(wordBuf);
803        if (ZipLong.DD_SIG.equals(val)) {
804            // data descriptor with signature, skip sig
805            readFully(wordBuf);
806            val = new ZipLong(wordBuf);
807        }
808        current.entry.setCrc(val.getValue());
809
810        // if there is a ZIP64 extra field, sizes are eight bytes
811        // each, otherwise four bytes each.  Unfortunately some
812        // implementations - namely Java7 - use eight bytes without
813        // using a ZIP64 extra field -
814        // https://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588
815
816        // just read 16 bytes and check whether bytes nine to twelve
817        // look like one of the signatures of what could follow a data
818        // descriptor (ignoring archive decryption headers for now).
819        // If so, push back eight bytes and assume sizes are four
820        // bytes, otherwise sizes are eight bytes each.
821        readFully(twoDwordBuf);
822        final ZipLong potentialSig = new ZipLong(twoDwordBuf, DWORD);
823        if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
824            pushback(twoDwordBuf, DWORD, DWORD);
825            current.entry.setCompressedSize(ZipLong.getValue(twoDwordBuf));
826            current.entry.setSize(ZipLong.getValue(twoDwordBuf, WORD));
827        } else {
828            current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(twoDwordBuf));
829            current.entry.setSize(ZipEightByteInteger.getLongValue(twoDwordBuf, DWORD));
830        }
831    }
832
833    /**
834     * Whether this entry requires a data descriptor this library can work with.
835     *
836     * @return true if allowStoredEntriesWithDataDescriptor is true,
837     * the entry doesn't require any data descriptor or the method is
838     * DEFLATED or ENHANCED_DEFLATED.
839     */
840    private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) {
841        return !entry.getGeneralPurposeBit().usesDataDescriptor()
842
843                || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED)
844                || entry.getMethod() == ZipEntry.DEFLATED
845                || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode();
846    }
847
848    /**
849     * Whether the compressed size for the entry is either known or
850     * not required by the compression method being used.
851     */
852    private boolean supportsCompressedSizeFor(final ZipArchiveEntry entry) {
853        return entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN
854            || entry.getMethod() == ZipEntry.DEFLATED
855            || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
856            || (entry.getGeneralPurposeBit().usesDataDescriptor()
857                && allowStoredEntriesWithDataDescriptor
858                && entry.getMethod() == ZipEntry.STORED);
859    }
860
861    /**
862     * Caches a stored entry that uses the data descriptor.
863     *
864     * <ul>
865     *   <li>Reads a stored entry until the signature of a local file
866     *     header, central directory header or data descriptor has been
867     *     found.</li>
868     *   <li>Stores all entry data in lastStoredEntry.</p>
869     *   <li>Rewinds the stream to position at the data
870     *     descriptor.</li>
871     *   <li>reads the data descriptor</li>
872     * </ul>
873     *
874     * <p>After calling this method the entry should know its size,
875     * the entry's data is cached and the stream is positioned at the
876     * next local file or central directory header.</p>
877     */
878    private void readStoredEntry() throws IOException {
879        final ByteArrayOutputStream bos = new ByteArrayOutputStream();
880        int off = 0;
881        boolean done = false;
882
883        // length of DD without signature
884        final int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD;
885
886        while (!done) {
887            final int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off);
888            if (r <= 0) {
889                // read the whole archive without ever finding a
890                // central directory
891                throw new IOException("Truncated ZIP file");
892            }
893            if (r + off < 4) {
894                // buffer too small to check for a signature, loop
895                off += r;
896                continue;
897            }
898
899            done = bufferContainsSignature(bos, off, r, ddLen);
900            if (!done) {
901                off = cacheBytesRead(bos, off, r, ddLen);
902            }
903        }
904
905        final byte[] b = bos.toByteArray();
906        lastStoredEntry = new ByteArrayInputStream(b);
907    }
908
909    private static final byte[] LFH = ZipLong.LFH_SIG.getBytes();
910    private static final byte[] CFH = ZipLong.CFH_SIG.getBytes();
911    private static final byte[] DD = ZipLong.DD_SIG.getBytes();
912
913    /**
914     * Checks whether the current buffer contains the signature of a
915     * &quot;data descriptor&quot;, &quot;local file header&quot; or
916     * &quot;central directory entry&quot;.
917     *
918     * <p>If it contains such a signature, reads the data descriptor
919     * and positions the stream right after the data descriptor.</p>
920     */
921    private boolean bufferContainsSignature(final ByteArrayOutputStream bos, final int offset, final int lastRead, final int expectedDDLen)
922            throws IOException {
923
924        boolean done = false;
925        int readTooMuch = 0;
926        for (int i = 0; !done && i < offset + lastRead - 4; i++) {
927            if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) {
928                if ((buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3])
929                    || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) {
930                    // found a LFH or CFH:
931                    readTooMuch = offset + lastRead - i - expectedDDLen;
932                    done = true;
933                }
934                else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) {
935                    // found DD:
936                    readTooMuch = offset + lastRead - i;
937                    done = true;
938                }
939                if (done) {
940                    // * push back bytes read in excess as well as the data
941                    //   descriptor
942                    // * copy the remaining bytes to cache
943                    // * read data descriptor
944                    pushback(buf.array(), offset + lastRead - readTooMuch, readTooMuch);
945                    bos.write(buf.array(), 0, i);
946                    readDataDescriptor();
947                }
948            }
949        }
950        return done;
951    }
952
953    /**
954     * If the last read bytes could hold a data descriptor and an
955     * incomplete signature then save the last bytes to the front of
956     * the buffer and cache everything in front of the potential data
957     * descriptor into the given ByteArrayOutputStream.
958     *
959     * <p>Data descriptor plus incomplete signature (3 bytes in the
960     * worst case) can be 20 bytes max.</p>
961     */
962    private int cacheBytesRead(final ByteArrayOutputStream bos, int offset, final int lastRead, final int expecteDDLen) {
963        final int cacheable = offset + lastRead - expecteDDLen - 3;
964        if (cacheable > 0) {
965            bos.write(buf.array(), 0, cacheable);
966            System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3);
967            offset = expecteDDLen + 3;
968        } else {
969            offset += lastRead;
970        }
971        return offset;
972    }
973
974    private void pushback(final byte[] buf, final int offset, final int length) throws IOException {
975        ((PushbackInputStream) in).unread(buf, offset, length);
976        pushedBackBytes(length);
977    }
978
979    // End of Central Directory Record
980    //   end of central dir signature    WORD
981    //   number of this disk             SHORT
982    //   number of the disk with the
983    //   start of the central directory  SHORT
984    //   total number of entries in the
985    //   central directory on this disk  SHORT
986    //   total number of entries in
987    //   the central directory           SHORT
988    //   size of the central directory   WORD
989    //   offset of start of central
990    //   directory with respect to
991    //   the starting disk number        WORD
992    //   .ZIP file comment length        SHORT
993    //   .ZIP file comment               up to 64KB
994    //
995
996    /**
997     * Reads the stream until it find the "End of central directory
998     * record" and consumes it as well.
999     */
1000    private void skipRemainderOfArchive() throws IOException {
1001        // skip over central directory. One LFH has been read too much
1002        // already.  The calculation discounts file names and extra
1003        // data so it will be too short.
1004        realSkip((long) entriesRead * CFH_LEN - LFH_LEN);
1005        findEocdRecord();
1006        realSkip((long) ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */);
1007        readFully(shortBuf);
1008        // file comment
1009        realSkip(ZipShort.getValue(shortBuf));
1010    }
1011
1012    /**
1013     * Reads forward until the signature of the &quot;End of central
1014     * directory&quot; record is found.
1015     */
1016    private void findEocdRecord() throws IOException {
1017        int currentByte = -1;
1018        boolean skipReadCall = false;
1019        while (skipReadCall || (currentByte = readOneByte()) > -1) {
1020            skipReadCall = false;
1021            if (!isFirstByteOfEocdSig(currentByte)) {
1022                continue;
1023            }
1024            currentByte = readOneByte();
1025            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) {
1026                if (currentByte == -1) {
1027                    break;
1028                }
1029                skipReadCall = isFirstByteOfEocdSig(currentByte);
1030                continue;
1031            }
1032            currentByte = readOneByte();
1033            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) {
1034                if (currentByte == -1) {
1035                    break;
1036                }
1037                skipReadCall = isFirstByteOfEocdSig(currentByte);
1038                continue;
1039            }
1040            currentByte = readOneByte();
1041            if (currentByte == -1
1042                || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) {
1043                break;
1044            }
1045            skipReadCall = isFirstByteOfEocdSig(currentByte);
1046        }
1047    }
1048
1049    /**
1050     * Skips bytes by reading from the underlying stream rather than
1051     * the (potentially inflating) archive stream - which {@link
1052     * #skip} would do.
1053     *
1054     * Also updates bytes-read counter.
1055     */
1056    private void realSkip(final long value) throws IOException {
1057        if (value >= 0) {
1058            long skipped = 0;
1059            while (skipped < value) {
1060                final long rem = value - skipped;
1061                final int x = in.read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
1062                if (x == -1) {
1063                    return;
1064                }
1065                count(x);
1066                skipped += x;
1067            }
1068            return;
1069        }
1070        throw new IllegalArgumentException();
1071    }
1072
1073    /**
1074     * Reads bytes by reading from the underlying stream rather than
1075     * the (potentially inflating) archive stream - which {@link #read} would do.
1076     *
1077     * Also updates bytes-read counter.
1078     */
1079    private int readOneByte() throws IOException {
1080        final int b = in.read();
1081        if (b != -1) {
1082            count(1);
1083        }
1084        return b;
1085    }
1086
1087    private boolean isFirstByteOfEocdSig(final int b) {
1088        return b == ZipArchiveOutputStream.EOCD_SIG[0];
1089    }
1090
1091    /**
1092     * Structure collecting information for the entry that is
1093     * currently being read.
1094     */
1095    private static final class CurrentEntry {
1096
1097        /**
1098         * Current ZIP entry.
1099         */
1100        private final ZipArchiveEntry entry = new ZipArchiveEntry();
1101
1102        /**
1103         * Does the entry use a data descriptor?
1104         */
1105        private boolean hasDataDescriptor;
1106
1107        /**
1108         * Does the entry have a ZIP64 extended information extra field.
1109         */
1110        private boolean usesZip64;
1111
1112        /**
1113         * Number of bytes of entry content read by the client if the
1114         * entry is STORED.
1115         */
1116        private long bytesRead;
1117
1118        /**
1119         * Number of bytes of entry content read from the stream.
1120         *
1121         * <p>This may be more than the actual entry's length as some
1122         * stuff gets buffered up and needs to be pushed back when the
1123         * end of the entry has been reached.</p>
1124         */
1125        private long bytesReadFromStream;
1126
1127        /**
1128         * The checksum calculated as the current entry is read.
1129         */
1130        private final CRC32 crc = new CRC32();
1131
1132        /**
1133         * The input stream decompressing the data for shrunk and imploded entries.
1134         */
1135        private InputStream in;
1136    }
1137
1138    /**
1139     * Bounded input stream adapted from commons-io
1140     */
1141    private class BoundedInputStream extends InputStream {
1142
1143        /** the wrapped input stream */
1144        private final InputStream in;
1145
1146        /** the max length to provide */
1147        private final long max;
1148
1149        /** the number of bytes already returned */
1150        private long pos = 0;
1151
1152        /**
1153         * Creates a new <code>BoundedInputStream</code> that wraps the given input
1154         * stream and limits it to a certain size.
1155         *
1156         * @param in The wrapped input stream
1157         * @param size The maximum number of bytes to return
1158         */
1159        public BoundedInputStream(final InputStream in, final long size) {
1160            this.max = size;
1161            this.in = in;
1162        }
1163
1164        @Override
1165        public int read() throws IOException {
1166            if (max >= 0 && pos >= max) {
1167                return -1;
1168            }
1169            final int result = in.read();
1170            pos++;
1171            count(1);
1172            current.bytesReadFromStream++;
1173            return result;
1174        }
1175
1176        @Override
1177        public int read(final byte[] b) throws IOException {
1178            return this.read(b, 0, b.length);
1179        }
1180
1181        @Override
1182        public int read(final byte[] b, final int off, final int len) throws IOException {
1183            if (max >= 0 && pos >= max) {
1184                return -1;
1185            }
1186            final long maxRead = max >= 0 ? Math.min(len, max - pos) : len;
1187            final int bytesRead = in.read(b, off, (int) maxRead);
1188
1189            if (bytesRead == -1) {
1190                return -1;
1191            }
1192
1193            pos += bytesRead;
1194            count(bytesRead);
1195            current.bytesReadFromStream += bytesRead;
1196            return bytesRead;
1197        }
1198
1199        @Override
1200        public long skip(final long n) throws IOException {
1201            final long toSkip = max >= 0 ? Math.min(n, max - pos) : n;
1202            final long skippedBytes = IOUtils.skip(in, toSkip);
1203            pos += skippedBytes;
1204            return skippedBytes;
1205        }
1206
1207        @Override
1208        public int available() throws IOException {
1209            if (max >= 0 && pos >= max) {
1210                return 0;
1211            }
1212            return in.available();
1213        }
1214    }
1215}