001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers.zip; 020 021import java.io.ByteArrayInputStream; 022import java.io.ByteArrayOutputStream; 023import java.io.EOFException; 024import java.io.IOException; 025import java.io.InputStream; 026import java.io.PushbackInputStream; 027import java.nio.ByteBuffer; 028import java.util.zip.CRC32; 029import java.util.zip.DataFormatException; 030import java.util.zip.Inflater; 031import java.util.zip.ZipEntry; 032import java.util.zip.ZipException; 033 034import org.apache.commons.compress.archivers.ArchiveEntry; 035import org.apache.commons.compress.archivers.ArchiveInputStream; 036import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; 037import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream; 038import org.apache.commons.compress.utils.ArchiveUtils; 039import org.apache.commons.compress.utils.IOUtils; 040import org.apache.commons.compress.utils.InputStreamStatistics; 041 042import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; 043import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; 
import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;

/**
 * Implements an input stream that can read Zip archives.
 *
 * <p>As of Apache Commons Compress it transparently supports Zip64
 * extensions and thus individual entries and archives larger than 4
 * GB or with more than 65536 entries.</p>
 *
 * <p>The {@link ZipFile} class is preferred when reading from files
 * as {@link ZipArchiveInputStream} is limited by not being able to
 * read the central directory header before returning entries. In
 * particular {@link ZipArchiveInputStream}</p>
 *
 * <ul>
 *
 * <li>may return entries that are not part of the central directory
 * at all and shouldn't be considered part of the archive.</li>
 *
 * <li>may return several entries with the same name.</li>
 *
 * <li>will not return internal or external attributes.</li>
 *
 * <li>may return incomplete extra field data.</li>
 *
 * <li>may return unknown sizes and CRC values for entries until the
 * next entry has been reached if the archive uses the data
 * descriptor feature.</li>
 *
 * </ul>
 *
 * @see ZipFile
 * @NotThreadSafe
 */
public class ZipArchiveInputStream extends ArchiveInputStream implements InputStreamStatistics {

    /** The zip encoding to use for filenames and the file comment. */
    private final ZipEncoding zipEncoding;

    // the provided encoding (for unit tests)
    final String encoding;

    /** Whether to look for and use Unicode extra fields. */
    private final boolean useUnicodeExtraFields;

    /** Wrapped stream, will always be a PushbackInputStream. */
    private final InputStream in;

    /** Inflater used for all deflated entries. */
    private final Inflater inf = new Inflater(true);

    /** Buffer used to read from the wrapped stream. */
    private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);

    /** The entry that is currently being read. */
    private CurrentEntry current = null;

    /** Whether the stream has been closed. */
    private boolean closed = false;

    /** Whether the stream has reached the central directory - and thus found all entries. */
    private boolean hitCentralDirectory = false;

    /**
     * When reading a stored entry that uses the data descriptor this
     * stream has to read the full entry and caches it. This is the
     * cache.
     */
    private ByteArrayInputStream lastStoredEntry = null;

    /** Whether the stream will try to read STORED entries that use a data descriptor. */
    private boolean allowStoredEntriesWithDataDescriptor = false;

    /** Count decompressed bytes for current entry */
    private long uncompressedCount = 0;

    /** Length of a local file header in bytes (fixed part, excluding name and extra field). */
    private static final int LFH_LEN = 30;
    /*
      local file header signature     WORD
      version needed to extract       SHORT
      general purpose bit flag        SHORT
      compression method              SHORT
      last mod file time              SHORT
      last mod file date              SHORT
      crc-32                          WORD
      compressed size                 WORD
      uncompressed size               WORD
      file name length                SHORT
      extra field length              SHORT
    */

    /** Length of a central directory file header in bytes (fixed part). */
    private static final int CFH_LEN = 46;
    /*
      central file header signature   WORD
      version made by                 SHORT
      version needed to extract       SHORT
      general purpose bit flag        SHORT
      compression method              SHORT
      last mod file time              SHORT
      last mod file date              SHORT
      crc-32                          WORD
      compressed size                 WORD
      uncompressed size               WORD
      file name length                SHORT
      extra field length              SHORT
      file comment length             SHORT
      disk number start               SHORT
      internal file attributes        SHORT
      external file attributes        WORD
      relative offset of local header WORD
    */

    private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;

    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    private final byte[] lfhBuf = new byte[LFH_LEN];
    private final byte[] skipBuf = new byte[1024];
    private final byte[] shortBuf = new byte[SHORT];
    private final byte[] wordBuf = new byte[WORD];
    private final byte[] twoDwordBuf = new byte[2 * DWORD];

    // number of entries returned so far; used to estimate how much of
    // the central directory to skip in skipRemainderOfArchive
    private int entriesRead = 0;

    /**
     * Create an instance using UTF-8 encoding
     * @param inputStream the stream to wrap
     */
    public ZipArchiveInputStream(final InputStream inputStream) {
        this(inputStream, ZipEncodingHelper.UTF8);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @since 1.5
     */
    public ZipArchiveInputStream(final InputStream inputStream, final String encoding) {
        this(inputStream, encoding, true);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     */
    public ZipArchiveInputStream(final InputStream inputStream, final String encoding, final boolean useUnicodeExtraFields) {
        this(inputStream, encoding, useUnicodeExtraFields, false);
    }

    /**
     * Create an instance using the specified encoding
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     * @param allowStoredEntriesWithDataDescriptor whether the stream
     * will try to read STORED entries that use a data descriptor
     * @since 1.1
     */
    public ZipArchiveInputStream(final InputStream inputStream,
                                 final String encoding,
                                 final boolean useUnicodeExtraFields,
                                 final boolean allowStoredEntriesWithDataDescriptor) {
        this.encoding = encoding;
        zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
        this.useUnicodeExtraFields = useUnicodeExtraFields;
        // pushback capacity must match the read buffer so a whole
        // buffer's worth of over-read bytes can be unread again
        in = new PushbackInputStream(inputStream, buf.capacity());
        this.allowStoredEntriesWithDataDescriptor =
            allowStoredEntriesWithDataDescriptor;
        // haven't read anything so far
        buf.limit(0);
    }

    /**
     * Reads the next local file header and positions the stream at
     * the start of the entry data.
     *
     * @return the next entry, or {@code null} if the stream is closed,
     * the central directory has been reached or the underlying stream
     * is exhausted
     * @throws IOException if an I/O error occurs or an unexpected
     * record signature is encountered
     */
    public ZipArchiveEntry getNextZipEntry() throws IOException {
        uncompressedCount = 0;

        boolean firstEntry = true;
        if (closed || hitCentralDirectory) {
            return null;
        }
        if (current != null) {
            closeEntry();
            firstEntry = false;
        }

        long currentHeaderOffset = getBytesRead();
        try {
            if (firstEntry) {
                // split archives have a special signature before the
                // first local file header - look for it and fail with
                // the appropriate error message if this is a split
                // archive.
                readFirstLocalFileHeader(lfhBuf);
            } else {
                readFully(lfhBuf);
            }
        } catch (final EOFException e) {
            return null;
        }

        final ZipLong sig = new ZipLong(lfhBuf);
        if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG)) {
            // first central directory record (or archive extra data
            // record) means there are no more local entries
            hitCentralDirectory = true;
            skipRemainderOfArchive();
            return null;
        }
        if (!sig.equals(ZipLong.LFH_SIG)) {
            throw new ZipException(String.format("Unexpected record signature: 0X%X", sig.getValue()));
        }

        int off = WORD;
        current = new CurrentEntry();

        final int versionMadeBy = ZipShort.getValue(lfhBuf, off);
        off += SHORT;
        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);

        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfhBuf, off);
        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
        current.entry.setGeneralPurposeBit(gpFlag);

        off += SHORT;

        current.entry.setMethod(ZipShort.getValue(lfhBuf, off));
        off += SHORT;

        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfhBuf, off));
        current.entry.setTime(time);
        off += WORD;

        // CRC and sizes stored in the LFH are only meaningful when no
        // data descriptor is used; otherwise they trail the entry data
        ZipLong size = null, cSize = null;
        if (!current.hasDataDescriptor) {
            current.entry.setCrc(ZipLong.getValue(lfhBuf, off));
            off += WORD;

            cSize = new ZipLong(lfhBuf, off);
            off += WORD;

            size = new ZipLong(lfhBuf, off);
            off += WORD;
        } else {
            off += 3 * WORD;
        }

        final int fileNameLen = ZipShort.getValue(lfhBuf, off);

        off += SHORT;

        final int extraLen = ZipShort.getValue(lfhBuf, off);
        off += SHORT; // NOSONAR - assignment as documentation

        final byte[] fileName = new byte[fileNameLen];
        readFully(fileName);
        current.entry.setName(entryEncoding.decode(fileName), fileName);
        if (hasUTF8Flag) {
            current.entry.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
        }

        final byte[] extraData = new byte[extraLen];
        readFully(extraData);
        current.entry.setExtra(extraData);

        if (!hasUTF8Flag && useUnicodeExtraFields) {
            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
        }

        processZip64Extra(size, cSize);

        current.entry.setLocalHeaderOffset(currentHeaderOffset);
        current.entry.setDataOffset(getBytesRead());
        current.entry.setStreamContiguous(true);

        // STORED and DEFLATED are decoded inline by read(); the other
        // supported methods get a dedicated decoding stream here
        ZipMethod m = ZipMethod.getMethodByCode(current.entry.getMethod());
        if (current.entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN) {
            if (ZipUtil.canHandleEntryData(current.entry) && m != ZipMethod.STORED && m != ZipMethod.DEFLATED) {
                InputStream bis = new BoundedInputStream(in, current.entry.getCompressedSize());
                switch (m) {
                case UNSHRINKING:
                    current.in = new UnshrinkingInputStream(bis);
                    break;
                case IMPLODING:
                    current.in = new ExplodingInputStream(
                        current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
                        current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
                        bis);
                    break;
                case BZIP2:
                    current.in = new BZip2CompressorInputStream(bis);
                    break;
                case ENHANCED_DEFLATED:
                    current.in = new Deflate64CompressorInputStream(bis);
                    break;
                default:
                    // we should never get here as all supported methods have been covered
                    // will cause an error when read is invoked, don't throw an exception here so people can
                    // skip unsupported entries
                    break;
                }
            }
        } else if (m == ZipMethod.ENHANCED_DEFLATED) {
            current.in = new Deflate64CompressorInputStream(in);
        }

        entriesRead++;
        return current.entry;
    }

    /**
     * Fills the given array with the first local file header and
     * deals with splitting/spanning markers that may prefix the first
     * LFH.
     */
    private void readFirstLocalFileHeader(final byte[] lfh) throws IOException {
        readFully(lfh);
        final ZipLong sig = new ZipLong(lfh);
        if (sig.equals(ZipLong.DD_SIG)) {
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
        }

        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
            // The archive is not really split as only one segment was
            // needed in the end. Just skip over the marker.
            final byte[] missedLfhBytes = new byte[4];
            readFully(missedLfhBytes);
            // shift the already-read bytes to the front of lfh and
            // append the four bytes the marker displaced
            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
        }
    }

    /**
     * Records whether a Zip64 extra is present and sets the size
     * information from it if sizes are 0xFFFFFFFF and the entry
     * doesn't use a data descriptor.
     */
    private void processZip64Extra(final ZipLong size, final ZipLong cSize) {
        final Zip64ExtendedInformationExtraField z64 =
            (Zip64ExtendedInformationExtraField)
            current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
        current.usesZip64 = z64 != null;
        if (!current.hasDataDescriptor) {
            // size/cSize are non-null whenever hasDataDescriptor is
            // false - see getNextZipEntry
            if (z64 != null // same as current.usesZip64 but avoids NPE warning
                && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) {
                current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
                current.entry.setSize(z64.getSize().getLongValue());
            } else {
                current.entry.setCompressedSize(cSize.getValue());
                current.entry.setSize(size.getValue());
            }
        }
    }

    @Override
    public ArchiveEntry getNextEntry() throws IOException {
        return getNextZipEntry();
    }

    /**
     * Whether this class is able to read the given entry.
411 * 412 * <p>May return false if it is set up to use encryption or a 413 * compression method that hasn't been implemented yet.</p> 414 * @since 1.1 415 */ 416 @Override 417 public boolean canReadEntryData(final ArchiveEntry ae) { 418 if (ae instanceof ZipArchiveEntry) { 419 final ZipArchiveEntry ze = (ZipArchiveEntry) ae; 420 return ZipUtil.canHandleEntryData(ze) 421 && supportsDataDescriptorFor(ze) 422 && supportsCompressedSizeFor(ze); 423 } 424 return false; 425 } 426 427 @Override 428 public int read(final byte[] buffer, final int offset, final int length) throws IOException { 429 if (closed) { 430 throw new IOException("The stream is closed"); 431 } 432 433 if (current == null) { 434 return -1; 435 } 436 437 // avoid int overflow, check null buffer 438 if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) { 439 throw new ArrayIndexOutOfBoundsException(); 440 } 441 442 ZipUtil.checkRequestedFeatures(current.entry); 443 if (!supportsDataDescriptorFor(current.entry)) { 444 throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR, 445 current.entry); 446 } 447 if (!supportsCompressedSizeFor(current.entry)) { 448 throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.UNKNOWN_COMPRESSED_SIZE, 449 current.entry); 450 } 451 452 int read; 453 if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) { 454 read = readStored(buffer, offset, length); 455 } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) { 456 read = readDeflated(buffer, offset, length); 457 } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode() 458 || current.entry.getMethod() == ZipMethod.IMPLODING.getCode() 459 || current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode() 460 || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) { 461 read = current.in.read(buffer, offset, length); 462 } else { 463 throw new 
UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()), 464 current.entry); 465 } 466 467 if (read >= 0) { 468 current.crc.update(buffer, offset, read); 469 uncompressedCount += read; 470 } 471 472 return read; 473 } 474 475 /** 476 * @since 1.17 477 */ 478 @Override 479 public long getCompressedCount() { 480 if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) { 481 return current.bytesRead; 482 } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) { 483 return getBytesInflated(); 484 } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) { 485 return ((UnshrinkingInputStream) current.in).getCompressedCount(); 486 } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) { 487 return ((ExplodingInputStream) current.in).getCompressedCount(); 488 } else if (current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()) { 489 return ((Deflate64CompressorInputStream) current.in).getCompressedCount(); 490 } else if (current.entry.getMethod() == ZipMethod.BZIP2.getCode()) { 491 return ((BZip2CompressorInputStream) current.in).getCompressedCount(); 492 } else { 493 return -1; 494 } 495 } 496 497 /** 498 * @since 1.17 499 */ 500 @Override 501 public long getUncompressedCount() { 502 return uncompressedCount; 503 } 504 505 /** 506 * Implementation of read for STORED entries. 
     */
    private int readStored(final byte[] buffer, final int offset, final int length) throws IOException {

        if (current.hasDataDescriptor) {
            // sizes are unknown up front, so the whole entry has to be
            // read and cached first - see readStoredEntry
            if (lastStoredEntry == null) {
                readStoredEntry();
            }
            return lastStoredEntry.read(buffer, offset, length);
        }

        // for STORED entries compressed and uncompressed size are equal
        final long csize = current.entry.getSize();
        if (current.bytesRead >= csize) {
            return -1;
        }

        if (buf.position() >= buf.limit()) {
            buf.position(0);
            final int l = in.read(buf.array());
            if (l == -1) {
                buf.limit(0);
                throw new IOException("Truncated ZIP file");
            }
            buf.limit(l);

            count(l);
            current.bytesReadFromStream += l;
        }

        int toRead = Math.min(buf.remaining(), length);
        if ((csize - current.bytesRead) < toRead) {
            // if it is smaller than toRead then it fits into an int
            toRead = (int) (csize - current.bytesRead);
        }
        buf.get(buffer, offset, toRead);
        current.bytesRead += toRead;
        return toRead;
    }

    /**
     * Implementation of read for DEFLATED entries.
     */
    private int readDeflated(final byte[] buffer, final int offset, final int length) throws IOException {
        final int read = readFromInflater(buffer, offset, length);
        if (read <= 0) {
            if (inf.finished()) {
                return -1;
            } else if (inf.needsDictionary()) {
                throw new ZipException("This archive needs a preset dictionary"
                                       + " which is not supported by Commons"
                                       + " Compress.");
            } else if (read == -1) {
                // readFromInflater signals EOF of the wrapped stream
                // before the deflate stream was finished
                throw new IOException("Truncated ZIP file");
            }
        }
        return read;
    }

    /**
     * Potentially reads more bytes to fill the inflater's buffer and
     * reads from it.
     */
    private int readFromInflater(final byte[] buffer, final int offset, final int length) throws IOException {
        int read = 0;
        do {
            if (inf.needsInput()) {
                final int l = fill();
                if (l > 0) {
                    // fill() set the buffer's limit to the bytes read,
                    // account for them as consumed from the stream
                    current.bytesReadFromStream += buf.limit();
                } else if (l == -1) {
                    return -1;
                } else {
                    break;
                }
            }
            try {
                read = inf.inflate(buffer, offset, length);
            } catch (final DataFormatException e) {
                throw (IOException) new ZipException(e.getMessage()).initCause(e);
            }
        } while (read == 0 && inf.needsInput());
        return read;
    }

    @Override
    public void close() throws IOException {
        if (!closed) {
            closed = true;
            try {
                in.close();
            } finally {
                // release the inflater's native memory even if closing
                // the wrapped stream failed
                inf.end();
            }
        }
    }

    /**
     * Skips over and discards value bytes of data from this input
     * stream.
     *
     * <p>This implementation may end up skipping over some smaller
     * number of bytes, possibly 0, if and only if it reaches the end
     * of the underlying stream.</p>
     *
     * <p>The actual number of bytes skipped is returned.</p>
     *
     * @param value the number of bytes to be skipped.
     * @return the actual number of bytes skipped.
     * @throws IOException - if an I/O error occurs.
     * @throws IllegalArgumentException - if value is negative.
     */
    @Override
    public long skip(final long value) throws IOException {
        if (value >= 0) {
            long skipped = 0;
            while (skipped < value) {
                final long rem = value - skipped;
                final int x = read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
                if (x == -1) {
                    return skipped;
                }
                skipped += x;
            }
            return skipped;
        }
        throw new IllegalArgumentException();
    }

    /**
     * Checks if the signature matches what is expected for a zip file.
     * Does not currently handle self-extracting zips which may have arbitrary
     * leading content.
     *
     * @param signature the bytes to check
     * @param length the number of bytes to check
     * @return true, if this stream is a zip archive stream, false otherwise
     */
    public static boolean matches(final byte[] signature, final int length) {
        if (length < ZipArchiveOutputStream.LFH_SIG.length) {
            return false;
        }

        return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
            || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip
            || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip
            || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes());
    }

    /**
     * Whether the first {@code expected.length} bytes of
     * {@code signature} match {@code expected}.
     */
    private static boolean checksig(final byte[] signature, final byte[] expected) {
        for (int i = 0; i < expected.length; i++) {
            if (signature[i] != expected[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Closes the current ZIP archive entry and positions the underlying
     * stream to the beginning of the next entry. All per-entry variables
     * and data structures are cleared.
     * <p>
     * If the compressed size of this entry is included in the entry header,
     * then any outstanding bytes are simply skipped from the underlying
     * stream without uncompressing them. This allows an entry to be safely
     * closed even if the compression method is unsupported.
     * <p>
     * In case we don't know the compressed size of this entry or have
     * already buffered too much data from the underlying stream to support
     * uncompression, then the uncompression process is completed and the
     * end position of the stream is adjusted based on the result of that
     * process.
     *
     * @throws IOException if an error occurs
     */
    private void closeEntry() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        if (current == null) {
            return;
        }

        // Ensure all entry bytes are read
        if (currentEntryHasOutstandingBytes()) {
            drainCurrentEntryData();
        } else {
            // this is guaranteed to exhaust the stream
            skip(Long.MAX_VALUE); //NOSONAR

            final long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
                ? getBytesInflated() : current.bytesRead;

            // this is at most a single read() operation and can't
            // exceed the range of int
            final int diff = (int) (current.bytesReadFromStream - inB);

            // Pushback any required bytes
            if (diff > 0) {
                pushback(buf.array(), buf.limit() - diff, diff);
                current.bytesReadFromStream -= diff;
            }

            // Drain remainder of entry if not all data bytes were required
            if (currentEntryHasOutstandingBytes()) {
                drainCurrentEntryData();
            }
        }

        if (lastStoredEntry == null && current.hasDataDescriptor) {
            readDataDescriptor();
        }

        // reset per-entry state for the next entry
        inf.reset();
        buf.clear().flip();
        current = null;
        lastStoredEntry = null;
    }

    /**
     * If the compressed size of the current entry is included in the entry header
     * and there are any outstanding bytes in the underlying stream, then
     * this returns true.
     *
     * @return true, if current entry is determined to have outstanding bytes, false otherwise
     */
    private boolean currentEntryHasOutstandingBytes() {
        return current.bytesReadFromStream <= current.entry.getCompressedSize()
            && !current.hasDataDescriptor;
    }

    /**
     * Read all data of the current entry from the underlying stream
     * that hasn't been read, yet.
     */
    private void drainCurrentEntryData() throws IOException {
        long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
        while (remaining > 0) {
            final long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
            if (n < 0) {
                throw new EOFException("Truncated ZIP entry: "
                                       + ArchiveUtils.sanitize(current.entry.getName()));
            }
            count(n);
            remaining -= n;
        }
    }

    /**
     * Get the number of bytes Inflater has actually processed.
     *
     * <p>for Java < Java7 the getBytes* methods in
     * Inflater/Deflater seem to return unsigned ints rather than
     * longs that start over with 0 at 2^32.</p>
     *
     * <p>The stream knows how many bytes it has read, but not how
     * many the Inflater actually consumed - it should be between the
     * total number of bytes read for the entry and the total number
     * minus the last read operation. Here we just try to make the
     * value close enough to the bytes we've read by assuming the
     * number of bytes consumed must be smaller than (or equal to) the
     * number of bytes read but not smaller by more than 2^32.</p>
     */
    private long getBytesInflated() {
        long inB = inf.getBytesRead();
        if (current.bytesReadFromStream >= TWO_EXP_32) {
            while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
                inB += TWO_EXP_32;
            }
        }
        return inB;
    }

    /**
     * Reads the next chunk of the wrapped stream into {@code buf} and
     * hands it to the inflater.
     *
     * @return the number of bytes read, 0 or -1 as reported by the
     * wrapped stream
     */
    private int fill() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        final int length = in.read(buf.array());
        if (length > 0) {
            buf.limit(length);
            count(buf.limit());
            inf.setInput(buf.array(), 0, buf.limit());
        }
        return length;
    }

    /**
     * Fills the given array completely from the wrapped stream.
     *
     * @throws EOFException if the stream ends before the array is full
     */
    private void readFully(final byte[] b) throws IOException {
        final int count = IOUtils.readFully(in, b);
        count(count);
        if (count < b.length) {
            throw new EOFException();
        }
    }

    // reads the data descriptor that follows the current entry's data
    // and records CRC, compressed and uncompressed size on the entry
    private void readDataDescriptor() throws IOException {
        readFully(wordBuf);
        ZipLong val = new ZipLong(wordBuf);
        if (ZipLong.DD_SIG.equals(val)) {
            // data descriptor with signature, skip sig
            readFully(wordBuf);
            val = new ZipLong(wordBuf);
        }
        // first (non-signature) word of the descriptor is the CRC
        current.entry.setCrc(val.getValue());

        // if there is a ZIP64 extra field, sizes are eight bytes
        // each, otherwise four bytes each. Unfortunately some
        // implementations - namely Java7 - use eight bytes without
        // using a ZIP64 extra field -
        // https://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588

        // just read 16 bytes and check whether bytes nine to twelve
        // look like one of the signatures of what could follow a data
        // descriptor (ignoring archive decryption headers for now).
        // If so, push back eight bytes and assume sizes are four
        // bytes, otherwise sizes are eight bytes each.
        readFully(twoDwordBuf);
        final ZipLong potentialSig = new ZipLong(twoDwordBuf, DWORD);
        if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
            pushback(twoDwordBuf, DWORD, DWORD);
            current.entry.setCompressedSize(ZipLong.getValue(twoDwordBuf));
            current.entry.setSize(ZipLong.getValue(twoDwordBuf, WORD));
        } else {
            current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(twoDwordBuf));
            current.entry.setSize(ZipEightByteInteger.getLongValue(twoDwordBuf, DWORD));
        }
    }

    /**
     * Whether this entry requires a data descriptor this library can work with.
     *
     * @return true if allowStoredEntriesWithDataDescriptor is true,
     * the entry doesn't require any data descriptor or the method is
     * DEFLATED or ENHANCED_DEFLATED.
     */
    private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) {
        return !entry.getGeneralPurposeBit().usesDataDescriptor()

            || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED)
            || entry.getMethod() == ZipEntry.DEFLATED
            || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode();
    }

    /**
     * Whether the compressed size for the entry is either known or
     * not required by the compression method being used.
     *
     * @param entry the entry to check
     */
    private boolean supportsCompressedSizeFor(final ZipArchiveEntry entry) {
        return entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN
            || entry.getMethod() == ZipEntry.DEFLATED
            || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
            || (entry.getGeneralPurposeBit().usesDataDescriptor()
                && allowStoredEntriesWithDataDescriptor
                && entry.getMethod() == ZipEntry.STORED);
    }

    /**
     * Caches a stored entry that uses the data descriptor.
     *
     * <ul>
     *   <li>Reads a stored entry until the signature of a local file
     *     header, central directory header or data descriptor has been
     *     found.</li>
     *   <li>Stores all entry data in lastStoredEntry.</p>
     *   <li>Rewinds the stream to position at the data
     *     descriptor.</li>
     *   <li>reads the data descriptor</li>
     * </ul>
     *
     * <p>After calling this method the entry should know its size,
     * the entry's data is cached and the stream is positioned at the
     * next local file or central directory header.</p>
     */
    private void readStoredEntry() throws IOException {
        final ByteArrayOutputStream bos = new ByteArrayOutputStream();
        int off = 0;
        boolean done = false;

        // length of DD without signature
        final int ddLen = current.usesZip64 ?
WORD + 2 * DWORD : 3 * WORD; 885 886 while (!done) { 887 final int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off); 888 if (r <= 0) { 889 // read the whole archive without ever finding a 890 // central directory 891 throw new IOException("Truncated ZIP file"); 892 } 893 if (r + off < 4) { 894 // buffer too small to check for a signature, loop 895 off += r; 896 continue; 897 } 898 899 done = bufferContainsSignature(bos, off, r, ddLen); 900 if (!done) { 901 off = cacheBytesRead(bos, off, r, ddLen); 902 } 903 } 904 905 final byte[] b = bos.toByteArray(); 906 lastStoredEntry = new ByteArrayInputStream(b); 907 } 908 909 private static final byte[] LFH = ZipLong.LFH_SIG.getBytes(); 910 private static final byte[] CFH = ZipLong.CFH_SIG.getBytes(); 911 private static final byte[] DD = ZipLong.DD_SIG.getBytes(); 912 913 /** 914 * Checks whether the current buffer contains the signature of a 915 * "data descriptor", "local file header" or 916 * "central directory entry". 917 * 918 * <p>If it contains such a signature, reads the data descriptor 919 * and positions the stream right after the data descriptor.</p> 920 */ 921 private boolean bufferContainsSignature(final ByteArrayOutputStream bos, final int offset, final int lastRead, final int expectedDDLen) 922 throws IOException { 923 924 boolean done = false; 925 int readTooMuch = 0; 926 for (int i = 0; !done && i < offset + lastRead - 4; i++) { 927 if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) { 928 if ((buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3]) 929 || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) { 930 // found a LFH or CFH: 931 readTooMuch = offset + lastRead - i - expectedDDLen; 932 done = true; 933 } 934 else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) { 935 // found DD: 936 readTooMuch = offset + lastRead - i; 937 done = true; 938 } 939 if (done) { 940 // * push back bytes read in excess as well as the data 941 // descriptor 
942 // * copy the remaining bytes to cache 943 // * read data descriptor 944 pushback(buf.array(), offset + lastRead - readTooMuch, readTooMuch); 945 bos.write(buf.array(), 0, i); 946 readDataDescriptor(); 947 } 948 } 949 } 950 return done; 951 } 952 953 /** 954 * If the last read bytes could hold a data descriptor and an 955 * incomplete signature then save the last bytes to the front of 956 * the buffer and cache everything in front of the potential data 957 * descriptor into the given ByteArrayOutputStream. 958 * 959 * <p>Data descriptor plus incomplete signature (3 bytes in the 960 * worst case) can be 20 bytes max.</p> 961 */ 962 private int cacheBytesRead(final ByteArrayOutputStream bos, int offset, final int lastRead, final int expecteDDLen) { 963 final int cacheable = offset + lastRead - expecteDDLen - 3; 964 if (cacheable > 0) { 965 bos.write(buf.array(), 0, cacheable); 966 System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3); 967 offset = expecteDDLen + 3; 968 } else { 969 offset += lastRead; 970 } 971 return offset; 972 } 973 974 private void pushback(final byte[] buf, final int offset, final int length) throws IOException { 975 ((PushbackInputStream) in).unread(buf, offset, length); 976 pushedBackBytes(length); 977 } 978 979 // End of Central Directory Record 980 // end of central dir signature WORD 981 // number of this disk SHORT 982 // number of the disk with the 983 // start of the central directory SHORT 984 // total number of entries in the 985 // central directory on this disk SHORT 986 // total number of entries in 987 // the central directory SHORT 988 // size of the central directory WORD 989 // offset of start of central 990 // directory with respect to 991 // the starting disk number WORD 992 // .ZIP file comment length SHORT 993 // .ZIP file comment up to 64KB 994 // 995 996 /** 997 * Reads the stream until it find the "End of central directory 998 * record" and consumes it as well. 
     */
    private void skipRemainderOfArchive() throws IOException {
        // skip over central directory. One LFH has been read too much
        // already. The calculation discounts file names and extra
        // data so it will be too short.
        realSkip((long) entriesRead * CFH_LEN - LFH_LEN);
        findEocdRecord();
        // skip the fixed-size remainder of the EOCD record (signature has
        // already been consumed by findEocdRecord, comment length follows)
        realSkip((long) ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */);
        readFully(shortBuf);
        // file comment
        realSkip(ZipShort.getValue(shortBuf));
    }

    /**
     * Reads forward until the signature of the "End of central
     * directory" record is found.
     *
     * <p>Implemented as a small state machine: whenever a byte breaks
     * the match but could itself start a new signature, it is
     * re-examined on the next loop iteration (skipReadCall) instead of
     * reading a fresh byte, so overlapping candidates are not lost.</p>
     */
    private void findEocdRecord() throws IOException {
        int currentByte = -1;
        boolean skipReadCall = false;
        while (skipReadCall || (currentByte = readOneByte()) > -1) {
            skipReadCall = false;
            if (!isFirstByteOfEocdSig(currentByte)) {
                continue;
            }
            // matched byte 0 of the signature, try byte 1
            currentByte = readOneByte();
            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) {
                if (currentByte == -1) {
                    break;
                }
                // the mismatching byte may start a new signature
                skipReadCall = isFirstByteOfEocdSig(currentByte);
                continue;
            }
            // matched bytes 0-1, try byte 2
            currentByte = readOneByte();
            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) {
                if (currentByte == -1) {
                    break;
                }
                skipReadCall = isFirstByteOfEocdSig(currentByte);
                continue;
            }
            // matched bytes 0-2; either EOF or a full match ends the scan
            currentByte = readOneByte();
            if (currentByte == -1
                || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) {
                break;
            }
            skipReadCall = isFirstByteOfEocdSig(currentByte);
        }
    }

    /**
     * Skips bytes by reading from the underlying stream rather than
     * the (potentially inflating) archive stream - which {@link
     * #skip} would do.
     *
     * Also updates bytes-read counter.
     *
     * <p>Returns silently if the stream ends before {@code value}
     * bytes could be skipped; throws IllegalArgumentException for a
     * negative argument.</p>
     */
    private void realSkip(final long value) throws IOException {
        if (value >= 0) {
            long skipped = 0;
            while (skipped < value) {
                final long rem = value - skipped;
                // read in chunks of at most skipBuf.length bytes
                final int x = in.read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
                if (x == -1) {
                    return;
                }
                count(x);
                skipped += x;
            }
            return;
        }
        throw new IllegalArgumentException();
    }

    /**
     * Reads bytes by reading from the underlying stream rather than
     * the (potentially inflating) archive stream - which {@link #read} would do.
     *
     * Also updates bytes-read counter.
     *
     * @return the byte read or -1 at end of stream
     */
    private int readOneByte() throws IOException {
        final int b = in.read();
        if (b != -1) {
            count(1);
        }
        return b;
    }

    /** Whether b equals the first byte of the EOCD signature ('P'). */
    private boolean isFirstByteOfEocdSig(final int b) {
        return b == ZipArchiveOutputStream.EOCD_SIG[0];
    }

    /**
     * Structure collecting information for the entry that is
     * currently being read.
     */
    private static final class CurrentEntry {

        /**
         * Current ZIP entry.
         */
        private final ZipArchiveEntry entry = new ZipArchiveEntry();

        /**
         * Does the entry use a data descriptor?
         */
        private boolean hasDataDescriptor;

        /**
         * Does the entry have a ZIP64 extended information extra field.
         */
        private boolean usesZip64;

        /**
         * Number of bytes of entry content read by the client if the
         * entry is STORED.
         */
        private long bytesRead;

        /**
         * Number of bytes of entry content read from the stream.
         *
         * <p>This may be more than the actual entry's length as some
         * stuff gets buffered up and needs to be pushed back when the
         * end of the entry has been reached.</p>
         */
        private long bytesReadFromStream;

        /**
         * The checksum calculated as the current entry is read.
1129 */ 1130 private final CRC32 crc = new CRC32(); 1131 1132 /** 1133 * The input stream decompressing the data for shrunk and imploded entries. 1134 */ 1135 private InputStream in; 1136 } 1137 1138 /** 1139 * Bounded input stream adapted from commons-io 1140 */ 1141 private class BoundedInputStream extends InputStream { 1142 1143 /** the wrapped input stream */ 1144 private final InputStream in; 1145 1146 /** the max length to provide */ 1147 private final long max; 1148 1149 /** the number of bytes already returned */ 1150 private long pos = 0; 1151 1152 /** 1153 * Creates a new <code>BoundedInputStream</code> that wraps the given input 1154 * stream and limits it to a certain size. 1155 * 1156 * @param in The wrapped input stream 1157 * @param size The maximum number of bytes to return 1158 */ 1159 public BoundedInputStream(final InputStream in, final long size) { 1160 this.max = size; 1161 this.in = in; 1162 } 1163 1164 @Override 1165 public int read() throws IOException { 1166 if (max >= 0 && pos >= max) { 1167 return -1; 1168 } 1169 final int result = in.read(); 1170 pos++; 1171 count(1); 1172 current.bytesReadFromStream++; 1173 return result; 1174 } 1175 1176 @Override 1177 public int read(final byte[] b) throws IOException { 1178 return this.read(b, 0, b.length); 1179 } 1180 1181 @Override 1182 public int read(final byte[] b, final int off, final int len) throws IOException { 1183 if (max >= 0 && pos >= max) { 1184 return -1; 1185 } 1186 final long maxRead = max >= 0 ? Math.min(len, max - pos) : len; 1187 final int bytesRead = in.read(b, off, (int) maxRead); 1188 1189 if (bytesRead == -1) { 1190 return -1; 1191 } 1192 1193 pos += bytesRead; 1194 count(bytesRead); 1195 current.bytesReadFromStream += bytesRead; 1196 return bytesRead; 1197 } 1198 1199 @Override 1200 public long skip(final long n) throws IOException { 1201 final long toSkip = max >= 0 ? 
Math.min(n, max - pos) : n; 1202 final long skippedBytes = IOUtils.skip(in, toSkip); 1203 pos += skippedBytes; 1204 return skippedBytes; 1205 } 1206 1207 @Override 1208 public int available() throws IOException { 1209 if (max >= 0 && pos >= max) { 1210 return 0; 1211 } 1212 return in.available(); 1213 } 1214 } 1215}