pFad - Phone/Frame/Anonymizer/Declutterfier! Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

URL: http://github.com/python/cpython/commit/b44898299a2ed97045c270f6474785da2ff07ced

gh-89739: gh-77140: Support zip64 in zipimport (GH-94146) · python/cpython@b448982 · GitHub
Skip to content

Commit b448982

Browse files
thatch and itamaro authored
gh-89739: gh-77140: Support zip64 in zipimport (GH-94146)
* Reads zip64 files as produced by the zipfile module
* Include tests (somewhat slow, however, because of the need to create "large" zips)
* About the same amount of strictness reading invalid zip files as zipfile has
* Still works on files with prepended data (like pex)

There are a lot more test cases at https://github.com/thatch/zipimport64/ that give me confidence that this works for real-world files.

Fixes #89739 and #77140.

---------

Co-authored-by: Itamar Ostricher <itamarost@gmail.com>
Reviewed-by: Gregory P. Smith <greg@krypto.org>
1 parent 2cedd25 commit b448982

File tree

6 files changed

+154
-39
lines changed

6 files changed

+154
-39
lines changed

Doc/library/zipimport.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ Any files may be present in the ZIP archive, but importers are only invoked for
3030
corresponding :file:`.pyc` file, meaning that if a ZIP archive
3131
doesn't contain :file:`.pyc` files, importing may be rather slow.
3232

33+
.. versionchanged:: 3.13
34+
ZIP64 is supported
35+
3336
.. versionchanged:: 3.8
3437
Previously, ZIP archives with an archive comment were not supported.
3538

Doc/whatsnew/3.13.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -700,6 +700,12 @@ xml.etree.ElementTree
700700
:func:`~xml.etree.ElementTree.iterparse` for explicit cleaning up.
701701
(Contributed by Serhiy Storchaka in :gh:`69893`.)
702702

703+
zipimport
704+
---------
705+
706+
* Gains support for ZIP64 format files. Everybody loves huge code right?
707+
(Contributed by Tim Hatch in :gh:`94146`.)
708+
703709

704710
Optimizations
705711
=============

Lib/importlib/_bootstrap_external.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,11 @@ def _pack_uint32(x):
8181
return (int(x) & 0xFFFFFFFF).to_bytes(4, 'little')
8282

8383

84+
def _unpack_uint64(data):
85+
"""Convert 8 bytes in little-endian to an integer."""
86+
assert len(data) == 8
87+
return int.from_bytes(data, 'little')
88+
8489
def _unpack_uint32(data):
8590
"""Convert 4 bytes in little-endian to an integer."""
8691
assert len(data) == 4

Lib/test/test_zipimport.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,10 @@ def makeZip(self, files, zipName=TEMP_ZIP, **kw):
128128
f.write(stuff)
129129
f.write(data)
130130

131+
def getZip64Files(self):
132+
# This is the simplest way to make zipfile generate the zip64 EOCD block
133+
return {f"f{n}.py": (NOW, test_src) for n in range(65537)}
134+
131135
def doTest(self, expected_ext, files, *modules, **kw):
132136
self.makeZip(files, **kw)
133137

@@ -798,6 +802,14 @@ def testLargestPossibleComment(self):
798802
files = {TESTMOD + ".py": (NOW, test_src)}
799803
self.doTest(".py", files, TESTMOD, comment=b"c" * ((1 << 16) - 1))
800804

805+
def testZip64(self):
806+
files = self.getZip64Files()
807+
self.doTest(".py", files, "f6")
808+
809+
def testZip64CruftAndComment(self):
810+
files = self.getZip64Files()
811+
self.doTest(".py", files, "f65536", comment=b"c" * ((1 << 16) - 1))
812+
801813

802814
@support.requires_zlib()
803815
class CompressedZipImportTestCase(UncompressedZipImportTestCase):

Lib/zipimport.py

Lines changed: 127 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
#from importlib import _bootstrap_external
1616
#from importlib import _bootstrap # for _verbose_message
1717
import _frozen_importlib_external as _bootstrap_external
18-
from _frozen_importlib_external import _unpack_uint16, _unpack_uint32
18+
from _frozen_importlib_external import _unpack_uint16, _unpack_uint32, _unpack_uint64
1919
import _frozen_importlib as _bootstrap # for _verbose_message
2020
import _imp # for check_hash_based_pycs
2121
import _io # for open
@@ -40,8 +40,14 @@ class ZipImportError(ImportError):
4040
_module_type = type(sys)
4141

4242
END_CENTRAL_DIR_SIZE = 22
43-
STRING_END_ARCHIVE = b'PK\x05\x06'
43+
END_CENTRAL_DIR_SIZE_64 = 56
44+
END_CENTRAL_DIR_LOCATOR_SIZE_64 = 20
45+
STRING_END_ARCHIVE = b'PK\x05\x06' # standard EOCD signature
46+
STRING_END_LOCATOR_64 = b'PK\x06\x07' # Zip64 EOCD Locator signature
47+
STRING_END_ZIP_64 = b'PK\x06\x06' # Zip64 EOCD signature
4448
MAX_COMMENT_LEN = (1 << 16) - 1
49+
MAX_UINT32 = 0xffffffff
50+
ZIP64_EXTRA_TAG = 0x1
4551

4652
class zipimporter(_bootstrap_external._LoaderBasics):
4753
"""zipimporter(archivepath) -> zipimporter object
@@ -356,49 +362,72 @@ def _read_directory(archive):
356362
# to not cause problems when some runs 'python3 /dev/fd/9 9<some_script'
357363
start_offset = fp.tell()
358364
try:
365+
# Check if there's a comment.
359366
try:
360-
fp.seek(-END_CENTRAL_DIR_SIZE, 2)
361-
header_position = fp.tell()
362-
buffer = fp.read(END_CENTRAL_DIR_SIZE)
367+
fp.seek(0, 2)
368+
file_size = fp.tell()
363369
except OSError:
364-
raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
365-
if len(buffer) != END_CENTRAL_DIR_SIZE:
366-
raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
367-
if buffer[:4] != STRING_END_ARCHIVE:
368-
# Bad: End of Central Dir signature
369-
# Check if there's a comment.
370-
try:
371-
fp.seek(0, 2)
372-
file_size = fp.tell()
373-
except OSError:
374-
raise ZipImportError(f"can't read Zip file: {archive!r}",
375-
path=archive)
376-
max_comment_start = max(file_size - MAX_COMMENT_LEN -
377-
END_CENTRAL_DIR_SIZE, 0)
378-
try:
379-
fp.seek(max_comment_start)
380-
data = fp.read()
381-
except OSError:
382-
raise ZipImportError(f"can't read Zip file: {archive!r}",
383-
path=archive)
384-
pos = data.rfind(STRING_END_ARCHIVE)
385-
if pos < 0:
386-
raise ZipImportError(f'not a Zip file: {archive!r}',
387-
path=archive)
370+
raise ZipImportError(f"can't read Zip file: {archive!r}",
371+
path=archive)
372+
max_comment_plus_dirs_size = (
373+
MAX_COMMENT_LEN + END_CENTRAL_DIR_SIZE +
374+
END_CENTRAL_DIR_SIZE_64 + END_CENTRAL_DIR_LOCATOR_SIZE_64)
375+
max_comment_start = max(file_size - max_comment_plus_dirs_size, 0)
376+
try:
377+
fp.seek(max_comment_start)
378+
data = fp.read(max_comment_plus_dirs_size)
379+
except OSError:
380+
raise ZipImportError(f"can't read Zip file: {archive!r}",
381+
path=archive)
382+
pos = data.rfind(STRING_END_ARCHIVE)
383+
pos64 = data.rfind(STRING_END_ZIP_64)
384+
385+
if (pos64 >= 0 and pos64+END_CENTRAL_DIR_SIZE_64+END_CENTRAL_DIR_LOCATOR_SIZE_64==pos):
386+
# Zip64 at "correct" offset from standard EOCD
387+
buffer = data[pos64:pos64 + END_CENTRAL_DIR_SIZE_64]
388+
if len(buffer) != END_CENTRAL_DIR_SIZE_64:
389+
raise ZipImportError(
390+
f"corrupt Zip64 file: Expected {END_CENTRAL_DIR_SIZE_64} byte "
391+
f"zip64 central directory, but read {len(buffer)} bytes.",
392+
path=archive)
393+
header_position = file_size - len(data) + pos64
394+
395+
central_directory_size = _unpack_uint64(buffer[40:48])
396+
central_directory_position = _unpack_uint64(buffer[48:56])
397+
num_entries = _unpack_uint64(buffer[24:32])
398+
elif pos >= 0:
388399
buffer = data[pos:pos+END_CENTRAL_DIR_SIZE]
389400
if len(buffer) != END_CENTRAL_DIR_SIZE:
390401
raise ZipImportError(f"corrupt Zip file: {archive!r}",
391402
path=archive)
403+
392404
header_position = file_size - len(data) + pos
393405

394-
header_size = _unpack_uint32(buffer[12:16])
395-
header_offset = _unpack_uint32(buffer[16:20])
396-
if header_position < header_size:
406+
# Buffer now contains a valid EOCD, and header_position gives the
407+
# starting position of it.
408+
central_directory_size = _unpack_uint32(buffer[12:16])
409+
central_directory_position = _unpack_uint32(buffer[16:20])
410+
num_entries = _unpack_uint16(buffer[8:10])
411+
412+
# N.b. if someday you want to prefer the standard (non-zip64) EOCD,
413+
# you need to adjust position by 76 for arc to be 0.
414+
else:
415+
raise ZipImportError(f'not a Zip file: {archive!r}',
416+
path=archive)
417+
418+
# Buffer now contains a valid EOCD, and header_position gives the
419+
# starting position of it.
420+
# XXX: These are cursory checks but are not as exact or strict as they
421+
# could be. Checking the arc-adjusted value is probably good too.
422+
if header_position < central_directory_size:
397423
raise ZipImportError(f'bad central directory size: {archive!r}', path=archive)
398-
if header_position < header_offset:
424+
if header_position < central_directory_position:
399425
raise ZipImportError(f'bad central directory offset: {archive!r}', path=archive)
400-
header_position -= header_size
401-
arc_offset = header_position - header_offset
426+
header_position -= central_directory_size
427+
# On just-a-zipfile these values are the same and arc_offset is zero; if
428+
# the file has some bytes prepended, `arc_offset` is the number of such
429+
# bytes. This is used for pex as well as self-extracting .exe.
430+
arc_offset = header_position - central_directory_position
402431
if arc_offset < 0:
403432
raise ZipImportError(f'bad central directory size or offset: {archive!r}', path=archive)
404433

@@ -415,6 +444,11 @@ def _read_directory(archive):
415444
raise EOFError('EOF read where not expected')
416445
# Start of file header
417446
if buffer[:4] != b'PK\x01\x02':
447+
if count != num_entries:
448+
raise ZipImportError(
449+
f"mismatched num_entries: {count} should be {num_entries} in {archive!r}",
450+
path=archive,
451+
)
418452
break # Bad: Central Dir File Header
419453
if len(buffer) != 46:
420454
raise EOFError('EOF read where not expected')
@@ -430,9 +464,6 @@ def _read_directory(archive):
430464
comment_size = _unpack_uint16(buffer[32:34])
431465
file_offset = _unpack_uint32(buffer[42:46])
432466
header_size = name_size + extra_size + comment_size
433-
if file_offset > header_offset:
434-
raise ZipImportError(f'bad local header offset: {archive!r}', path=archive)
435-
file_offset += arc_offset
436467

437468
try:
438469
name = fp.read(name_size)
@@ -444,7 +475,10 @@ def _read_directory(archive):
444475
# slower than reading the data because fseek flushes stdio's
445476
# internal buffers. See issue #8745.
446477
try:
447-
if len(fp.read(header_size - name_size)) != header_size - name_size:
478+
extra_data_len = header_size - name_size
479+
extra_data = memoryview(fp.read(extra_data_len))
480+
481+
if len(extra_data) != extra_data_len:
448482
raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
449483
except OSError:
450484
raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
@@ -461,6 +495,60 @@ def _read_directory(archive):
461495

462496
name = name.replace('/', path_sep)
463497
path = _bootstrap_external._path_join(archive, name)
498+
499+
# Ordering matches unpacking below.
500+
if (
501+
file_size == MAX_UINT32 or
502+
data_size == MAX_UINT32 or
503+
file_offset == MAX_UINT32
504+
):
505+
# need to decode extra_data looking for a zip64 extra (which might not
506+
# be present)
507+
while extra_data:
508+
if len(extra_data) < 4:
509+
raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)
510+
tag = _unpack_uint16(extra_data[:2])
511+
size = _unpack_uint16(extra_data[2:4])
512+
if len(extra_data) < 4 + size:
513+
raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)
514+
if tag == ZIP64_EXTRA_TAG:
515+
if (len(extra_data) - 4) % 8 != 0:
516+
raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)
517+
num_extra_values = (len(extra_data) - 4) // 8
518+
if num_extra_values > 3:
519+
raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)
520+
values = struct.unpack_from(f"<{min(num_extra_values, 3)}Q",
521+
extra_data, offset=4)
522+
523+
# N.b. Here be dragons: the ordering of these is different than
524+
# the header fields, and it's really easy to get it wrong since
525+
# naturally-occuring zips that use all 3 are >4GB
526+
if file_size == MAX_UINT32:
527+
file_size = values.pop(0)
528+
if data_size == MAX_UINT32:
529+
data_size = values.pop(0)
530+
if file_offset == MAX_UINT32:
531+
file_offset = values.pop(0)
532+
533+
break
534+
535+
# For a typical zip, this bytes-slicing only happens 2-3 times, on
536+
# small data like timestamps and filesizes.
537+
extra_data = extra_data[4+size:]
538+
else:
539+
_bootstrap._verbose_message(
540+
"zipimport: suspected zip64 but no zip64 extra for {!r}",
541+
path,
542+
)
543+
# XXX These two statements seem swapped because `central_directory_position`
544+
# is a position within the actual file, but `file_offset` (when compared) is
545+
# as encoded in the entry, not adjusted for this file.
546+
# N.b. this must be after we've potentially read the zip64 extra which can
547+
# change `file_offset`.
548+
if file_offset > central_directory_position:
549+
raise ZipImportError(f'bad local header offset: {archive!r}', path=archive)
550+
file_offset += arc_offset
551+
464552
t = (path, compress, data_size, file_size, file_offset, time, date, crc)
465553
files[name] = t
466554
count += 1
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
The :mod:`zipimport` module can now read ZIP64 files.

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad © 2024 Your Company Name. All rights reserved.





Check this box to remove all script contents from the fetched content.



Check this box to remove all images from the fetched content.


Check this box to remove all CSS styles from the fetched content.


Check this box to keep images inefficiently compressed and original size.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy