From 859532b05844a0eb3efd641303a08d4424edb30e Mon Sep 17 00:00:00 2001 From: Allan Lei Date: Sun, 8 Dec 2013 12:38:24 +0800 Subject: [PATCH] changes --- .gitignore | 2 + .travis.yml | 10 + README.markdown | 73 ++++++- example.py | 25 +++ setup.py | 21 +- tests/__init__.py | 0 tests/test_pointerio.py | 80 ++++++++ tests/test_zipstream.py | 59 ++++++ tox.ini | 6 + zipstream.py | 412 ---------------------------------------- zipstream/__init__.py | 407 +++++++++++++++++++++++++++++++++++++++ zipstream/compat.py | 68 +++++++ 12 files changed, 734 insertions(+), 429 deletions(-) create mode 100644 .travis.yml create mode 100644 example.py create mode 100644 tests/__init__.py create mode 100644 tests/test_pointerio.py create mode 100644 tests/test_zipstream.py create mode 100644 tox.ini delete mode 100644 zipstream.py create mode 100644 zipstream/__init__.py create mode 100644 zipstream/compat.py diff --git a/.gitignore b/.gitignore index ded6067..6564c25 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,5 @@ nosetests.xml .mr.developer.cfg .project .pydevproject + +*.zip \ No newline at end of file diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..f174f46 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,10 @@ +language: python +python: + - "2.6" + - "2.7" + - "3.2" + - "3.3" + - "pypy" +install: + - "pip install ." +script: nosetests \ No newline at end of file diff --git a/README.markdown b/README.markdown index 584ea25..6eee279 100644 --- a/README.markdown +++ b/README.markdown @@ -1,10 +1,12 @@ -# ZipStream +# python-zipstream -zipstream.py is a zip archive generator based on zipfile.py. It was created to -generate a zip file on-the-fly for download in a web.py (http://webpy.org/) -application. 
This is beneficial for when you want to provide a downloadable -archive of a large collection of regular files, which would be infeasible to +[![Build Status](https://travis-ci.org/allanlei/python-zipstream.png?branch=master)](https://travis-ci.org/allanlei/python-zipstream) +[![Coverage Status](https://coveralls.io/repos/allanlei/python-zipstream/badge.png)](https://coveralls.io/r/allanlei/python-zipstream) + +zipstream.py is a zip archive generator based on Python 3.3's zipfile.py. It was created to +produce a zip archive as a generator for streaming (e.g. web apps). This is beneficial when you +want to provide a downloadable archive of a large collection of regular files, which would be infeasible to generate the archive prior to downloading. The archive is generated as an iterator of strings, which, when joined, form @@ -37,10 +39,63 @@ archives. ## Requirements - * Python >=2.6 + * Python 2.6, 2.7, 3.2, 3.3, PyPy + +## Examples + +### flask + +```python +from flask import Response + +@app.route('/package.zip', methods=['GET'], endpoint='zipball') +def zipball(): + def generator(): + z = ZipStream(mode='w', compression=ZIP_DEFLATED) + + z.write('/path/to/file') -## License + for chunk in z: + yield chunk -This library was created by SpiderOak, Inc. and is released under the GPLv3. -Copyright 2008-2013 SpiderOak Inc. 
+ response = Response(generator(), mimetype='application/zip') + response.headers['Content-Disposition'] = 'attachment; filename={}'.format('files.zip') + return response + +# or + +@app.route('/package.zip', methods=['GET'], endpoint='zipball') +def zipball(): + z = ZipStream(mode='w', compression=ZIP_DEFLATED) + z.write('/path/to/file') + + response = Response(z, mimetype='application/zip') + response.headers['Content-Disposition'] = 'attachment; filename={}'.format('files.zip') + return response +``` + +### django 1.5+ + +```python +from django.http import StreamingHttpResponse +def zipball(request): + z = ZipStream(mode='w', compression=ZIP_DEFLATED) + z.write('/path/to/file') + + response = StreamingHttpResponse(z, mimetype='application/zip') + response['Content-Disposition'] = 'attachment; filename={}'.format('files.zip') + return response +``` + +### webpy + +```python +def GET(self): + path = '/path/to/dir/of/files' + zip_filename = 'files.zip' + web.header('Content-type' , 'application/zip') + web.header('Content-Disposition', 'attachment; filename="%s"' % ( + zip_filename,)) + return ZipStream(path) +``` diff --git a/example.py b/example.py new file mode 100644 index 0000000..55aef2a --- /dev/null +++ b/example.py @@ -0,0 +1,25 @@ +import os +import zipstream +import zipfile + + +f = open('test.zip', 'wb') + +with zipstream.ZipFile(mode='w', compression=zipstream.ZIP_DEFLATED) as z: + z.write('LICENSE') + z.write('LICENSE', arcname='stuff/LICENSE') + + for root, directories, files in os.walk('zipstream'): + for filename in files: + path = os.path.join(root, filename) + z.write(path, path) + + with open('test.zip', 'wb') as f: + for chunk in z: + f.write(chunk) + +f.close() + + +with zipfile.ZipFile('test.zip') as z: + z.testzip() diff --git a/setup.py b/setup.py index f326ffe..17e5da2 100644 --- a/setup.py +++ b/setup.py @@ -1,13 +1,18 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- +from setuptools import setup, find_packages +import zipstream -from 
distutils.core import setup setup( name='zipstream', - version='1.0.1', - description='SpiderOak ZipStream Module', - author='SpiderOak Team', - author_email='code@spideroak.com', - url='http://www.spideroak.com', - py_modules=['zipstream'], + version=zipstream.__version__, + description='Zipfile generator', + author='Allan Lei', + author_email='allanlei@helveticode.com', + url='https://github.com/allanlei/python-zipstream', + packages=find_packages(), + keywords='zip streaming', + + test_suite='nose.collector', + tests_require = ['nose'], ) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_pointerio.py b/tests/test_pointerio.py new file mode 100644 index 0000000..02d3075 --- /dev/null +++ b/tests/test_pointerio.py @@ -0,0 +1,80 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import unittest +import zipstream + + +class PointerIOTestCase(unittest.TestCase): + def test_init_no_args(self): + zipstream.PointerIO() + + def test_init_mode(self): + try: + zipstream.PointerIO('wb') + except Exception as err: + self.fail(err) + + for mode in ['w', 'r', 'rb', 'a', 'ab']: + self.assertRaises(Exception, zipstream.PointerIO, mode=mode) + + for mode in ['w', 'wb''r', 'rb', 'a', 'ab']: + self.assertRaises(Exception, zipstream.PointerIO, mode=mode + '+') + + def test_has_fileobj_attrs(self): + fileobj = zipstream.PointerIO() + + self.assertTrue(hasattr(fileobj, 'write')) + self.assertTrue(hasattr(fileobj, 'close')) + self.assertTrue(hasattr(fileobj, 'tell')) + + def test_write_bytes(self): + fileobj = zipstream.PointerIO() + data = b'Im a little tea pot' + try: + fileobj.write(data) + except Exception as err: + self.fail(err) + self.assertEqual(fileobj.tell(), 19) + + def test_write_unicode(self): + fileobj = zipstream.PointerIO() + data = 'Im a little tea pot' + try: + fileobj.write(data) + except Exception as err: + self.fail(err) + self.assertEqual(fileobj.tell(), 19) + + + fileobj = 
zipstream.PointerIO() + data = '幋 儳鑤 寱懤擨 拻敁柧' + try: + fileobj.write(data) + except Exception as err: + self.fail(err) + self.assertEqual(fileobj.tell(), 30) + + def test_write_non_string_type(self): + fileobj = zipstream.PointerIO() + data = None + self.assertRaises(TypeError, fileobj.write, data) + + fileobj = zipstream.PointerIO() + data = [] + self.assertRaises(TypeError, fileobj.write, data) + + fileobj = zipstream.PointerIO() + data = tuple() + self.assertRaises(TypeError, fileobj.write, data) + + fileobj = zipstream.PointerIO() + data = 1 + self.assertRaises(TypeError, fileobj.write, data) + + fileobj = zipstream.PointerIO() + data = 1.00 + self.assertRaises(TypeError, fileobj.write, data) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/tests/test_zipstream.py b/tests/test_zipstream.py new file mode 100644 index 0000000..56e1ef2 --- /dev/null +++ b/tests/test_zipstream.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals, print_function + +import os +import tempfile +import unittest +import zipstream +import zipfile + + +class ZipInfoTestCase(unittest.TestCase): + pass + + +class ZipStreamTestCase(unittest.TestCase): + def setUp(self): + self.fileobjs = [ + tempfile.NamedTemporaryFile(delete=False, suffix='.txt'), + tempfile.NamedTemporaryFile(delete=False, suffix='.py'), + ] + + def tearDown(self): + for fileobj in self.fileobjs: + fileobj.close() + os.remove(fileobj.name) + + def test_init_no_args(self): + zipstream.ZipFile() + + def test_init_mode(self): + try: + zipstream.ZipFile(mode='w') + except Exception as err: + self.fail(err) + + for mode in ['wb', 'r', 'rb', 'a', 'ab']: + self.assertRaises(Exception, zipstream.ZipFile, mode=mode) + + for mode in ['wb', 'r', 'rb', 'a', 'ab']: + self.assertRaises(Exception, zipstream.ZipFile, mode=mode + '+') + + def test_write_file(self): + z = zipstream.ZipFile(mode='w') + for fileobj in self.fileobjs: + z.write(fileobj.name) + + f = 
tempfile.NamedTemporaryFile(suffix='zip', delete=False) + for chunk in z: + f.write(chunk) + f.close() + + z2 = zipfile.ZipFile(f.name, 'r') + z2.testzip() + + os.remove(f.name) + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..88984c8 --- /dev/null +++ b/tox.ini @@ -0,0 +1,6 @@ +[tox] +envlist = py26, py27, py32, py33, pypy + +[testenv] +deps=nose +commands = nosetests {posargs} \ No newline at end of file diff --git a/zipstream.py b/zipstream.py deleted file mode 100644 index a285897..0000000 --- a/zipstream.py +++ /dev/null @@ -1,412 +0,0 @@ -#!/usr/bin/env python - -""" -Iterable ZIP archive generator. - -Derived directly from zipfile.py -""" -import struct, os, time, sys -import binascii - -try: - import zlib # We may need its compression method -except ImportError: - zlib = None - -__all__ = ["ZIP_STORED", "ZIP_DEFLATED", "ZipStream"] - - -ZIP64_LIMIT= (1 << 31) - 1 - -# constants for Zip file compression methods -ZIP_STORED = 0 -ZIP_DEFLATED = 8 -# Other ZIP compression methods not supported - -# Here are some struct module formats for reading headers -structEndArchive = "<4s4H2lH" # 9 items, end of archive, 22 bytes -stringEndArchive = "PK\005\006" # magic number for end of archive record -structCentralDir = "<4s4B4HlLL5HLl"# 19 items, central directory, 46 bytes -stringCentralDir = "PK\001\002" # magic number for central directory -structFileHeader = "<4s2B4HlLL2H" # 12 items, file header record, 30 bytes -stringFileHeader = "PK\003\004" # magic number for file header -structEndArchive64Locator = "<4slql" # 4 items, locate Zip64 header, 20 bytes -stringEndArchive64Locator = "PK\x06\x07" # magic token for locator header -structEndArchive64 = "<4sqhhllqqqq" # 10 items, end of archive (Zip64), 56 bytes -stringEndArchive64 = "PK\x06\x06" # magic token for Zip64 header -stringDataDescriptor = "PK\x07\x08" # magic number for data descriptor - -# indexes of entries in 
the central directory structure -_CD_SIGNATURE = 0 -_CD_CREATE_VERSION = 1 -_CD_CREATE_SYSTEM = 2 -_CD_EXTRACT_VERSION = 3 -_CD_EXTRACT_SYSTEM = 4 # is this meaningful? -_CD_FLAG_BITS = 5 -_CD_COMPRESS_TYPE = 6 -_CD_TIME = 7 -_CD_DATE = 8 -_CD_CRC = 9 -_CD_COMPRESSED_SIZE = 10 -_CD_UNCOMPRESSED_SIZE = 11 -_CD_FILENAME_LENGTH = 12 -_CD_EXTRA_FIELD_LENGTH = 13 -_CD_COMMENT_LENGTH = 14 -_CD_DISK_NUMBER_START = 15 -_CD_INTERNAL_FILE_ATTRIBUTES = 16 -_CD_EXTERNAL_FILE_ATTRIBUTES = 17 -_CD_LOCAL_HEADER_OFFSET = 18 - -# indexes of entries in the local file header structure -_FH_SIGNATURE = 0 -_FH_EXTRACT_VERSION = 1 -_FH_EXTRACT_SYSTEM = 2 # is this meaningful? -_FH_GENERAL_PURPOSE_FLAG_BITS = 3 -_FH_COMPRESSION_METHOD = 4 -_FH_LAST_MOD_TIME = 5 -_FH_LAST_MOD_DATE = 6 -_FH_CRC = 7 -_FH_COMPRESSED_SIZE = 8 -_FH_UNCOMPRESSED_SIZE = 9 -_FH_FILENAME_LENGTH = 10 -_FH_EXTRA_FIELD_LENGTH = 11 - - -class ZipInfo (object): - """Class with attributes describing each file in the ZIP archive.""" - - __slots__ = ( - 'orig_filename', - 'filename', - 'date_time', - 'compress_type', - 'comment', - 'extra', - 'create_system', - 'create_version', - 'extract_version', - 'reserved', - 'flag_bits', - 'volume', - 'internal_attr', - 'external_attr', - 'header_offset', - 'CRC', - 'compress_size', - 'file_size', - ) - - def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): - self.orig_filename = filename # Original file name in archive - - # Terminate the file name at the first null byte. Null bytes in file - # names are used as tricks by viruses in archives. - null_byte = filename.find(chr(0)) - if null_byte >= 0: - filename = filename[0:null_byte] - # This is used to ensure paths in generated ZIP files always use - # forward slashes as the directory separator, as required by the - # ZIP format specification. 
- if os.sep != "/" and os.sep in filename: - filename = filename.replace(os.sep, "/") - - self.filename = filename # Normalized file name - self.date_time = date_time # year, month, day, hour, min, sec - # Standard values: - self.compress_type = ZIP_STORED # Type of compression for the file - self.comment = "" # Comment for each file - self.extra = "" # ZIP extra data - if sys.platform == 'win32': - self.create_system = 0 # System which created ZIP archive - else: - # Assume everything else is unix-y - self.create_system = 3 # System which created ZIP archive - self.create_version = 20 # Version which created ZIP archive - self.extract_version = 20 # Version needed to extract archive - self.reserved = 0 # Must be zero - self.flag_bits = 0x08 # ZIP flag bits, bit 3 indicates presence of data descriptor - self.volume = 0 # Volume number of file header - self.internal_attr = 0 # Internal attributes - self.external_attr = 0 # External file attributes - # Other attributes are set by class ZipFile: - # header_offset Byte offset to the file header - # CRC CRC-32 of the uncompressed file - # compress_size Size of the compressed file - # file_size Size of the uncompressed file - - def DataDescriptor(self): - if self.compress_size > ZIP64_LIMIT or self.file_size > ZIP64_LIMIT: - fmt = "<4slQQ" - else: - fmt = "<4slLL" - return struct.pack(fmt, stringDataDescriptor, self.CRC, self.compress_size, self.file_size) - - def FileHeader(self): - """Return the per-file header as a string.""" - dt = self.date_time - dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] - dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) - if self.flag_bits & 0x08: - # Set these to zero because we write them after the file data - CRC = compress_size = file_size = 0 - else: - CRC = self.CRC - compress_size = self.compress_size - file_size = self.file_size - - extra = self.extra - - if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT: - # File is larger than what fits into a 4 byte integer, - # fall 
back to the ZIP64 extension - fmt = ' ZIP64_LIMIT or zinfo.compress_size > ZIP64_LIMIT: - extra.append(zinfo.file_size) - extra.append(zinfo.compress_size) - file_size = 0xffffffff #-1 - compress_size = 0xffffffff #-1 - else: - file_size = zinfo.file_size - compress_size = zinfo.compress_size - - if zinfo.header_offset > ZIP64_LIMIT: - extra.append(zinfo.header_offset) - header_offset = -1 # struct "l" format: 32 one bits - else: - header_offset = zinfo.header_offset - - extra_data = zinfo.extra - if extra: - # Append a ZIP64 field to the extra's - extra_data = struct.pack(' ZIP64_LIMIT: - # Need to write the ZIP64 end-of-archive records - zip64endrec = struct.pack(structEndArchive64, stringEndArchive64, - 44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1) - data.append( self.update_data_ptr(zip64endrec)) - - zip64locrec = struct.pack(structEndArchive64Locator, - stringEndArchive64Locator, 0, pos2, 1) - data.append( self.update_data_ptr(zip64locrec)) - - # XXX Why is `pos3` computed next? It's never referenced. - pos3 = self.data_ptr - endrec = struct.pack(structEndArchive, stringEndArchive, - 0, 0, count, count, pos2 - pos1, -1, 0) - data.append( self.update_data_ptr(endrec)) - - else: - endrec = struct.pack(structEndArchive, stringEndArchive, - 0, 0, count, count, pos2 - pos1, pos1, 0) - data.append( self.update_data_ptr(endrec)) - - return ''.join(data) - - -if __name__ == "__main__": - zipfile = sys.argv[1] - path = sys.argv[2] - - zf = open(zipfile, 'wb') - - for data in ZipStream(path): - zf.write(data) - - zf.close() - diff --git a/zipstream/__init__.py b/zipstream/__init__.py new file mode 100644 index 0000000..40744f7 --- /dev/null +++ b/zipstream/__init__.py @@ -0,0 +1,407 @@ +# -*- coding: utf-8 -*- +""" +Iterable ZIP archive generator. 
+ +Derived directly from zipfile.py +""" +from __future__ import unicode_literals, print_function, with_statement + +__version__ = '1.0.2' + +import os +import sys +import stat +import struct +import time +import zipfile + +from .compat import ( + str, bytes, + ZIP64_VERSION, + ZIP_BZIP2, BZIP2_VERSION, + ZIP_LZMA, LZMA_VERSION) + +from zipfile import ( + ZIP_STORED, ZIP64_LIMIT, ZIP_FILECOUNT_LIMIT, ZIP_MAX_COMMENT, + ZIP_DEFLATED, + structCentralDir, structEndArchive64, structEndArchive, structEndArchive64Locator, + stringCentralDir, stringEndArchive64, stringEndArchive, stringEndArchive64Locator, + structFileHeader, stringFileHeader, + zlib, crc32) + +stringDataDescriptor = b'PK\x07\x08' # magic number for data descriptor + + +def _get_compressor(compress_type): + if compress_type == ZIP_DEFLATED: + return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, + zlib.DEFLATED, -15) + elif compress_type == ZIP_BZIP2: + from zipfile import bz2 + return bz2.BZ2Compressor() + elif compress_type == ZIP_LZMA: + from zipfile import LZMACompressor + return LZMACompressor() + else: + return None + + +class PointerIO(object): + def __init__(self, mode='wb'): + if mode not in ('wb', ): + raise RuntimeError('zipstream.ZipFile() requires mode "wb"') + self.data_pointer = 0 + self.__mode = mode + self.__closed = False + + @property + def mode(self): + return self.__mode + + @property + def closed(self): + return self.__closed + + def close(self): + self.__closed = True + + def flush(self): + pass + + def next(self): + raise NotImplementedError() + + def seek(self, offset, whence): + raise NotImplementedError() + + def tell(self): + return self.data_pointer + + def truncate(size=None): + raise NotImplementedError() + + def write(self, data): + if self.closed: + raise ValueError('I/O operation on closed file') + + if isinstance(data, str): + data = data.encode('utf-8') + if not isinstance(data, bytes): + raise TypeError('expected bytes') + self.data_pointer += len(data) + return data + + 
+class ZipInfo(zipfile.ZipInfo): + def __init__(self, *args, **kwargs): + zipfile.ZipInfo.__init__(self, *args, **kwargs) + self.flag_bits = 0x08 # ZIP flag bits, bit 3 indicates presence of data descriptor + + def FileHeader(self, zip64=None): + """Return the per-file header as a string.""" + dt = self.date_time + dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] + dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) + if self.flag_bits & 0x08: + # Set these to zero because we write them after the file data + CRC = compress_size = file_size = 0 + else: + CRC = self.CRC + compress_size = self.compress_size + file_size = self.file_size + + extra = self.extra + + min_version = 0 + if zip64 is None: + zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT + if zip64: + fmt = ' ZIP64_LIMIT or compress_size > ZIP64_LIMIT: + if not zip64: + raise LargeZipFile("Filesize would require ZIP64 extensions") + # File is larger than what fits into a 4 byte integer, + # fall back to the ZIP64 extension + file_size = 0xffffffff + compress_size = 0xffffffff + min_version = ZIP64_VERSION + + if self.compress_type == ZIP_BZIP2: + min_version = max(BZIP2_VERSION, min_version) + elif self.compress_type == ZIP_LZMA: + min_version = max(LZMA_VERSION, min_version) + + self.extract_version = max(min_version, self.extract_version) + self.create_version = max(min_version, self.create_version) + filename, flag_bits = self._encodeFilenameFlags() + header = struct.pack(structFileHeader, stringFileHeader, + self.extract_version, self.reserved, flag_bits, + self.compress_type, dostime, dosdate, CRC, + compress_size, file_size, + len(filename), len(extra)) + return header + filename + extra + + def DataDescriptor(self): + """ + crc-32 4 bytes + compressed size 4 bytes + uncompressed size 4 bytes + """ + if self.compress_size > ZIP64_LIMIT or self.file_size > ZIP64_LIMIT: + fmt = b'<4sLQQ' + else: + fmt = b'<4sLLL' + return struct.pack(fmt, stringDataDescriptor, self.CRC, self.compress_size, 
self.file_size) + + +class ZipFile(zipfile.ZipFile): + def __init__(self, fileobj=None, mode='w', compression=ZIP_STORED, allowZip64=False): + """Open the ZIP file with mode write "w".""" + if mode not in ('w', ): + raise RuntimeError('zipstream.ZipFile() requires mode "w"') + if fileobj is None: + fileobj = PointerIO() + + self._comment = b'' + zipfile.ZipFile.__init__(self, fileobj, mode=mode, compression=compression, allowZip64=allowZip64) + # TODO: Refractor to write queue with args + kwargs matching write() + self.paths_to_write = [] + + def __iter__(self): + for args, kwargs in self.paths_to_write: + for data in self.__write(*args, **kwargs): + yield data + for data in self.__close(): + yield data + + def __enter__(self): + return self + + def __exit__(self, type, value, traceback): + self.close() + + @property + def comment(self): + """The comment text associated with the ZIP file.""" + return self._comment + + @comment.setter + def comment(self, comment): + if not isinstance(comment, bytes): + raise TypeError("comment: expected bytes, got %s" % type(comment)) + # check for valid comment length + if len(comment) >= ZIP_MAX_COMMENT: + if self.debug: + print('Archive comment is too long; truncating to %d bytes' + % ZIP_MAX_COMMENT) + comment = comment[:ZIP_MAX_COMMENT] + self._comment = comment + self._didModify = True + + def write(self, filename, arcname=None, compress_type=None): + # TODO: Reflect python's Zipfile.write + # - if filename is file, write as file + # - if filename is directory, write an empty directory + self.paths_to_write.append( + ((filename, ), {'arcname': arcname, 'compress_type': compress_type}), + ) + + def __write(self, filename, arcname=None, compress_type=None): + """Put the bytes from filename into the archive under the name + arcname.""" + if not self.fp: + raise RuntimeError( + "Attempt to write to ZIP archive that was already closed") + + st = os.stat(filename) + isdir = stat.S_ISDIR(st.st_mode) + mtime = 
time.localtime(st.st_mtime) + date_time = mtime[0:6] + # Create ZipInfo instance to store file information + if arcname is None: + arcname = filename + arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) + while arcname[0] in (os.sep, os.altsep): + arcname = arcname[1:] + if isdir: + arcname += '/' + zinfo = ZipInfo(arcname, date_time) + zinfo.external_attr = (st[0] & 0xFFFF) << 16 # Unix attributes + if compress_type is None: + zinfo.compress_type = self.compression + else: + zinfo.compress_type = compress_type + + zinfo.file_size = st.st_size + zinfo.flag_bits = 0x00 + zinfo.flag_bits |= 0x08 # ZIP flag bits, bit 3 indicates presence of data descriptor + zinfo.header_offset = self.fp.tell() # Start of header bytes + if zinfo.compress_type == ZIP_LZMA: + # Compressed data includes an end-of-stream (EOS) marker + zinfo.flag_bits |= 0x02 + + self._writecheck(zinfo) + self._didModify = True + + if isdir: + zinfo.file_size = 0 + zinfo.compress_size = 0 + zinfo.CRC = 0 + self.filelist.append(zinfo) + self.NameToInfo[zinfo.filename] = zinfo + yield self.fp.write(zinfo.FileHeader(False)) + return + + cmpr = _get_compressor(zinfo.compress_type) + with open(filename, 'rb') as fp: + # Must overwrite CRC and sizes with correct data later + zinfo.CRC = CRC = 0 + zinfo.compress_size = compress_size = 0 + # Compressed size can be larger than uncompressed size + zip64 = self._allowZip64 and \ + zinfo.file_size * 1.05 > ZIP64_LIMIT + yield self.fp.write(zinfo.FileHeader(zip64)) + file_size = 0 + while 1: + buf = fp.read(1024 * 8) + if not buf: + break + file_size = file_size + len(buf) + CRC = crc32(buf, CRC) & 0xffffffff + if cmpr: + buf = cmpr.compress(buf) + compress_size = compress_size + len(buf) + yield self.fp.write(buf) + if cmpr: + buf = cmpr.flush() + compress_size = compress_size + len(buf) + yield self.fp.write(buf) + zinfo.compress_size = compress_size + else: + zinfo.compress_size = file_size + zinfo.CRC = CRC + zinfo.file_size = file_size + if not zip64 and 
self._allowZip64: + if file_size > ZIP64_LIMIT: + raise RuntimeError('File size has increased during compressing') + if compress_size > ZIP64_LIMIT: + raise RuntimeError('Compressed size larger than uncompressed size') + + # Seek backwards and write file header (which will now include + # correct CRC and file sizes) + # position = self.fp.tell() # Preserve current position in file + # self.fp.seek(zinfo.header_offset, 0) + # self.fp.write(zinfo.FileHeader(zip64)) + # self.fp.seek(position, 0) + yield self.fp.write(zinfo.DataDescriptor()) + self.filelist.append(zinfo) + self.NameToInfo[zinfo.filename] = zinfo + + def __close(self): + """Close the file, and for mode "w" write the ending + records.""" + if self.fp is None: + return + + try: + if self.mode in ('w', 'a') and self._didModify: # write ending records + count = 0 + pos1 = self.fp.tell() + for zinfo in self.filelist: # write central directory + count = count + 1 + dt = zinfo.date_time + dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] + dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) + extra = [] + if zinfo.file_size > ZIP64_LIMIT \ + or zinfo.compress_size > ZIP64_LIMIT: + extra.append(zinfo.file_size) + extra.append(zinfo.compress_size) + file_size = 0xffffffff + compress_size = 0xffffffff + else: + file_size = zinfo.file_size + compress_size = zinfo.compress_size + + if zinfo.header_offset > ZIP64_LIMIT: + extra.append(zinfo.header_offset) + header_offset = 0xffffffff + else: + header_offset = zinfo.header_offset + + extra_data = zinfo.extra + min_version = 0 + if extra: + # Append a ZIP64 field to the extra's + extra_data = struct.pack( + b'= ZIP_FILECOUNT_LIMIT or + centDirOffset > ZIP64_LIMIT or + centDirSize > ZIP64_LIMIT): + # Need to write the ZIP64 end-of-archive records + zip64endrec = struct.pack( + structEndArchive64, stringEndArchive64, + 44, 45, 45, 0, 0, centDirCount, centDirCount, + centDirSize, centDirOffset) + yield self.fp.write(zip64endrec) + + zip64locrec = struct.pack( + 
structEndArchive64Locator, + stringEndArchive64Locator, 0, pos2, 1) + yield self.fp.write(zip64locrec) + centDirCount = min(centDirCount, 0xFFFF) + centDirSize = min(centDirSize, 0xFFFFFFFF) + centDirOffset = min(centDirOffset, 0xFFFFFFFF) + + endrec = struct.pack(structEndArchive, stringEndArchive, + 0, 0, centDirCount, centDirCount, + centDirSize, centDirOffset, len(self._comment)) + yield self.fp.write(endrec) + yield self.fp.write(self._comment) + self.fp.flush() + finally: + fp = self.fp + self.fp = None + if not self._filePassed: + fp.close() diff --git a/zipstream/compat.py b/zipstream/compat.py new file mode 100644 index 0000000..6a89930 --- /dev/null +++ b/zipstream/compat.py @@ -0,0 +1,68 @@ +# -*- coding: utf-8 -*- + +""" +pythoncompat + +Copied from +""" + +import sys + +# ------- +# Pythons +# ------- + + +PY2 = sys.version_info[0] == 2 +PY3 = sys.version_info[0] == 3 + + +# --------- +# Specifics +# --------- + +if PY2: + builtin_str = str + bytes = str + str = unicode + basestring = basestring + numeric_types = (int, long, float) + + +elif PY3: + builtin_str = str + str = str + bytes = bytes + basestring = (str, bytes) + numeric_types = (int, float) + + +try: + from zipfile import ZIP64_VERSION +except ImportError: + ZIP64_VERSION = 45 + +try: + from zipfile import BZIP2_VERSION +except ImportError: + BZIP2_VERSION = 46 + +try: + from zipfile import ZIP_BZIP2 +except ImportError: + ZIP_BZIP2 = 12 + +try: + from zipfile import LZMA_VERSION +except ImportError: + LZMA_VERSION = 63 + +try: + from zipfile import ZIP_LZMA +except ImportError: + ZIP_LZMA = 14 + +try: + from zipfile import ZIP_MAX_COMMENT +except ImportError: + ZIP_MAX_COMMENT = (1 << 16) - 1 \ No newline at end of file