From 1e39d0f6280abf34c7719db5e7ed1c333f5e5919 Mon Sep 17 00:00:00 2001 From: Benjamin Richter Date: Sun, 25 Jan 2015 23:22:46 +0100 Subject: [PATCH] [1.4.x] Fixed #24158 -- Allowed GZipMiddleware to work with streaming responses Backport of django.utils.text.compress_sequence and fix for django.middleware.gzip.GZipMiddleware when using iterators as response.content. --- django/middleware/gzip.py | 25 +++++++++++------ django/utils/text.py | 33 +++++++++++++++++++++++ docs/releases/1.4.19.txt | 16 +++++++++++ docs/releases/index.txt | 1 + tests/regressiontests/middleware/tests.py | 13 +++++++++ 5 files changed, 80 insertions(+), 8 deletions(-) create mode 100644 docs/releases/1.4.19.txt diff --git a/django/middleware/gzip.py b/django/middleware/gzip.py index 69f938cf0a..eb4d8bff42 100644 --- a/django/middleware/gzip.py +++ b/django/middleware/gzip.py @@ -1,6 +1,6 @@ import re -from django.utils.text import compress_string +from django.utils.text import compress_string, compress_sequence from django.utils.cache import patch_vary_headers re_accepts_gzip = re.compile(r'\bgzip\b') @@ -12,8 +12,9 @@ class GZipMiddleware(object): on the Accept-Encoding header. """ def process_response(self, request, response): + # The response object can tell us whether content is a string or an iterable # It's not worth attempting to compress really short responses. - if len(response.content) < 200: + if not response._base_content_is_iter and len(response.content) < 200: return response patch_vary_headers(response, ('Accept-Encoding',)) @@ -32,15 +33,23 @@ class GZipMiddleware(object): if not re_accepts_gzip.search(ae): return response - # Return the compressed content only if it's actually shorter. 
- compressed_content = compress_string(response.content) - if len(compressed_content) >= len(response.content): - return response + # The response object can tell us whether content is a string or an iterable + if response._base_content_is_iter: + # If the response content is iterable we don't know the length, so delete the header. + del response['Content-Length'] + # Wrap the response content in a streaming gzip iterator (direct access to inner response._container) + response.content = compress_sequence(response._container) + else: + # Return the compressed content only if it's actually shorter. + compressed_content = compress_string(response.content) + if len(compressed_content) >= len(response.content): + return response + response.content = compressed_content + response['Content-Length'] = str(len(response.content)) if response.has_header('ETag'): response['ETag'] = re.sub('"$', ';gzip"', response['ETag']) - response.content = compressed_content response['Content-Encoding'] = 'gzip' - response['Content-Length'] = str(len(response.content)) + return response diff --git a/django/utils/text.py b/django/utils/text.py index eaafb96d7c..8e43dc9652 100644 --- a/django/utils/text.py +++ b/django/utils/text.py @@ -286,6 +286,39 @@ def compress_string(s): ustring_re = re.compile(u"([\u0080-\uffff])") +# Backported from django 1.5 +class StreamingBuffer(object): + def __init__(self): + self.vals = [] + + def write(self, val): + self.vals.append(val) + + def read(self): + ret = ''.join(self.vals) + self.vals = [] + return ret + + def flush(self): + return + + def close(self): + return + +# Backported from django 1.5 +# Like compress_string, but for iterators of strings. +def compress_sequence(sequence): + buf = StreamingBuffer() + zfile = GzipFile(mode='wb', compresslevel=6, fileobj=buf) + # Output headers... 
+ yield buf.read() + for item in sequence: + zfile.write(item) + zfile.flush() + yield buf.read() + zfile.close() + yield buf.read() + def javascript_quote(s, quote_double_quotes=False): def fix(match): diff --git a/docs/releases/1.4.19.txt b/docs/releases/1.4.19.txt new file mode 100644 index 0000000000..da813fa7eb --- /dev/null +++ b/docs/releases/1.4.19.txt @@ -0,0 +1,16 @@ +=========================== +Django 1.4.19 release notes +=========================== + +*Under development* + +Django 1.4.19 fixes a regression in the 1.4.18 security release. + +Bugfixes +======== + +* ``GZipMiddleware`` now supports streaming responses. As part of the 1.4.18 + security release, the ``django.views.static.serve()`` function was altered + to stream the files it serves. Unfortunately, the ``GZipMiddleware`` consumed + the stream prematurely and prevented files from being served properly + (`#24158 <https://code.djangoproject.com/ticket/24158>`_). diff --git a/docs/releases/index.txt b/docs/releases/index.txt index 98f69adc20..58b32f05c6 100644 --- a/docs/releases/index.txt +++ b/docs/releases/index.txt @@ -19,6 +19,7 @@ Final releases .. toctree:: :maxdepth: 1 + 1.4.19 1.4.18 1.4.17 1.4.16 diff --git a/tests/regressiontests/middleware/tests.py b/tests/regressiontests/middleware/tests.py index 138ee50e43..87b19fb6da 100644 --- a/tests/regressiontests/middleware/tests.py +++ b/tests/regressiontests/middleware/tests.py @@ -514,6 +514,7 @@ class GZipMiddlewareTest(TestCase): short_string = "This string is too short to be worth compressing." 
compressible_string = 'a' * 500 uncompressible_string = ''.join(chr(random.randint(0, 255)) for _ in xrange(500)) + iterator_as_content = iter(compressible_string) def setUp(self): self.req = HttpRequest() @@ -589,6 +590,18 @@ class GZipMiddlewareTest(TestCase): self.assertEqual(r.content, self.uncompressible_string) self.assertEqual(r.get('Content-Encoding'), None) + def test_streaming_compression(self): + """ + Tests that iterators as response content return a compressed stream without consuming + the whole response.content while doing so. + See #24158. + """ + self.resp.content = self.iterator_as_content + r = GZipMiddleware().process_response(self.req, self.resp) + self.assertEqual(self.decompress(''.join(r.content)), self.compressible_string) + self.assertEqual(r.get('Content-Encoding'), 'gzip') + self.assertEqual(r.get('Content-Length'), None) + class ETagGZipMiddlewareTest(TestCase): """