From 869b34e9b3be3a4cfcb3a145f218ffd3f5e3fd79 Mon Sep 17 00:00:00 2001
From: Florian Apolloner <florian@apolloner.eu>
Date: Fri, 19 Jul 2019 17:04:53 +0200
Subject: [PATCH] [1.11.x] Fixed CVE-2019-14235 -- Fixed potential memory
 exhaustion in django.utils.encoding.uri_to_iri().

Thanks to Guido Vranken for the initial report.
---
 django/utils/encoding.py           | 17 ++++++++++-------
 docs/releases/1.11.23.txt          | 10 ++++++++++
 tests/utils_tests/test_encoding.py | 12 +++++++++++-
 3 files changed, 31 insertions(+), 8 deletions(-)

diff --git a/django/utils/encoding.py b/django/utils/encoding.py
index 999ffae19a..a29ef2be58 100644
--- a/django/utils/encoding.py
+++ b/django/utils/encoding.py
@@ -237,13 +237,16 @@ def repercent_broken_unicode(path):
     we need to re-percent-encode any octet produced that is not part of a
     strictly legal UTF-8 octet sequence.
     """
-    try:
-        path.decode('utf-8')
-    except UnicodeDecodeError as e:
-        repercent = quote(path[e.start:e.end], safe=b"/#%[]=:;$&()+,!?*@'~")
-        path = repercent_broken_unicode(
-            path[:e.start] + force_bytes(repercent) + path[e.end:])
-    return path
+    while True:
+        try:
+            path.decode('utf-8')
+        except UnicodeDecodeError as e:
+            # CVE-2019-14235: Loop instead of recursing, since recursing from
+            # inside the exception handler uses massive amounts of memory.
+            repercent = quote(path[e.start:e.end], safe=b"/#%[]=:;$&()+,!?*@'~")
+            path = path[:e.start] + force_bytes(repercent) + path[e.end:]
+        else:
+            return path
 
 
 def filepath_to_uri(path):
diff --git a/docs/releases/1.11.23.txt b/docs/releases/1.11.23.txt
index 03b33ebf63..04acca90f1 100644
--- a/docs/releases/1.11.23.txt
+++ b/docs/releases/1.11.23.txt
@@ -45,3 +45,13 @@ CVE-2019-14234: SQL injection possibility in key and index lookups for ``JSONFie
 <hstorefield.key>` for :class:`~django.contrib.postgres.fields.HStoreField`
 were subject to SQL injection, using a suitably crafted dictionary, with
 dictionary expansion, as the ``**kwargs`` passed to ``QuerySet.filter()``.
+
+CVE-2019-14235: Potential memory exhaustion in ``django.utils.encoding.uri_to_iri()``
+=====================================================================================
+
+If passed certain inputs, :func:`django.utils.encoding.uri_to_iri` could lead
+to significant memory usage due to excessive recursion when re-percent-encoding
+invalid UTF-8 octet sequences.
+
+``uri_to_iri()`` now avoids recursion when re-percent-encoding invalid UTF-8
+octet sequences.
diff --git a/tests/utils_tests/test_encoding.py b/tests/utils_tests/test_encoding.py
index 688b46194d..2b4bcff870 100644
--- a/tests/utils_tests/test_encoding.py
+++ b/tests/utils_tests/test_encoding.py
@@ -2,12 +2,13 @@
 from __future__ import unicode_literals
 
 import datetime
+import sys
 import unittest
 
 from django.utils import six
 from django.utils.encoding import (
     escape_uri_path, filepath_to_uri, force_bytes, force_text, iri_to_uri,
-    smart_text, uri_to_iri,
+    repercent_broken_unicode, smart_text, uri_to_iri,
 )
 from django.utils.functional import SimpleLazyObject
 from django.utils.http import urlquote_plus
@@ -76,6 +77,15 @@ class TestEncodingUtils(unittest.TestCase):
         self.assertEqual(smart_text(1), '1')
         self.assertEqual(smart_text('foo'), 'foo')
 
+    def test_repercent_broken_unicode_recursion_error(self):
+        # Prepare a bytestring long enough to force a recursion error if the
+        # tested function uses recursion.
+        data = b'\xfc' * sys.getrecursionlimit()
+        try:
+            self.assertEqual(repercent_broken_unicode(data), b'%FC' * sys.getrecursionlimit())
+        except RecursionError:
+            self.fail('Unexpected RecursionError raised.')
+
 
 class TestRFC3987IEncodingUtils(unittest.TestCase):