diff --git a/django/core/validators.py b/django/core/validators.py index a40af0c8dd..a93c6ac975 100644 --- a/django/core/validators.py +++ b/django/core/validators.py @@ -1,3 +1,4 @@ +import platform import re import urllib2 import urlparse @@ -39,10 +40,6 @@ class RegexValidator(object): if not self.regex.search(smart_unicode(value)): raise ValidationError(self.message, code=self.code) -class HeadRequest(urllib2.Request): - def get_method(self): - return "HEAD" - class URLValidator(RegexValidator): regex = re.compile( r'^(?:http|ftp)s?://' # http:// or https:// @@ -52,7 +49,8 @@ class URLValidator(RegexValidator): r'(?::\d+)?' # optional port r'(?:/?|[/?]\S+)$', re.IGNORECASE) - def __init__(self, verify_exists=False, validator_user_agent=URL_VALIDATOR_USER_AGENT): + def __init__(self, verify_exists=False, + validator_user_agent=URL_VALIDATOR_USER_AGENT): super(URLValidator, self).__init__() self.verify_exists = verify_exists self.user_agent = validator_user_agent @@ -76,6 +74,7 @@ class URLValidator(RegexValidator): else: url = value + #This is deprecated and will be removed in a future release. 
if self.verify_exists: headers = { "Accept": "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5", @@ -88,21 +87,36 @@ class URLValidator(RegexValidator): broken_error = ValidationError( _(u'This URL appears to be a broken link.'), code='invalid_link') try: - req = HeadRequest(url, None, headers) - u = urllib2.urlopen(req) + req = urllib2.Request(url, None, headers) + req.get_method = lambda: 'HEAD' + #Create an opener that does not support local file access + opener = urllib2.OpenerDirector() + + #Don't follow redirects, but don't treat them as errors either + error_nop = lambda *args, **kwargs: True + http_error_processor = urllib2.HTTPErrorProcessor() + http_error_processor.http_error_301 = error_nop + http_error_processor.http_error_302 = error_nop + http_error_processor.http_error_307 = error_nop + + handlers = [urllib2.UnknownHandler(), + urllib2.HTTPHandler(), + urllib2.HTTPDefaultErrorHandler(), + urllib2.FTPHandler(), + http_error_processor] + try: + import ssl + handlers.append(urllib2.HTTPSHandler()) + except: + #Python isn't compiled with SSL support + pass + map(opener.add_handler, handlers) + if platform.python_version_tuple() >= (2, 6): + opener.open(req, timeout=10) + else: + opener.open(req) except ValueError: raise ValidationError(_(u'Enter a valid URL.'), code='invalid') - except urllib2.HTTPError, e: - if e.code in (405, 501): - # Try a GET request (HEAD refused) - # See also: http://www.w3.org/Protocols/rfc2616/rfc2616.html - try: - req = urllib2.Request(url, None, headers) - u = urllib2.urlopen(req) - except: - raise broken_error - else: - raise broken_error except: # urllib2.URLError, httplib.InvalidURL, etc. 
raise broken_error diff --git a/django/db/models/fields/__init__.py b/django/db/models/fields/__init__.py index 8081cf3954..dfacb667c7 100644 --- a/django/db/models/fields/__init__.py +++ b/django/db/models/fields/__init__.py @@ -1119,7 +1119,7 @@ class TimeField(Field): class URLField(CharField): description = _("URL") - def __init__(self, verbose_name=None, name=None, verify_exists=True, **kwargs): + def __init__(self, verbose_name=None, name=None, verify_exists=False, **kwargs): kwargs['max_length'] = kwargs.get('max_length', 200) CharField.__init__(self, verbose_name, name, **kwargs) self.validators.append(validators.URLValidator(verify_exists=verify_exists)) diff --git a/docs/internals/deprecation.txt b/docs/internals/deprecation.txt index c7f8bcbcc0..3f0f998b4a 100644 --- a/docs/internals/deprecation.txt +++ b/docs/internals/deprecation.txt @@ -108,6 +108,12 @@ their deprecation, as per the :ref:`Django deprecation policy beyond that of a simple ``TextField`` since the removal of oldforms. All uses of ``XMLField`` can be replaced with ``TextField``. + * ``django.db.models.fields.URLField.verify_exists`` has been + deprecated due to intractable security and performance + issues. Validation behavior will be removed in 1.4, and the + argument will be removed in 1.5. + + * 1.5 * The ``mod_python`` request handler has been deprecated since the 1.3 release. The ``mod_wsgi`` handler should be used instead. diff --git a/docs/ref/forms/fields.txt b/docs/ref/forms/fields.txt index 59a6df82d0..11647b33b2 100644 --- a/docs/ref/forms/fields.txt +++ b/docs/ref/forms/fields.txt @@ -756,6 +756,11 @@ Takes the following optional arguments: If ``True``, the validator will attempt to load the given URL, raising ``ValidationError`` if the page gives a 404. Defaults to ``False``. +.. deprecated:: 1.3.1 + + ``verify_exists`` was deprecated for security reasons and will be + removed in 1.4. This deprecation also covers ``validator_user_agent``. + ..
attribute:: URLField.validator_user_agent String used as the user-agent used when checking for a URL's existence. diff --git a/docs/ref/models/fields.txt b/docs/ref/models/fields.txt index 2fb5d494b1..36e2b109b8 100644 --- a/docs/ref/models/fields.txt +++ b/docs/ref/models/fields.txt @@ -831,14 +831,21 @@ shortcuts. ``URLField`` ------------ -.. class:: URLField([verify_exists=True, max_length=200, **options]) +.. class:: URLField([verify_exists=False, max_length=200, **options]) A :class:`CharField` for a URL. Has one extra optional argument: +.. deprecated:: 1.3.1 + + ``verify_exists`` is deprecated for security reasons as of 1.3.1 + and will be removed in 1.4. Prior to 1.3.1, the default value was + ``True``. + .. attribute:: URLField.verify_exists - If ``True`` (the default), the URL given will be checked for existence - (i.e., the URL actually loads and doesn't give a 404 response). + If ``True``, the URL given will be checked for existence (i.e., + the URL actually loads and doesn't give a 404 response) using a + ``HEAD`` request. Redirects are allowed, but will not be followed. Note that when you're using the single-threaded development server, validating a URL being served by the same server will hang. This should not diff --git a/docs/ref/settings.txt b/docs/ref/settings.txt index 175e50818c..18155f19fc 100644 --- a/docs/ref/settings.txt +++ b/docs/ref/settings.txt @@ -1892,16 +1892,6 @@ to ensure your processes are running in the correct environment. .. _See available choices: http://www.postgresql.org/docs/8.1/static/datetime-keywords.html#DATETIME-TIMEZONE-SET-TABLE -.. setting:: URL_VALIDATOR_USER_AGENT - -URL_VALIDATOR_USER_AGENT ------------------------- - -Default: ``Django/ (http://www.djangoproject.com/)`` - -The string to use as the ``User-Agent`` header when checking to see if URLs -exist (see the ``verify_exists`` option on :class:`~django.db.models.URLField`). - .. 
setting:: USE_ETAGS USE_ETAGS @@ -2095,3 +2085,19 @@ TEST_DATABASE_NAME This setting has been replaced by :setting:`TEST_NAME` in :setting:`DATABASES`. + + +URL_VALIDATOR_USER_AGENT +------------------------ + +.. deprecated:: 1.3.1 + This setting has been deprecated due to intractable performance and + security problems. + +Default: ``Django/ (http://www.djangoproject.com/)`` + +The string to use as the ``User-Agent`` header when checking to see if +URLs exist (see the ``verify_exists`` option on +:class:`~django.db.models.URLField`). This setting was deprecated in +1.3.1 along with ``verify_exists`` and will be removed in 1.4. + diff --git a/tests/modeltests/validation/__init__.py b/tests/modeltests/validation/__init__.py index c8a89cd36f..31c5000ba6 100644 --- a/tests/modeltests/validation/__init__.py +++ b/tests/modeltests/validation/__init__.py @@ -1,8 +1,8 @@ -from django.utils import unittest +from django.test import TestCase from django.core.exceptions import ValidationError -class ValidationTestCase(unittest.TestCase): +class ValidationTestCase(TestCase): def assertFailsValidation(self, clean, failed_fields): self.assertRaises(ValidationError, clean) try: diff --git a/tests/modeltests/validation/models.py b/tests/modeltests/validation/models.py index 861d1440fe..91ad87203d 100644 --- a/tests/modeltests/validation/models.py +++ b/tests/modeltests/validation/models.py @@ -15,6 +15,7 @@ class ModelToValidate(models.Model): parent = models.ForeignKey('self', blank=True, null=True, limit_choices_to={'number': 10}) email = models.EmailField(blank=True) url = models.URLField(blank=True) + url_verify = models.URLField(blank=True, verify_exists=True) f_with_custom_validator = models.IntegerField(blank=True, null=True, validators=[validate_answer_to_universe]) def clean(self): diff --git a/tests/modeltests/validation/tests.py b/tests/modeltests/validation/tests.py index 4236d8e7c4..eaf130be29 100644 --- a/tests/modeltests/validation/tests.py +++
b/tests/modeltests/validation/tests.py @@ -53,25 +53,22 @@ class BaseModelValidationTests(ValidationTestCase): mtv = ModelToValidate(number=10, name='Some Name', url='not a url') self.assertFieldFailsValidationWithMessage(mtv.full_clean, 'url', [u'Enter a valid value.']) + #The tests below which use url_verify are deprecated def test_correct_url_but_nonexisting_gives_404(self): - mtv = ModelToValidate(number=10, name='Some Name', url='http://google.com/we-love-microsoft.html') - self.assertFieldFailsValidationWithMessage(mtv.full_clean, 'url', [u'This URL appears to be a broken link.']) + mtv = ModelToValidate(number=10, name='Some Name', url_verify='http://qa-dev.w3.org/link-testsuite/http.php?code=404') + self.assertFieldFailsValidationWithMessage(mtv.full_clean, 'url_verify', [u'This URL appears to be a broken link.']) def test_correct_url_value_passes(self): - mtv = ModelToValidate(number=10, name='Some Name', url='http://www.example.com/') + mtv = ModelToValidate(number=10, name='Some Name', url_verify='http://www.google.com/') + self.assertEqual(None, mtv.full_clean()) # This will fail if there's no Internet connection + + def test_correct_url_with_redirect(self): + mtv = ModelToValidate(number=10, name='Some Name', url_verify='http://qa-dev.w3.org/link-testsuite/http.php?code=301') #this URL is expected to answer with a 301 redirect self.assertEqual(None, mtv.full_clean()) # This will fail if there's no Internet connection def test_correct_https_url_but_nonexisting(self): - mtv = ModelToValidate(number=10, name='Some Name', url='https://www.example.com/') - self.assertFieldFailsValidationWithMessage(mtv.full_clean, 'url', [u'This URL appears to be a broken link.']) - - def test_correct_ftp_url_but_nonexisting(self): - mtv = ModelToValidate(number=10, name='Some Name', url='ftp://ftp.google.com/we-love-microsoft.html') - self.assertFieldFailsValidationWithMessage(mtv.full_clean, 'url', [u'This URL appears to be a broken link.']) - - def
test_correct_ftps_url_but_nonexisting(self): - mtv = ModelToValidate(number=10, name='Some Name', url='ftps://ftp.google.com/we-love-microsoft.html') - self.assertFieldFailsValidationWithMessage(mtv.full_clean, 'url', [u'This URL appears to be a broken link.']) + mtv = ModelToValidate(number=10, name='Some Name', url_verify='https://www.example.com/') + self.assertFieldFailsValidationWithMessage(mtv.full_clean, 'url_verify', [u'This URL appears to be a broken link.']) def test_text_greater_that_charfields_max_length_raises_erros(self): mtv = ModelToValidate(number=10, name='Some Name'*100) diff --git a/tests/regressiontests/forms/tests/fields.py b/tests/regressiontests/forms/tests/fields.py index f76e7327eb..e963cf2f1b 100644 --- a/tests/regressiontests/forms/tests/fields.py +++ b/tests/regressiontests/forms/tests/fields.py @@ -567,7 +567,7 @@ class FieldsTests(TestCase): f.clean('http://www.broken.djangoproject.com') # bad domain except ValidationError, e: self.assertEqual("[u'This URL appears to be a broken link.']", str(e)) - self.assertRaises(ValidationError, f.clean, 'http://google.com/we-love-microsoft.html') # good domain, bad page + self.assertRaises(ValidationError, f.clean, 'http://qa-dev.w3.org/link-testsuite/http.php?code=400') # good domain, bad page try: f.clean('http://google.com/we-love-microsoft.html') # good domain, bad page except ValidationError, e: @@ -626,16 +626,10 @@ class FieldsTests(TestCase): self.assertEqual("[u'This URL appears to be a broken link.']", str(e)) def test_urlfield_10(self): - # UTF-8 char in path, enclosed by a monkey-patch to make sure - # the encoding is passed to urllib2.urlopen + # UTF-8 in the domain. 
f = URLField(verify_exists=True) - try: - _orig_urlopen = urllib2.urlopen - urllib2.urlopen = lambda req: True - url = u'http://t\xfcr.djangoproject.com/' - self.assertEqual(url, f.clean(url)) - finally: - urllib2.urlopen = _orig_urlopen + url = u'http://\u03b5\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac.idn.icann.org/\u0391\u03c1\u03c7\u03b9\u03ba\u03ae_\u03c3\u03b5\u03bb\u03af\u03b4\u03b1' + self.assertEqual(url, f.clean(url)) #This will fail without internet. # BooleanField ################################################################