Fixed #32492 -- Added TrigramWordSimilarity() and TrigramWordDistance() on PostgreSQL.
This commit is contained in:
parent
4ca508a689
commit
4e4082f939
1
AUTHORS
1
AUTHORS
@ -710,6 +710,7 @@ answer newbie questions, and generally made Django that much better:
|
|||||||
Nicola Larosa <nico@teknico.net>
|
Nicola Larosa <nico@teknico.net>
|
||||||
Nicolas Lara <nicolaslara@gmail.com>
|
Nicolas Lara <nicolaslara@gmail.com>
|
||||||
Nicolas Noé <nicolas@niconoe.eu>
|
Nicolas Noé <nicolas@niconoe.eu>
|
||||||
|
Nikita Marchant <nikita.marchant@gmail.com>
|
||||||
Niran Babalola <niran@niran.org>
|
Niran Babalola <niran@niran.org>
|
||||||
Nis Jørgensen <nis@superlativ.dk>
|
Nis Jørgensen <nis@superlativ.dk>
|
||||||
Nowell Strite <https://nowell.strite.org/>
|
Nowell Strite <https://nowell.strite.org/>
|
||||||
|
@ -13,7 +13,7 @@ from django.test.signals import setting_changed
|
|||||||
from django.utils.translation import gettext_lazy as _
|
from django.utils.translation import gettext_lazy as _
|
||||||
|
|
||||||
from .indexes import OpClass
|
from .indexes import OpClass
|
||||||
from .lookups import SearchLookup, TrigramSimilar, Unaccent
|
from .lookups import SearchLookup, TrigramSimilar, TrigramWordSimilar, Unaccent
|
||||||
from .serializers import RangeSerializer
|
from .serializers import RangeSerializer
|
||||||
from .signals import register_type_handlers
|
from .signals import register_type_handlers
|
||||||
|
|
||||||
@ -33,6 +33,8 @@ def uninstall_if_needed(setting, value, enter, **kwargs):
|
|||||||
TextField._unregister_lookup(SearchLookup)
|
TextField._unregister_lookup(SearchLookup)
|
||||||
CharField._unregister_lookup(TrigramSimilar)
|
CharField._unregister_lookup(TrigramSimilar)
|
||||||
TextField._unregister_lookup(TrigramSimilar)
|
TextField._unregister_lookup(TrigramSimilar)
|
||||||
|
CharField._unregister_lookup(TrigramWordSimilar)
|
||||||
|
TextField._unregister_lookup(TrigramWordSimilar)
|
||||||
# Disconnect this receiver until the next time this app is installed
|
# Disconnect this receiver until the next time this app is installed
|
||||||
# and ready() connects it again to prevent unnecessary processing on
|
# and ready() connects it again to prevent unnecessary processing on
|
||||||
# each setting change.
|
# each setting change.
|
||||||
@ -65,5 +67,7 @@ class PostgresConfig(AppConfig):
|
|||||||
TextField.register_lookup(SearchLookup)
|
TextField.register_lookup(SearchLookup)
|
||||||
CharField.register_lookup(TrigramSimilar)
|
CharField.register_lookup(TrigramSimilar)
|
||||||
TextField.register_lookup(TrigramSimilar)
|
TextField.register_lookup(TrigramSimilar)
|
||||||
|
CharField.register_lookup(TrigramWordSimilar)
|
||||||
|
TextField.register_lookup(TrigramWordSimilar)
|
||||||
MigrationWriter.register_serializer(RANGE_TYPES, RangeSerializer)
|
MigrationWriter.register_serializer(RANGE_TYPES, RangeSerializer)
|
||||||
IndexExpression.register_wrappers(OrderBy, OpClass, Collate)
|
IndexExpression.register_wrappers(OrderBy, OpClass, Collate)
|
||||||
|
@ -58,3 +58,8 @@ class SearchLookup(SearchVectorExact):
|
|||||||
class TrigramSimilar(PostgresOperatorLookup):
|
class TrigramSimilar(PostgresOperatorLookup):
|
||||||
lookup_name = 'trigram_similar'
|
lookup_name = 'trigram_similar'
|
||||||
postgres_operator = '%%'
|
postgres_operator = '%%'
|
||||||
|
|
||||||
|
|
||||||
|
class TrigramWordSimilar(PostgresOperatorLookup):
    """
    Lookup registered under the name ``trigram_word_similar``.

    It is rendered with the operator string ``'%%>'``.
    NOTE(review): the doubled percent sign presumably escapes a literal ``%``
    for parameter interpolation -- confirm against PostgresOperatorLookup.
    """
    postgres_operator = '%%>'
    lookup_name = 'trigram_word_similar'
|
||||||
|
@ -293,6 +293,15 @@ class TrigramBase(Func):
|
|||||||
super().__init__(expression, string, **extra)
|
super().__init__(expression, string, **extra)
|
||||||
|
|
||||||
|
|
||||||
|
class TrigramWordBase(Func):
    """
    Base for trigram word expressions; the output is a ``FloatField``.

    The search string comes first and the expression second. A ``string``
    that is not already an expression (it has no ``resolve_expression``
    attribute) is wrapped in ``Value()`` before being passed on.
    """
    output_field = FloatField()

    def __init__(self, string, expression, **extra):
        needle = string if hasattr(string, 'resolve_expression') else Value(string)
        super().__init__(needle, expression, **extra)
|
||||||
|
|
||||||
|
|
||||||
class TrigramSimilarity(TrigramBase):
|
class TrigramSimilarity(TrigramBase):
|
||||||
function = 'SIMILARITY'
|
function = 'SIMILARITY'
|
||||||
|
|
||||||
@ -300,3 +309,12 @@ class TrigramSimilarity(TrigramBase):
|
|||||||
class TrigramDistance(TrigramBase):
|
class TrigramDistance(TrigramBase):
|
||||||
function = ''
|
function = ''
|
||||||
arg_joiner = ' <-> '
|
arg_joiner = ' <-> '
|
||||||
|
|
||||||
|
|
||||||
|
class TrigramWordDistance(TrigramWordBase):
    """
    Trigram word distance between a string and an expression.

    No SQL function name is used (``function`` is empty); the two arguments
    are joined with the ``<<->`` operator through ``arg_joiner``.
    """
    arg_joiner = ' <<-> '
    function = ''
|
||||||
|
|
||||||
|
|
||||||
|
class TrigramWordSimilarity(TrigramWordBase):
    """
    Trigram word similarity between a string and an expression.

    Both arguments are passed to the ``WORD_SIMILARITY`` SQL function.
    """
    function = 'WORD_SIMILARITY'
|
||||||
|
@ -14,9 +14,8 @@ returns results that have a similarity measurement greater than the current
|
|||||||
similarity threshold.
|
similarity threshold.
|
||||||
|
|
||||||
To use it, add ``'django.contrib.postgres'`` in your :setting:`INSTALLED_APPS`
|
To use it, add ``'django.contrib.postgres'`` in your :setting:`INSTALLED_APPS`
|
||||||
and activate the `pg_trgm extension
|
and activate the `pg_trgm extension`_ on PostgreSQL. You can install the
|
||||||
<https://www.postgresql.org/docs/current/pgtrgm.html>`_ on PostgreSQL. You can
|
extension using the
|
||||||
install the extension using the
|
|
||||||
:class:`~django.contrib.postgres.operations.TrigramExtension` migration
|
:class:`~django.contrib.postgres.operations.TrigramExtension` migration
|
||||||
operation.
|
operation.
|
||||||
|
|
||||||
@ -26,6 +25,31 @@ The ``trigram_similar`` lookup can be used on
|
|||||||
>>> City.objects.filter(name__trigram_similar="Middlesborough")
|
>>> City.objects.filter(name__trigram_similar="Middlesborough")
|
||||||
['<City: Middlesbrough>']
|
['<City: Middlesbrough>']
|
||||||
|
|
||||||
|
.. fieldlookup:: trigram_word_similar
|
||||||
|
|
||||||
|
.. versionadded:: 4.0
|
||||||
|
|
||||||
|
The ``trigram_word_similar`` lookup allows you to perform trigram word
|
||||||
|
similarity lookups using a dedicated PostgreSQL extension. It can be
|
||||||
|
approximately understood as measuring the greatest number of trigrams shared
|
||||||
|
between the parameter and any substring of the field. A trigram word lookup is
|
||||||
|
given an expression and returns results that have a word similarity measurement
|
||||||
|
greater than the current word similarity threshold.
|
||||||
|
|
||||||
|
To use it, add ``'django.contrib.postgres'`` in your :setting:`INSTALLED_APPS`
|
||||||
|
and activate the `pg_trgm extension`_ on PostgreSQL. You can install the
|
||||||
|
extension using the
|
||||||
|
:class:`~django.contrib.postgres.operations.TrigramExtension` migration
|
||||||
|
operation.
|
||||||
|
|
||||||
|
The ``trigram_word_similar`` lookup can be used on
|
||||||
|
:class:`~django.db.models.CharField` and :class:`~django.db.models.TextField`::
|
||||||
|
|
||||||
|
>>> Sentence.objects.filter(name__trigram_word_similar='Middlesborough')
|
||||||
|
['<Sentence: Gumby rides on the path of Middlesbrough>']
|
||||||
|
|
||||||
|
.. _`pg_trgm extension`: https://www.postgresql.org/docs/current/pgtrgm.html
|
||||||
|
|
||||||
``Unaccent``
|
``Unaccent``
|
||||||
============
|
============
|
||||||
|
|
||||||
|
@ -280,8 +280,9 @@ Trigram similarity
|
|||||||
==================
|
==================
|
||||||
|
|
||||||
Another approach to searching is trigram similarity. A trigram is a group of
|
Another approach to searching is trigram similarity. A trigram is a group of
|
||||||
three consecutive characters. In addition to the :lookup:`trigram_similar`
|
three consecutive characters. In addition to the :lookup:`trigram_similar` and
|
||||||
lookup, you can use a couple of other expressions.
|
:lookup:`trigram_word_similar` lookups, you can use a couple of other
|
||||||
|
expressions.
|
||||||
|
|
||||||
To use them, you need to activate the `pg_trgm extension
|
To use them, you need to activate the `pg_trgm extension
|
||||||
<https://www.postgresql.org/docs/current/pgtrgm.html>`_ on PostgreSQL. You can
|
<https://www.postgresql.org/docs/current/pgtrgm.html>`_ on PostgreSQL. You can
|
||||||
@ -308,6 +309,27 @@ Usage example::
|
|||||||
... ).filter(similarity__gt=0.3).order_by('-similarity')
|
... ).filter(similarity__gt=0.3).order_by('-similarity')
|
||||||
[<Author: Katy Stevens>, <Author: Stephen Keats>]
|
[<Author: Katy Stevens>, <Author: Stephen Keats>]
|
||||||
|
|
||||||
|
``TrigramWordSimilarity``
|
||||||
|
-------------------------
|
||||||
|
|
||||||
|
.. versionadded:: 4.0
|
||||||
|
|
||||||
|
.. class:: TrigramWordSimilarity(string, expression, **extra)
|
||||||
|
|
||||||
|
Accepts a string or expression, and a field name or expression. Returns the
|
||||||
|
trigram word similarity between the two arguments.
|
||||||
|
|
||||||
|
Usage example::
|
||||||
|
|
||||||
|
>>> from django.contrib.postgres.search import TrigramWordSimilarity
|
||||||
|
>>> Author.objects.create(name='Katy Stevens')
|
||||||
|
>>> Author.objects.create(name='Stephen Keats')
|
||||||
|
>>> test = 'Kat'
|
||||||
|
>>> Author.objects.annotate(
|
||||||
|
... similarity=TrigramWordSimilarity(test, 'name'),
|
||||||
|
... ).filter(similarity__gt=0.3).order_by('-similarity')
|
||||||
|
[<Author: Katy Stevens>]
|
||||||
|
|
||||||
``TrigramDistance``
|
``TrigramDistance``
|
||||||
-------------------
|
-------------------
|
||||||
|
|
||||||
@ -326,3 +348,24 @@ Usage example::
|
|||||||
... distance=TrigramDistance('name', test),
|
... distance=TrigramDistance('name', test),
|
||||||
... ).filter(distance__lte=0.7).order_by('distance')
|
... ).filter(distance__lte=0.7).order_by('distance')
|
||||||
[<Author: Katy Stevens>, <Author: Stephen Keats>]
|
[<Author: Katy Stevens>, <Author: Stephen Keats>]
|
||||||
|
|
||||||
|
``TrigramWordDistance``
|
||||||
|
-----------------------
|
||||||
|
|
||||||
|
.. versionadded:: 4.0
|
||||||
|
|
||||||
|
.. class:: TrigramWordDistance(string, expression, **extra)
|
||||||
|
|
||||||
|
Accepts a string or expression, and a field name or expression. Returns the
|
||||||
|
trigram word distance between the two arguments.
|
||||||
|
|
||||||
|
Usage example::
|
||||||
|
|
||||||
|
>>> from django.contrib.postgres.search import TrigramWordDistance
|
||||||
|
>>> Author.objects.create(name='Katy Stevens')
|
||||||
|
>>> Author.objects.create(name='Stephen Keats')
|
||||||
|
>>> test = 'Kat'
|
||||||
|
>>> Author.objects.annotate(
|
||||||
|
... distance=TrigramWordDistance(test, 'name'),
|
||||||
|
... ).filter(distance__lte=0.7).order_by('distance')
|
||||||
|
[<Author: Katy Stevens>]
|
||||||
|
@ -200,6 +200,13 @@ Minor features
|
|||||||
expression allows using subqueries to construct lists of values on
|
expression allows using subqueries to construct lists of values on
|
||||||
PostgreSQL.
|
PostgreSQL.
|
||||||
|
|
||||||
|
* The new :lookup:`trigram_word_similar` lookup, and the
|
||||||
|
:class:`TrigramWordDistance()
|
||||||
|
<django.contrib.postgres.search.TrigramWordDistance>` and
|
||||||
|
:class:`TrigramWordSimilarity()
|
||||||
|
<django.contrib.postgres.search.TrigramWordSimilarity>` expressions allow
|
||||||
|
using trigram word similarity.
|
||||||
|
|
||||||
:mod:`django.contrib.redirects`
|
:mod:`django.contrib.redirects`
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
@ -110,7 +110,7 @@ class Migration(migrations.Migration):
|
|||||||
name='CharFieldModel',
|
name='CharFieldModel',
|
||||||
fields=[
|
fields=[
|
||||||
('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
|
('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
|
||||||
('field', models.CharField(max_length=16)),
|
('field', models.CharField(max_length=64)),
|
||||||
],
|
],
|
||||||
options=None,
|
options=None,
|
||||||
bases=None,
|
bases=None,
|
||||||
|
@ -83,7 +83,7 @@ class ArrayEnumModel(PostgreSQLModel):
|
|||||||
|
|
||||||
|
|
||||||
class CharFieldModel(models.Model):
|
class CharFieldModel(models.Model):
|
||||||
field = models.CharField(max_length=16)
|
field = models.CharField(max_length=64)
|
||||||
|
|
||||||
|
|
||||||
class TextFieldModel(models.Model):
|
class TextFieldModel(models.Model):
|
||||||
|
@ -5,7 +5,8 @@ from .models import CharFieldModel, TextFieldModel
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
from django.contrib.postgres.search import (
|
from django.contrib.postgres.search import (
|
||||||
TrigramDistance, TrigramSimilarity,
|
TrigramDistance, TrigramSimilarity, TrigramWordDistance,
|
||||||
|
TrigramWordSimilarity,
|
||||||
)
|
)
|
||||||
except ImportError:
|
except ImportError:
|
||||||
pass
|
pass
|
||||||
@ -30,6 +31,15 @@ class TrigramTest(PostgreSQLTestCase):
|
|||||||
transform=lambda instance: instance.field,
|
transform=lambda instance: instance.field,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_trigram_word_search(self):
    # A misspelled word must still match a longer sentence containing the
    # correctly spelled word.
    sentence = self.Model.objects.create(
        field='Gumby rides on the path of Middlesbrough',
    )
    matches = self.Model.objects.filter(field__trigram_word_similar='Middlesborough')
    self.assertSequenceEqual(matches, [sentence])
|
||||||
|
|
||||||
def test_trigram_similarity(self):
|
def test_trigram_similarity(self):
|
||||||
search = 'Bat sat on cat.'
|
search = 'Bat sat on cat.'
|
||||||
# Round result of similarity because PostgreSQL 12+ uses greater
|
# Round result of similarity because PostgreSQL 12+ uses greater
|
||||||
@ -43,6 +53,20 @@ class TrigramTest(PostgreSQLTestCase):
|
|||||||
ordered=True,
|
ordered=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_trigram_word_similarity(self):
    # NOTE(review): the expected rows are presumably created by the test
    # class fixtures, which are outside this view -- confirm.
    term = 'mat'
    ranked = (
        self.Model.objects
        .filter(field__trigram_word_similar=term)
        .annotate(word_similarity=TrigramWordSimilarity(term, 'field'))
        .values('field', 'word_similarity')
        .order_by('-word_similarity')
    )
    expected = [
        {'field': 'Cat sat on mat.', 'word_similarity': 1.0},
        {'field': 'Matthew', 'word_similarity': 0.75},
    ]
    self.assertSequenceEqual(ranked, expected)
|
||||||
|
|
||||||
def test_trigram_similarity_alternate(self):
|
def test_trigram_similarity_alternate(self):
|
||||||
# Round result of distance because PostgreSQL 12+ uses greater
|
# Round result of distance because PostgreSQL 12+ uses greater
|
||||||
# precision.
|
# precision.
|
||||||
@ -55,6 +79,19 @@ class TrigramTest(PostgreSQLTestCase):
|
|||||||
ordered=True,
|
ordered=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_trigram_word_similarity_alternate(self):
    # Same data as the similarity test, checked through the distance
    # expression: keep rows at distance <= 0.7, closest first.
    closest_first = (
        self.Model.objects
        .annotate(word_distance=TrigramWordDistance('mat', 'field'))
        .filter(word_distance__lte=0.7)
        .values('field', 'word_distance')
        .order_by('word_distance')
    )
    expected = [
        {'field': 'Cat sat on mat.', 'word_distance': 0},
        {'field': 'Matthew', 'word_distance': 0.25},
    ]
    self.assertSequenceEqual(closest_first, expected)
|
||||||
|
|
||||||
|
|
||||||
class TrigramTextFieldTest(TrigramTest):
|
class TrigramTextFieldTest(TrigramTest):
|
||||||
"""
|
"""
|
||||||
|
Loading…
x
Reference in New Issue
Block a user