Created
November 4, 2018 14:30
-
-
Save jacobian/4b65b3fbb7103829b941e23c506c822a to your computer and use it in GitHub Desktop.
Revisions
-
jacobian created this gist
Nov 4, 2018 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,52 @@ # https://stackoverflow.com/questions/53139643/django-postgres-array-field-count-number-of-overlaps # !!! DOESN'T WORK but might with some more poking? class Article(models.Model): keywords = ArrayField(models.CharField(max_length=100)) def __str__(self): return f"<Article {self.id} keywords={self.keywords}>" def find_similar_articles(self, min_overlap=2): # We need the field type to properly cast values below kf = self.__class__._meta.get_field("keywords") # Construct the inner subquery, which is of the form # `SELECT UNNEST(a1) INTERSECT UNNEST(a2)` # where a1 and a2 are arrays of keywords.` # # First step: the left- and right-hand queries, which UNNNEST # the keyword field and provided values into rows lhs = Article.objects.annotate(tag=Unnest(models.F("keywords"))) rhs = Article.objects.annotate(tag=Unnest(self.keywords, output_field=kf)) # Then, we need to make sure to use 'values' to ensure that the # subquery returns only a single field. lhs = lhs.values("tag") rhs = rhs.values("tag") # And here's that inner INTERSECT subquery intersecting_tags_subquery = models.Subquery( lhs.intersection(rhs), output_field=kf ) # Finally, do the query: # 1. exclude this row, because it will (by definition) match qs = self.__class__.objects.exclude(id=self.id) # 2. annotate with the number of matching rows qs = qs.annotate(matching_keywords=Array(intersecting_tags_subquery)) # 3. and find only rows with more than min_overlap matches qs = qs.filter(matching_keywords__len__gte=min_overlap) return qs class Unnest(models.Func): function = "unnest" class Array(models.Func): function = "array" arity = 1