""" Django ORM Optimization Tips Caveats: * Only use optimizations that obfuscate the code if you need to. * Not all of these tips are hard and fast rules. * Use your judgement to determine what improvements are appropriate for your code. """ # --------------------------------------------------------------------------- # 1. Profile # --------------------------------------------------------------------------- ## Use these tools: ## * django-debug-toolbar ## * QuerySet.explain() # --------------------------------------------------------------------------- # 2. Be aware of QuerySet's lazy evaluation. # --------------------------------------------------------------------------- ## 2a. When QuerySets are evaluated # Iteration for person in Person.objects.all(): # Some logic # Slicing/Indexing Person.objects.all()[0] # Pickling (i.e. serialization) pickle.dumps(Person.objects.all()) # Evaluation functions repr(Person.objects.all()) len(Person.objects.all()) list(Person.objects.all()) bool(Person.objects.all()) # Other [person for person in Person.objects.all()] # List comprehensions person in Person.objects.all() # `in` checks ## 2b. When QuerySets are cached/not cached ### Not Cached # Not reusing evaluated QuerySets print([p.name for p in Person.objects.all()]) # QuerySet evaluated and cached print([p.name for p in Person.objects.all()]) # New QuerySet is evaluated and cached # Slicing/indexing unevaluated QuerySets queryset = Person.objects.all() print(queryset[0]) # Queries the database print(queryset[0]) # Queries the database again # Printing print(Person.objects.all()) ### Cached # Reusing an evaluated QuerySet queryset = Person.objects.all() print([p.name for p in queryset]) # QuerySet evaluated and cached print([p.name for p in queryset]) # Cached results are used # Slicing/indexing evaluated QuerySets queryset = Person.objects.all() list(queryset) # Queryset evaluated and cached print(queryset[0]) # Cache used print(queryset[0]) # Cache used # --------------------------------------------------------------------------- # 3. Be aware of which attributes are not cached. # --------------------------------------------------------------------------- ## Not initially retrieved/cached # Foreign-key related objects person = Person.objects.get(id=1) person.father # foreign object is retrieved and cached person.father # cached version is used ## Never cached # Callable attributes person = Person.objects.get(id=1) person.children.all() # Database hit person.children.all() # Another database hit # --------------------------------------------------------------------------- # 4. Use select_related() and prefetch_related() when you will need everything. # --------------------------------------------------------------------------- # DON'T queryset = Person.objects.all() for person in queryset: person.father # Foreign key relationship results in a database hit each iteration # DO queryset = Person.objects.all().select_related('father') # Foreign key object is included in query and cached for person in queryset: person.father # Hits the cache instead of the database # --------------------------------------------------------------------------- # 5. Try to avoid database queries in a loop. # --------------------------------------------------------------------------- # DON'T (contrived example) filtered = Person.objects.filter(first_name='Shallan', last_name='Davar') for age in range(18): person = filtered.get(age=age) # Database query on each iteration # DO (contrived example) filtered = Person.objects.filter( # Narrow down the QuerySet to only what you need first_name='Shallan', last_name='Davar', age_gte=0, age_lte=18, ) lookup = {person.age: person for person in filtered} # Evaluate the QuerySet and construct lookup for age in range(18): person = lookup[age] # No database query # --------------------------------------------------------------------------- # 6. Use iterator() to iterate through a very large QuerySet only once. # --------------------------------------------------------------------------- # Save memory by not caching anything for person in Person.objects.iterator(): # Some logic # --------------------------------------------------------------------------- # 7. Do work in the database rather than in Python. # --------------------------------------------------------------------------- ## 7a. Use filter() and exclude() # DON'T for person in Person.objects.all(): if person.age >= 18: # Do something # DO for person in Person.objects.filter(age__gte=18): # Do something ## 7b. Use F expressions # DON'T for person in Person.objects.all(): person.age += 1 person.save() # DO Person.objects.update(age=F('age') + 1) ## 7c. Do aggregation in the database, if possible # DON'T max_age = 0 for person in Person.objects.all(): if person.age > max_age: max_age = person.age # DO max_age = Person.objects.all().aggregate(Max('age'))['age__max'] # --------------------------------------------------------------------------- # 8. Use values() and values_list() to get only the things you need. # --------------------------------------------------------------------------- ## 8a. Use values() # DON'T age_lookup = { person.name: person.age for person in Person.objects.all() } # DO age_lookup = { person['name']: person['age'] for person in Person.objects.values('name', 'age') } ## 8b. Use values_list() # DON'T person_ids = [person.id for person in Person.objects.all()] # DO person_ids = Person.objects.values_list('id', flat=True) # --------------------------------------------------------------------------- # 9. Use defer() and only() when you know you won't need certain fields. # # * Use when you need a QuerySet instead of a list of dicts from values(). # * Really only useful to defer fields that require significant processing to convert to a python object. # --------------------------------------------------------------------------- ## 9a. Use defer() queryset = Person.objects.defer('age') # Imagine age is computationally expensive for person in queryset: print(person.id) print(person.name) ## 9b. Use only() queryset = Person.objects.only('name') for person in queryset: print(person.name) # --------------------------------------------------------------------------- # 10. Use count() and exists() when you don't need the contents of the QuerySet. # # * Caveat: Only use these when you don't need to evaluate the QuerySet. # --------------------------------------------------------------------------- ## 10a. Use count() # DON'T count = len(Person.objects.all()) # Evaluates the entire queryset # DO count = Person.objects.count() # Executes more efficient SQL to determine count ## 10b. Use exists() # DON'T exists = len(Person.objects.all()) > 0 # DO exists = Person.objects.exists() # --------------------------------------------------------------------------- # 11. Use delete() and update() when possible. # --------------------------------------------------------------------------- ## 11a. Use delete() # DON'T for person in Person.objects.all(): person.delete() # DO Person.objects.all().delete() ## 11b. Use update() # DON'T for person in Person.objects.all(): person.age = 0 person.save() # DO Person.objects.update(age=0) # --------------------------------------------------------------------------- # 12. Use bulk_create() when possible. # # * Caveats: https://docs.djangoproject.com/en/2.1/ref/models/querysets/#django.db.models.query.QuerySet.bulk_create # --------------------------------------------------------------------------- # Bulk Create names = ['Jeff', 'Beth', 'Tim'] creates = [] for name in names: creates.append( Person(name=name, age=0) ) Person.objects.bulk_create(creates) # Bulk add to many-to-many fields person = Person.objects.get(id=1) person.jobs.add(job1, job2, job3) # --------------------------------------------------------------------------- # 13. Use foreign key values directly. # --------------------------------------------------------------------------- # DON'T father_id = Person.objects.get(id=1).father.id # Causes a needless database query # DO father_id = Person.objects.get(id=1).father_id # The foreign key is already cached. No query