"""
Django ORM Optimization Tips

Caveats:
* Only use optimizations that obfuscate the code if you need to.
* Not all of these tips are hard and fast rules.
* Use your judgement to determine what improvements are appropriate for your code.
"""

# ---------------------------------------------------------------------------
# 1. Profile
# ---------------------------------------------------------------------------

## Use these tools:
## * django-debug-toolbar
## * QuerySet.explain()


# ---------------------------------------------------------------------------
# 2. Be aware of QuerySet's lazy evaluation.
# ---------------------------------------------------------------------------

## 2a. When QuerySets are evaluated

# Iteration
for person in Person.objects.all():
    # Some logic

# Slicing/Indexing
Person.objects.all()[0]

# Pickling (i.e. serialization)
pickle.dumps(Person.objects.all())

# Evaluation functions
repr(Person.objects.all())
len(Person.objects.all())
list(Person.objects.all())
bool(Person.objects.all())

# Other
[person for person in Person.objects.all()]  # List comprehensions
person in Person.objects.all()  # `in` checks

## 2b. When QuerySets are cached/not cached

### Not Cached

# Not reusing evaluated QuerySets
print([p.name for p in Person.objects.all()])  # QuerySet evaluated and cached
print([p.name for p in Person.objects.all()])  # New QuerySet is evaluated and cached

# Slicing/indexing unevaluated QuerySets
queryset = Person.objects.all()
print(queryset[0])  # Queries the database
print(queryset[0])  # Queries the database again

# Printing
print(Person.objects.all())

### Cached

# Reusing an evaluated QuerySet
queryset = Person.objects.all()
print([p.name for p in queryset])  # QuerySet evaluated and cached
print([p.name for p in queryset])  # Cached results are used

# Slicing/indexing evaluated QuerySets
queryset = Person.objects.all()
list(queryset)  # Queryset evaluated and cached
print(queryset[0])  # Cache used
print(queryset[0])  # Cache used


# ---------------------------------------------------------------------------
# 3. Be aware of which attributes are not cached.
# ---------------------------------------------------------------------------

## Not initially retrieved/cached

# Foreign-key related objects
person = Person.objects.get(id=1)
person.father  # foreign object is retrieved and cached
person.father  # cached version is used

## Never cached

# Callable attributes
person = Person.objects.get(id=1)
person.children.all()  # Database hit
person.children.all()  # Another database hit


# ---------------------------------------------------------------------------
# 4. Use select_related() and prefetch_related() when you will need everything.
# ---------------------------------------------------------------------------

# DON'T
queryset = Person.objects.all()
for person in queryset:
    person.father  # Foreign key relationship results in a database hit each iteration

# DO
queryset = Person.objects.all().select_related('father')  # Foreign key object is included in query and cached
for person in queryset:
    person.father  # Hits the cache instead of the database


# ---------------------------------------------------------------------------
# 5. Try to avoid database queries in a loop.
# ---------------------------------------------------------------------------

# DON'T (contrived example)
filtered = Person.objects.filter(first_name='Shallan', last_name='Davar')
for age in range(18):
    person = filtered.get(age=age)  # Database query on each iteration

# DO (contrived example)
filtered = Person.objects.filter(  # Narrow down the QuerySet to only what you need
    first_name='Shallan',
    last_name='Davar',
    age_gte=0,
    age_lte=18,
)
lookup = {person.age: person for person in filtered}  # Evaluate the QuerySet and construct lookup
for age in range(18):
    person = lookup[age]  # No database query


# ---------------------------------------------------------------------------
# 6. Use iterator() to iterate through a very large QuerySet only once.
# ---------------------------------------------------------------------------

# Save memory by not caching anything
for person in Person.objects.iterator():
    # Some logic


# ---------------------------------------------------------------------------
# 7. Do work in the database rather than in Python.
# ---------------------------------------------------------------------------

## 7a. Use filter() and exclude()

# DON'T
for person in Person.objects.all():
    if person.age >= 18:
        # Do something

# DO
for person in Person.objects.filter(age__gte=18):
    # Do something

## 7b. Use F expressions

# DON'T
for person in Person.objects.all():
    person.age += 1
    person.save()

# DO
Person.objects.update(age=F('age') + 1)

## 7c. Do aggregation in the database, if possible

# DON'T
max_age = 0
for person in Person.objects.all():
    if person.age > max_age:
        max_age = person.age

# DO
max_age = Person.objects.all().aggregate(Max('age'))['age__max']


# ---------------------------------------------------------------------------
# 8. Use values() and values_list() to get only the things you need.
# ---------------------------------------------------------------------------

## 8a. Use values()

# DON'T
age_lookup = {
    person.name: person.age
    for person in Person.objects.all()
}

# DO
age_lookup = {
    person['name']: person['age']
    for person in Person.objects.values('name', 'age')
}

## 8b. Use values_list()

# DON'T
person_ids = [person.id for person in Person.objects.all()]

# DO
person_ids = Person.objects.values_list('id', flat=True)


# ---------------------------------------------------------------------------
# 9. Use defer() and only() when you know you won't need certain fields.
#
# * Use when you need a QuerySet instead of a list of dicts from values().
# * Really only useful to defer fields that require significant processing to convert to a python object.
# ---------------------------------------------------------------------------

## 9a. Use defer()

queryset = Person.objects.defer('age')  # Imagine age is computationally expensive
for person in queryset:
    print(person.id)
    print(person.name)

## 9b. Use only()

queryset = Person.objects.only('name')
for person in queryset:
    print(person.name)


# ---------------------------------------------------------------------------
# 10. Use count() and exists() when you don't need the contents of the QuerySet.
#
# * Caveat: Only use these when you don't need to evaluate the QuerySet.
# ---------------------------------------------------------------------------

## 10a. Use count()

# DON'T
count = len(Person.objects.all())  # Evaluates the entire queryset

# DO
count = Person.objects.count()  # Executes more efficient SQL to determine count

## 10b. Use exists()

# DON'T
exists = len(Person.objects.all()) > 0

# DO
exists = Person.objects.exists()


# ---------------------------------------------------------------------------
# 11. Use delete() and update() when possible.
# ---------------------------------------------------------------------------

## 11a. Use delete()

# DON'T
for person in Person.objects.all():
    person.delete()

# DO
Person.objects.all().delete()

## 11b. Use update()

# DON'T
for person in Person.objects.all():
    person.age = 0
    person.save()

# DO
Person.objects.update(age=0)


# ---------------------------------------------------------------------------
# 12. Use bulk_create() when possible.
#
# * Caveats: https://docs.djangoproject.com/en/2.1/ref/models/querysets/#django.db.models.query.QuerySet.bulk_create
# ---------------------------------------------------------------------------

# Bulk Create
names = ['Jeff', 'Beth', 'Tim']
creates = []
for name in names:
    creates.append(
        Person(name=name, age=0)
    )
Person.objects.bulk_create(creates)

# Bulk add to many-to-many fields
person = Person.objects.get(id=1)
person.jobs.add(job1, job2, job3)


# ---------------------------------------------------------------------------
# 13. Use foreign key values directly.
# ---------------------------------------------------------------------------

# DON'T
father_id = Person.objects.get(id=1).father.id  # Causes a needless database query

# DO
father_id = Person.objects.get(id=1).father_id  # The foreign key is already cached. No query