djangopostgresqlperformancedjango-rest-frameworkdjango-rest-viewsets

DRF prefetch related still causing N+1 queries


Models:

class GradePolicy(models.Model):
    name = models.CharField(max_length=30)
    minScore = models.DecimalField(default=0, max_digits=4, decimal_places=1, db_column="minscore")
    maxScore = models.DecimalField(max_digits=4, decimal_places=1, default=100, db_column="maxscore")
    policyChoices = (
        ('Numerical', 'Numerical'),
        ('Textual', 'Textual')
    )
    type = models.CharField(max_length=30, default='Textual', choices=policyChoices)


class GradeLevel(models.Model):
    name = models.CharField(max_length=30)
    score = models.DecimalField(decimal_places=2, max_digits=5)
    abbreviation = models.CharField(max_length=4)
    policy = models.ForeignKey(GradePolicy, null=True, blank=True,
                           on_delete=models.DO_NOTHING)  

Views:

class GradePolicyViewSet(viewsets.ModelViewSet):
    """
    Retrieve grade policies
    """
    queryset = GradePolicy.objects.prefetch_related('gradelevel_set').order_by('pk')
    serializer_class = GradePolicySerializer

class GradeLevelViewSet(viewsets.ModelViewSet):
    queryset = GradeLevel.objects.all().order_by('pk')
    serializer_class = GradeLevelSerializer

Serializers:

class GradeLevelSerializer(serializers.HyperlinkedModelSerializer):
    url = serializers.HyperlinkedIdentityField(view_name="gbook:gradelevels-detail")
    policy = serializers.PrimaryKeyRelatedField(read_only=True)

    class Meta:
        model = GradeLevel
        fields = ['pk', 'url', 'name', 'score', 'abbreviation', 'policy']


class GradePolicySerializer(serializers.HyperlinkedModelSerializer):
    url = serializers.HyperlinkedIdentityField(view_name="gbook:gradepolicies-detail")
    levels = serializers.SerializerMethodField()

    def get_levels(self, obj):
        policy: GradePolicy = obj
        levels = policy.gradelevel_set.prefetch_related('policy').order_by("-score").all()
        return GradeLevelSerializer(levels, many=True, context={'request': None}).data

    class Meta:
        model = GradePolicy
        fields = ['pk', 'url', 'name', 'minScore', 'maxScore', 'type', 'levels']

I'm getting reports from Sentry that calling /gradepolicies, which is the list endpoint for GradePolicy, is causing N+1 queries. Looking at the queries, that does seem to be the case:

(0.004) SELECT "gbook_gradepolicy"."id", "gbook_gradepolicy"."name", "gbook_gradepolicy"."minscore", "gbook_gradepolicy"."maxscore", "gbook_gradepolicy"."type" FROM "gbook_gradepolicy" ORDER BY "gbook_gradepolicy"."id" ASC; args=()

(0.005) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" = 2 ORDER BY "gbook_gradelevel"."score" DESC; args=(2,)

(0.001) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" = 3 ORDER BY "gbook_gradelevel"."score" DESC; args=(3,)

(0.001) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" = 4 ORDER BY "gbook_gradelevel"."score" DESC; args=(4,)

(0.001) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" = 5 ORDER BY "gbook_gradelevel"."score" DESC; args=(5,)

(0.001) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" = 6 ORDER BY "gbook_gradelevel"."score" DESC; args=(6,)

(0.001) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" = 7 ORDER BY "gbook_gradelevel"."score" DESC; args=(7,)

(0.001) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" = 8 ORDER BY "gbook_gradelevel"."score" DESC; args=(8,)

To prevent this, I'm prefetching as shown in the code above. The result of that is:

(0.002) SELECT "gbook_gradepolicy"."id", "gbook_gradepolicy"."name", "gbook_gradepolicy"."minscore", "gbook_gradepolicy"."maxscore", "gbook_gradepolicy"."type" FROM "gbook_gradepolicy" ORDER BY "gbook_gradepolicy"."id" ASC; args=()

(0.004) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" IN (2, 3, 4, 5, 6, 7, 8); args=(2, 3, 4, 5, 6, 7, 8)

(0.001) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" = 2 ORDER BY "gbook_gradelevel"."score" DESC; args=(2,)

(0.001) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" = 3 ORDER BY "gbook_gradelevel"."score" DESC; args=(3,)

(0.001) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" = 4 ORDER BY "gbook_gradelevel"."score" DESC; args=(4,)

(0.000) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" = 5 ORDER BY "gbook_gradelevel"."score" DESC; args=(5,)

(0.001) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" = 6 ORDER BY "gbook_gradelevel"."score" DESC; args=(6,)

(0.000) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" = 7 ORDER BY "gbook_gradelevel"."score" DESC; args=(7,)

(0.000) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" = 8 ORDER BY "gbook_gradelevel"."score" DESC; args=(8,)

I see in there what looks like the result of the prefetch ((0.004) SELECT "gbook_gradelevel"."id", "gbook_gradelevel"."name", "gbook_gradelevel"."score", "gbook_gradelevel"."abbreviation", "gbook_gradelevel"."policy_id" FROM "gbook_gradelevel" WHERE "gbook_gradelevel"."policy_id" IN (2, 3, 4, 5, 6, 7, 8); args=(2, 3, 4, 5, 6, 7, 8), but it still performs all of the extra queries below.

How can I get rid of the N+1 query issue with this lookup?


Solution

  • It is the get_levels method that is slowing down your API. You don't need it just replace it with your GradeLevelSerializer like this:

    class GradePolicySerializer(serializers.HyperlinkedModelSerializer):
        url = serializers.HyperlinkedIdentityField(view_name="gbook:gradepolicies-detail")
        levels = GradeLevelSerializer(many=True, read_only=True)
    
        class Meta:
            model = GradePolicy
            fields = ['pk', 'url', 'name', 'minScore', 'maxScore', 'type', 'levels']
    

    The prefetch_related is fine in viewset as you are doing reverse relationship.

    You can specify the ordering on score as well in your viewset using Prefetch

    By default DRF doesn't optimize the database hits for more take a look here

    You can also update related name of your reverse relationship like this:

    class GradeLevel(models.Model):
      ...
      policy = models.ForeignKey(GradePolicy, null=True, blank=True, on_delete=models.DO_NOTHING, related_name='levels')