Tags: python, machine-learning, django-rest-framework, recommendation-engine, lenskit

LensKit Recommender only returns results for some users, otherwise returns empty DataFrame. Why is this happening?


I am trying to implement a group recommender system with the Django framework, using the LensKit tools for Python (specifically a Recommender object which adapts the UserUser algorithm). However, it only returns individual recommendations in some cases (for some specific users), but it always returns recommendations for groups of users (I create a hybrid user whose scores are the average of group members' scores and request recommendations for it). Below is my implementation for requesting recommendations for an individual user, as well as for a group:

from rest_framework import viewsets, status
from .models import Movie, Rating, Customer, Recommendation
from .serializers import MovieSerializer, RatingSerializer, UserSerializer, GroupSerializer, CustomerSerializer, RecommendationSerializer
from rest_framework.response import Response
from rest_framework.decorators import action
from django.contrib.auth.models import User, Group
from rest_framework.authentication import TokenAuthentication
from rest_framework.permissions import IsAuthenticated, AllowAny
from pandas import Series
from lenskit.algorithms import Recommender
from lenskit.algorithms.user_knn import UserUser
import lenskit.datasets as ds


class CustomerViewSet(viewsets.ModelViewSet):
    """Model viewset for customers with a per-user recommendation action."""

    queryset = Customer.objects.all()
    serializer_class = CustomerSerializer
    authentication_classes = (TokenAuthentication,)
    permission_classes = (IsAuthenticated,)

    @action(methods=['GET'], detail=False)
    def recommendations(self, request):
        """Return up to 20 recommended movies for the authenticated user.

        The user's stored ratings are passed to a LensKit user-user
        recommender trained on the MovieLens data under ``datasets/``. The
        resulting recommendations replace any previously stored ones for
        this username and are returned as serialized movies ordered by
        predicted score, highest first.

        Args:
            request: The DRF request. ``request.data['genre']`` optionally
                restricts results to movies whose ``genres`` contain that
                value; when absent, ``'All'`` is used and nothing is
                filtered.

        Returns:
            A 200 ``Response`` with the serialized movies. The list is empty
            when the recommender returns no rows for this user.
        """
        genre = request.data.get('genre', 'All')
        user = request.user
        name = user.username

        # Drop the recommendations left over from previous requests before
        # generating new ones.
        Recommendation.objects.filter(name=name).delete()

        # Map of movie id -> stars for everything this user has rated.
        user_ratings = {
            int(rating.movie.movieId): rating.stars
            for rating in Rating.objects.filter(user=user.id)
        }

        # NOTE(review): the model is re-trained on every request; consider
        # fitting it once and reusing it if latency matters.
        data = ds.MovieLens('datasets/')
        algo = Recommender.adapt(UserUser(15, min_nbrs=3))
        algo.fit(data.ratings)

        # The requesting user is not part of the training set, hence the
        # placeholder id -1 together with an explicit ratings Series.
        recs = algo.recommend(user=-1, ratings=Series(user_ratings))

        saved = 0
        for _, row in recs.iterrows():
            # Resolve the Movie row so the genre filter has something to
            # inspect and the stored id matches Movie.movieId, as the group
            # endpoint does.
            movie = Movie.objects.get(movieId=str(int(row['item'])))
            if genre != 'All' and genre not in movie.genres:
                continue
            Recommendation.objects.create(
                name=name, movieId=movie.movieId, pred_stars=row['score']
            )
            # Stop at 20 recommended items.
            saved += 1
            if saved >= 20:
                break

        # Return the movies ordered by their predicted rating.
        stored = Recommendation.objects.filter(name=name).order_by('-pred_stars')
        rec_movies = [Movie.objects.get(movieId=rec.movieId) for rec in stored]

        serializer = MovieSerializer(rec_movies, many=True)
        return Response(serializer.data, status=status.HTTP_200_OK)


class GroupViewSet(viewsets.ModelViewSet):
    """Model viewset for groups with a per-group recommendation action."""

    queryset = Group.objects.all()
    serializer_class = GroupSerializer
    authentication_classes = (TokenAuthentication,)
    permission_classes = (IsAuthenticated, )

    @action(methods=['GET'], detail=True)
    def recommendations(self, request, pk=None):
        """Return up to 20 recommended movies for the group with id ``pk``.

        The ratings of all group members are merged into one hybrid user
        whose rating for a movie is the average of the members' ratings for
        it. That hybrid user is passed to a LensKit user-user recommender
        trained on the MovieLens data under ``datasets/``. The resulting
        recommendations replace any previously stored ones for the group
        name.

        Args:
            request: The DRF request. ``request.data['genre']`` optionally
                restricts results to movies whose ``genres`` contain that
                value; when absent, ``'All'`` is used and nothing is
                filtered.
            pk: Primary key of the group.

        Returns:
            A 200 ``Response`` with the serialized movies ordered by
            predicted score (highest first), or a 400 ``Response`` when the
            requesting user is not a member of the group.
        """
        genre = request.data.get('genre', 'All')

        # Only members of the group may request its recommendations.
        group = Group.objects.get(id=pk)
        members = group.user_set.all()
        if request.user not in members:
            response = {'message': 'You are not a member of this group'}
            return Response(response, status=status.HTTP_400_BAD_REQUEST)

        name = group.name

        # Drop the recommendations left over from previous requests before
        # generating new ones.
        Recommendation.objects.filter(name=name).delete()

        # movie id -> [running average, number of ratings seen so far]
        averages = {}
        for member in members:
            for rating in Rating.objects.filter(user=member.id):
                stars = rating.stars
                movie_id = int(rating.movie.movieId)
                if movie_id in averages:
                    # Another member already rated this movie: fold the new
                    # rating into the running average.
                    mean, seen = averages[movie_id]
                    averages[movie_id] = [
                        (mean * seen + stars) / (seen + 1),
                        seen + 1,
                    ]
                else:
                    # First rating for this movie is inserted as-is.
                    averages[movie_id] = [stars, 1]

        # Training the ML algorithm.
        # NOTE(review): the model is re-trained on every request; consider
        # fitting it once and reusing it if latency matters.
        data = ds.MovieLens('datasets/')
        algo = Recommender.adapt(UserUser(15, min_nbrs=3))
        algo.fit(data.ratings)

        # Keep only the averages; the counts were bookkeeping.
        group_ratings = {
            movie_id: mean for movie_id, (mean, _) in averages.items()
        }

        # Request recommendations for the hybrid user, which is not part of
        # the training set (hence the placeholder id -1).
        recs = algo.recommend(user=-1, ratings=Series(group_ratings))

        saved = 0
        for _, row in recs.iterrows():
            movie = Movie.objects.get(movieId=str(int(row['item'])))
            if genre != 'All' and genre not in movie.genres:
                continue
            Recommendation.objects.create(
                name=name, movieId=movie.movieId, pred_stars=row['score']
            )
            # Stop at 20 recommendations.
            saved += 1
            if saved >= 20:
                break

        # Return movies ordered by the predicted score for the group.
        stored = Recommendation.objects.filter(name=name).order_by('-pred_stars')
        rec_movies = [Movie.objects.get(movieId=rec.movieId) for rec in stored]
        serializer = MovieSerializer(rec_movies, many=True)

        return Response(serializer.data, status=status.HTTP_200_OK)

Here is an example of working response:

[
    {
        "id": 17521,
        "movieId": "318",
        "title": "Shawshank Redemption, The (1994)",
        "genres": "Crime|Drama",
        "link": "https://www.imdb.com/title/tt0111161/",
        "average_rating": 4.487138263665595,
        "no_ratings": 311,
        "poster": "/default-movie.jpg"
    },
    {
        "id": 17503,
        "movieId": "296",
        "title": "Pulp Fiction (1994)",
        "genres": "Comedy|Crime|Drama|Thriller",
        "link": "https://www.imdb.com/title/tt0110912/",
        "average_rating": 4.256172839506172,
        "no_ratings": 324,
        "poster": "/default-movie.jpg"
    },
    ...
]

A non-working response:

[]

In the latter case, printing the DataFrame returned by the Recommender shows this:

Empty DataFrame
Columns: [item, score]
Index: []

I'm not sure what I'm doing wrong. Can anybody help?


Solution

  • The most likely cause of this problem is that the user-user recommender cannot build enough viable neighborhoods to provide recommendations. This is a downside to neighborhood-based recommendations.

    The solutions are to either switch to an algorithm that can always recommend for a user with some ratings (e.g. one of the matrix factorization algorithms), and/or use a fallback algorithm such as Popular to recommend when the personalized collaborative filter cannot recommend.

    (Another solution would be to implement one of the various cold-start recommenders or a content-based recommender for LensKit, but none are currently provided by the project.)