-
Notifications
You must be signed in to change notification settings - Fork 295
Description
I am trying to build a recommendation system using the TensorFlow Recommenders (TFRS), and I'm facing an issue while inferencing the trained model. I have created the Query and Candidate Towers and set the embedding dimension to be the length of unique movies in my dataset. The user features have already been normailzed.
class UserModel(tf.keras.Model):
def __init__(self, users_vocab, embedding_dimension = 128):
super().__init__()
self.user_model = tf.keras.Sequential([
tf.keras.layers.experimental.preprocessing.IntegerLookup(vocabulary=users_vocab),
tf.keras.layers.Embedding(len(users_vocab) + 2, embedding_dimension)
])
def call(self, inputs):
# Take the input dictionary, pass it through each input layer,
# and concatenate the result.
return tf.concat([
self.user_model(inputs["USER_ID"]),
tf.reshape(inputs["USER_FEATURE"], [-1, 1])
], axis=1)
class QueryModel(tf.keras.Model):
"""Model for encoding user queries."""
def __init__(self, users_vocab):
super().__init__()
# We first use the user model for generating embeddings.
self.embedding_model = UserModel(users_vocab)
# Then construct the layers.
self.dense_layers = tf.keras.Sequential()
# Use the ReLU activation for all but the last layer.
self.dense_layers.add(tf.keras.layers.Dense(128, activation="relu"))
self.dense_layers.add(tf.keras.layers.Dropout(0.1))
self.dense_layers.add(tf.keras.layers.Dense(64, activation="relu"))
self.dense_layers.add(tf.keras.layers.Dropout(0.1))
# No activation for the last layer.
self.dense_layers.add(tf.keras.layers.Dense(32))
def call(self, inputs):
feature_embedding = self.embedding_model(inputs)
return self.dense_layers(feature_embedding)
class MovieModel(tf.keras.Model):
def __init__(self, movies_vocab, embedding_dimension=256):
super().__init__()
self.movie_model = tf.keras.Sequential([
tf.keras.layers.experimental.preprocessing.IntegerLookup(vocabulary=movies_vocab),
tf.keras.layers.Embedding(len(movies_vocab) + 2, embedding_dimension)
])
def call(self, inputs):
return tf.concat([
self.movie_model(inputs["MOVIE_ID"]),
tf.reshape(inputs["MOVIE_FEATURES"], [-1, 1]),
], axis=1)
class CandidateModel(tf.keras.Model):
"""Model for encoding movies."""
def __init__(self, movies_vocab):
super().__init__()
self.embedding_model = MovieModel(movies_vocab)
# Then construct the layers.
self.dense_layers = tf.keras.Sequential()
# Use the ReLU activation for all but the last layer.
self.dense_layers.add(tf.keras.layers.Dense(128, activation="relu"))
self.dense_layers.add(tf.keras.layers.Dropout(0.1))
self.dense_layers.add(tf.keras.layers.Dense(64, activation="relu"))
self.dense_layers.add(tf.keras.layers.Dropout(0.1))
# No activation for the last layer.
self.dense_layers.add(tf.keras.layers.Dense(32))
def call(self, inputs):
# print("Candidate Model", inputs)
feature_embedding = self.embedding_model(inputs)
return self.dense_layers(feature_embedding)
class RetrievalModel(tfrs.Model):
def __init__(self, users_vocab, movies_vocab, movie_features):
super().__init__()
self.query_model = QueryModel(users_vocab)
self.candidate_model = CandidateModel(movies_vocab)
self.task = tfrs.tasks.Retrieval(
metrics=tfrs.metrics.FactorizedTopK(
candidates=movie_features.batch(8096).map(self.candidate_model),
),
)
def compute_loss(self, features, training=True):
query_embeddings = self.query_model({
"USER_ID": features["USER_ID"],
"USER_FEATURES": features["USER_FEATURES"],
})
movie_embeddings = self.candidate_model({
"MOVIE_ID": features["MOVIE_ID"],
"MOVIE_FEATURES": features["MOVIE_FEATURES"],
})
return self.task(
query_embeddings, movie_embeddings)
After training when I try to get recommendations by creating index using the BruteForce Layer, and d is the dictionary containing UserId and User Features
index = tfrs.layers.factorized_top_k.BruteForce(model.query_model)
index.index(movie_features.batch(16192).map(model.candidate_model), movies)
_, titles = index(d, k=1)
My model recommends movies which are not there in my embedding vector space and throws this error when I try to get recommendations for k>1
InvalidArgumentError: indices[0,1] = 204244 is not in [0, 83349) [Op:ResourceGather]
Whereas when I pass k=1, the model gives me a movie ID
recommendations for user [105]: [Movie ID]
How do I resolve this issue?
Thank You! :)
EDIT 1: To add to the above problem, when I pass candidate features without the identifier index.index(movie_features.batch(16192).map(model.candidate_model)), then my model returns indices correctly, but these indices values are greater than the length of movies variable.
movie_features: <MapDataset shapes: {MOVIE_ID: (), MOVIE_FEATURE: ()}, types: {MOVIE_ID: tf.int32, MOVIE_FEATURE: tf.float64}>
moveis: list of unique movie id's movies = training_data.MOVIE_ID.unique() of length 83349