Skip to content

Problem while Inferencing: InvalidArgumentError: indices[0,1] = 66521 is not in [0, 12976) [Op:ResourceGather] #286

@aajais

Description

@aajais

I am trying to build a recommendation system using the TensorFlow Recommenders (TFRS), and I'm facing an issue while inferencing the trained model. I have created the Query and Candidate Towers and set the embedding dimension to be the length of unique movies in my dataset. The user features have already been normailzed.

class UserModel(tf.keras.Model):
    def __init__(self, users_vocab, embedding_dimension = 128):
        super().__init__()
        self.user_model = tf.keras.Sequential([
            tf.keras.layers.experimental.preprocessing.IntegerLookup(vocabulary=users_vocab),
            tf.keras.layers.Embedding(len(users_vocab) + 2, embedding_dimension)
        ])

    def call(self, inputs):
        # Take the input dictionary, pass it through each input layer,
        # and concatenate the result.
        return tf.concat([
            self.user_model(inputs["USER_ID"]),
            tf.reshape(inputs["USER_FEATURE"], [-1, 1])
        ], axis=1)
class QueryModel(tf.keras.Model):
    """Model for encoding user queries."""

    def __init__(self, users_vocab):
        super().__init__()
        # We first use the user model for generating embeddings.
        self.embedding_model = UserModel(users_vocab)

        # Then construct the layers.
        self.dense_layers = tf.keras.Sequential()

        # Use the ReLU activation for all but the last layer.
        self.dense_layers.add(tf.keras.layers.Dense(128, activation="relu"))
        self.dense_layers.add(tf.keras.layers.Dropout(0.1))
        self.dense_layers.add(tf.keras.layers.Dense(64, activation="relu"))
        self.dense_layers.add(tf.keras.layers.Dropout(0.1))

        # No activation for the last layer.
        self.dense_layers.add(tf.keras.layers.Dense(32))

    def call(self, inputs):
        feature_embedding = self.embedding_model(inputs)
        return self.dense_layers(feature_embedding)


class MovieModel(tf.keras.Model):

    def __init__(self, movies_vocab, embedding_dimension=256):
        super().__init__()

        self.movie_model = tf.keras.Sequential([
            tf.keras.layers.experimental.preprocessing.IntegerLookup(vocabulary=movies_vocab),
            tf.keras.layers.Embedding(len(movies_vocab) + 2, embedding_dimension)
        ])

    def call(self, inputs):
        return tf.concat([
            self.movie_model(inputs["MOVIE_ID"]),
            tf.reshape(inputs["MOVIE_FEATURES"], [-1, 1]),
        ], axis=1)


class CandidateModel(tf.keras.Model):
    """Model for encoding movies."""

    def __init__(self, movies_vocab):
        super().__init__()
        self.embedding_model = MovieModel(movies_vocab)

        # Then construct the layers.
        self.dense_layers = tf.keras.Sequential()

        # Use the ReLU activation for all but the last layer.
        self.dense_layers.add(tf.keras.layers.Dense(128, activation="relu"))
        self.dense_layers.add(tf.keras.layers.Dropout(0.1))
        self.dense_layers.add(tf.keras.layers.Dense(64, activation="relu"))
        self.dense_layers.add(tf.keras.layers.Dropout(0.1))

        # No activation for the last layer.
        self.dense_layers.add(tf.keras.layers.Dense(32))

    def call(self, inputs):
        # print("Candidate Model", inputs)
        feature_embedding = self.embedding_model(inputs)
        return self.dense_layers(feature_embedding)


class RetrievalModel(tfrs.Model):

    def __init__(self, users_vocab, movies_vocab, movie_features):
        super().__init__()

        self.query_model = QueryModel(users_vocab)
        self.candidate_model = CandidateModel(movies_vocab)
        self.task = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(
                candidates=movie_features.batch(8096).map(self.candidate_model),
            ),
        )

    def compute_loss(self, features, training=True):
        query_embeddings = self.query_model({
            "USER_ID": features["USER_ID"],
            "USER_FEATURES": features["USER_FEATURES"],
        })
        movie_embeddings = self.candidate_model({
            "MOVIE_ID": features["MOVIE_ID"],
            "MOVIE_FEATURES": features["MOVIE_FEATURES"],
        })

        return self.task(
            query_embeddings, movie_embeddings)

After training when I try to get recommendations by creating index using the BruteForce Layer, and d is the dictionary containing UserId and User Features

index = tfrs.layers.factorized_top_k.BruteForce(model.query_model)
index.index(movie_features.batch(16192).map(model.candidate_model), movies)
_, titles = index(d, k=1)

My model recommends movies which are not there in my embedding vector space and throws this error when I try to get recommendations for k>1

InvalidArgumentError: indices[0,1] = 204244 is not in [0, 83349) [Op:ResourceGather]
Whereas when I pass k=1, the model gives me a movie ID

recommendations for user [105]: [Movie ID]
How do I resolve this issue?

Thank You! :)

EDIT 1: To add to the above problem, when I pass candidate features without the identifier index.index(movie_features.batch(16192).map(model.candidate_model)), then my model returns indices correctly, but these indices values are greater than the length of movies variable.

movie_features: <MapDataset shapes: {MOVIE_ID: (), MOVIE_FEATURE: ()}, types: {MOVIE_ID: tf.int32, MOVIE_FEATURE: tf.float64}>
moveis: list of unique movie id's movies = training_data.MOVIE_ID.unique() of length 83349

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions