Add Annotation to Stored Triples #10410


Open
wants to merge 22 commits into base: latest-txt2kg
Changes from 5 commits

Commits (22)
581c864
Add support for storing model name alongside triples
nv-rliu Aug 11, 2025
b3353f6
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 11, 2025
2dfde6c
Add messaging
nv-rliu Aug 11, 2025
df3e8b1
Merge branch 'add-annotation-to-triples' of https://github.com/nv-rli…
nv-rliu Aug 11, 2025
850b41f
update
nv-rliu Aug 11, 2025
b3a93d4
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 11, 2025
a5e3d37
Merge branch 'latest-txt2kg' into add-annotation-to-triples
nv-rliu Aug 11, 2025
baf737e
Fix style
nv-rliu Aug 11, 2025
796fa3f
Merge branch 'add-annotation-to-triples' of https://github.com/nv-rli…
nv-rliu Aug 11, 2025
25a5e1d
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 11, 2025
b64f17e
Add support for storing model and timestamp in filename
nv-rliu Aug 12, 2025
6ca48d3
Merge branch 'add-annotation-to-triples' of https://github.com/nv-rli…
nv-rliu Aug 12, 2025
ad2644f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 12, 2025
4270ab8
Revert
nv-rliu Aug 12, 2025
1c3369e
Revert
nv-rliu Aug 12, 2025
a7abdd4
Merge branch 'add-annotation-to-triples' of https://github.com/nv-rli…
nv-rliu Aug 12, 2025
54b6c8c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 12, 2025
389835e
Revert README
nv-rliu Aug 12, 2025
3f72d19
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 12, 2025
b132bd7
Merge branch 'latest-txt2kg' into add-annotation-to-triples
puririshi98 Aug 12, 2025
b430610
Merge branch 'latest-txt2kg' into add-annotation-to-triples
puririshi98 Aug 12, 2025
434827e
Merge branch 'latest-txt2kg' into add-annotation-to-triples
puririshi98 Aug 13, 2025
27 changes: 21 additions & 6 deletions examples/llm/txt2kg_rag.py
@@ -322,8 +322,15 @@ def index_kg(args, context_docs):
    checkpoint_path = os.path.join(args.dataset, "checkpoint_kg.pt")
    if os.path.exists(checkpoint_path):
        print("Restoring KG from checkpoint...")
        saved_relevant_triples = torch.load(checkpoint_path,
                                            weights_only=False)
Contributor:

Why did you remove weights_only=False? I think you should leave that arg.

Author:

I thought weights_only was only useful if we want to extract model weights and biases, i.e. tensors. Otherwise the argument isn't really useful in this situation, is it? I can add it back if that's incorrect.

Contributor:

AFAIK that's the case when you set it to True; here we want to load items that are not tensors (weights and biases), so we need to set it to False, otherwise PyTorch might complain.

Author:

Ah okay. I thought it was okay to not use it since this Data object is never storing weights and biases. I'll add that back in.

        checkpoint_data = torch.load(checkpoint_path)

        # Check that the stored triples were generated with the correct model
        if args.NV_NIM_MODEL.split('/')[-1] != checkpoint_data['model']:
            raise RuntimeError(
                "Error: The stored triples were generated using a different model"
            )

        saved_relevant_triples = checkpoint_data['triples']
        kg_maker.relevant_triples = saved_relevant_triples
        kg_maker.doc_id_counter = len(saved_relevant_triples)
        initial_tqdm_count = kg_maker.doc_id_counter
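
A minimal sketch of the weights_only distinction discussed in the thread above (not part of this diff; the file name and checkpoint contents are placeholders):

import torch

# Hypothetical checkpoint in the format this PR writes: a plain dict holding
# the model name and the extracted triples (strings, not tensors).
checkpoint = {
    "model": "some-llm-name",
    "triples": [("head", "relation", "tail")],
}
torch.save(checkpoint, "checkpoint_kg.pt")

# Recent PyTorch releases default torch.load to weights_only=True, which
# restricts unpickling to tensors and a small allowlist of basic types.
# Passing weights_only=False keeps the permissive pickle behaviour needed
# when a checkpoint contains arbitrary Python objects.
data = torch.load("checkpoint_kg.pt", weights_only=False)
assert data["triples"][0] == ("head", "relation", "tail")
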
@@ -346,7 +353,10 @@ def index_kg(args, context_docs):
                           for triple_set in relevant_triples.values()))
    triples = list(dict.fromkeys(triples))
    raw_triples_path = os.path.join(args.dataset, "raw_triples.pt")
    torch.save({
        "model": kg_maker.get_model_name(),
        "triples": triples,
    }, raw_triples_path)
    if os.path.exists(checkpoint_path):
        os.remove(checkpoint_path)
    return triples
@@ -409,12 +419,18 @@ def make_dataset(args):
    triples = []
    raw_triples_path = os.path.join(args.dataset, "raw_triples.pt")
    if os.path.exists(raw_triples_path):
        triples = torch.load(raw_triples_path, weights_only=False)
        saved_data = torch.load(raw_triples_path)
Contributor:

Please save/load with the filename format {llm-name}{datetime}raw_triples.pt.

Author:

Changed the format.

        if args.NV_NIM_MODEL.split('/')[-1] != saved_data['model']:
            raise RuntimeError(
                "Error: The stored triples were generated using a different model"
            )

        print(f" -> Saved triples generated with: {saved_data['model']}")
        triples = saved_data['triples']
    else:
        triples = index_kg(args, context_docs)

    print("Number of triples in our GraphDB =", len(triples))

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # creating the embedding model
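
A hypothetical sketch of the filename format requested in the thread above; the diff shown here still uses the plain raw_triples.pt name, and the helper and variable names below are made up for illustration:

import os
from datetime import datetime

def annotated_triples_path(dataset_dir: str, model_name: str) -> str:
    # Hypothetical helper: builds a "{llm-name}_{datetime}_raw_triples.pt"
    # style filename so stored triples carry their provenance in the name.
    stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    safe_model = model_name.split('/')[-1]
    return os.path.join(dataset_dir, f"{safe_model}_{stamp}_raw_triples.pt")

# e.g. annotated_triples_path("dataset", "provider/some-llm")
# -> "dataset/some-llm_20250812-101500_raw_triples.pt"
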
@@ -746,7 +762,6 @@ def eval(question: str, pred: str, correct_answer: str):
        data_lists = update_data_lists(args, data_lists)
    else:
        data_lists = make_dataset(args)

    batch_size = args.batch_size
    eval_batch_size = args.eval_batch_size
    train_loader = DataLoader(data_lists["train"], batch_size=batch_size,
11 changes: 10 additions & 1 deletion torch_geometric/nn/nlp/txt2kg.py
@@ -85,7 +85,11 @@ def save_kg(self, path: str) -> None:
        Returns:
            None
        """
        torch.save(self.relevant_triples, path)
        torch.save(
            {
                "model": self.get_model_name(),
                "triples": self.relevant_triples
            }, path)

    def _chunk_to_triples_str_local(self, txt: str) -> str:
        # call LLM on text
@@ -192,6 +196,11 @@ def add_doc_2_KG(
        # Increment the doc_id_counter for the next document
        self.doc_id_counter += 1

    def get_model_name(self) -> str:
        """Returns the name of the model used to generate triples.
        """
        return self.NIM_MODEL.split('/')[-1] if not self.local_LM else "local"


def _chunk_to_triples_str_cloud(
        txt: str, GLOBAL_NIM_KEY='',
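
A short sketch of how a consumer might use the new checkpoint layout; it assumes an existing TXT2KG instance (kg_maker) and a placeholder path, and is not part of the diff:

import torch
from torch_geometric.nn.nlp import TXT2KG

def save_and_reload_triples(kg_maker: TXT2KG, path: str = "kg_triples.pt"):
    # save_kg() now writes a dict with "model" and "triples" instead of the
    # bare triples dict, so loaders unpack it and can check which LLM
    # produced the stored triples.
    kg_maker.save_kg(path)
    saved = torch.load(path, weights_only=False)
    print("Triples generated with:", saved["model"])  # "local" for a local LM
    return saved["triples"]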