
Commit a221f06

Sasha Sheng authored and facebook-github-bot committed
[fix,feat] update the state_key based on module (#675)
Summary:
- Allow for module-wise state dict key updates.
- Make use of the `_register_load_state_dict_pre_hook` mechanism to update the keys of the state dict. Opted for this approach because recursion is already implemented in the `load_state_dict` function, so there is no need to re-implement it; better to make use of the PyTorch implementation.
- Slightly cleaner fix compared to this one: #664
- Some documentation cleanup.

Pull Request resolved: #675

Reviewed By: vedanuj

Differential Revision: D24714619

Pulled By: ytsheng

fbshipit-source-id: ccbf85c9aedae4bded3234d9b178e6b34241bbc3
1 parent 6aec3eb commit a221f06
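A minimal sketch of the mechanism the summary relies on (not the MMF code; `RenamingLinear` and the key names here are assumptions): `load_state_dict` walks the module tree and calls each submodule's `_load_from_state_dict` with that submodule's key prefix, so a module can rewrite only its own keys and the recursion comes for free.

import torch
from torch import nn

class RenamingLinear(nn.Module):
    def __init__(self):
        super().__init__()
        self.lc = nn.Linear(4, 4)

    def _load_from_state_dict(self, state_dict, prefix, local_metadata,
                              strict, missing_keys, unexpected_keys, error_msgs):
        # Rewrite old keys like "<prefix>module.lc.weight" to "<prefix>lc.weight"
        # before the default loading logic consumes them; prefix is supplied by
        # load_state_dict as it recurses over submodules.
        old_prefix = prefix + "module."
        for k in list(state_dict.keys()):
            if k.startswith(old_prefix):
                state_dict[prefix + k[len(old_prefix):]] = state_dict.pop(k)
        super()._load_from_state_dict(state_dict, prefix, local_metadata, strict,
                                      missing_keys, unexpected_keys, error_msgs)

model = nn.Sequential(RenamingLinear())
old_ckpt = {"0.module.lc.weight": torch.zeros(4, 4),
            "0.module.lc.bias": torch.zeros(4)}
model.load_state_dict(old_ckpt)  # old-style keys load without errors

The commit overrides `_load_from_state_dict` directly rather than registering a separate pre-hook, but both run through the same per-module dispatch.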

File tree

6 files changed: +35 -25 lines changed

mmf/models/base_model.py

Lines changed: 3 additions & 3 deletions
@@ -60,7 +60,7 @@ def forward(self, sample_list):


 class BaseModel(nn.Module):
-    """For integration with Pythia's trainer, datasets and other features,
+    """For integration with MMF's trainer, datasets and other features,
     models needs to inherit this class, call `super`, write a build function,
     write a forward function taking a ``SampleList`` as input and returning a
     dict as output and finally, register it using ``@registry.register_model``
@@ -124,8 +124,8 @@ def config_path(cls):

     @classmethod
     def format_state_key(cls, key):
-        """Can be implemented if something special needs to be done
-        key when pretrained model is being load. This will adapt and return
+        """Can be implemented if something special needs to be done to the
+        key when pretrained model is being loaded. This will adapt and return
         keys according to that. Useful for backwards compatibility. See
         updated load_state_dict below. For an example, see VisualBERT model's
         code.
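As a hypothetical illustration of the pattern the docstring describes (the model class and key names here are assumptions, not from the repo), a subclass can override `format_state_key` to translate old checkpoint keys:

from mmf.models.base_model import BaseModel

class MyModel(BaseModel):
    @classmethod
    def format_state_key(cls, key):
        # e.g. a layer that was renamed from "old_encoder" to "encoder"
        return key.replace("old_encoder", "encoder")

The Pythia diff below shows a real override of exactly this kind.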

mmf/models/m4c.py

Lines changed: 0 additions & 6 deletions
@@ -33,12 +33,6 @@ def __init__(self, config):
     def config_path(cls):
         return "configs/models/m4c/defaults.yaml"

-    @classmethod
-    def format_state_key(cls, key):
-        key = key.replace("obj_faster_rcnn_fc7.module.lc", "obj_faster_rcnn_fc7.lc")
-        key = key.replace("ocr_faster_rcnn_fc7.module.lc", "ocr_faster_rcnn_fc7.lc")
-        return key
-
     def build(self):
         # modules requiring custom learning rates (usually for finetuning)
         self.finetune_modules = []

mmf/models/movie_mcan.py

Lines changed: 0 additions & 7 deletions
@@ -30,13 +30,6 @@ def __init__(self, config):
     def config_path(cls):
         return "configs/models/movie_mcan/defaults.yaml"

-    @classmethod
-    def format_state_key(cls, key):
-        key = key.replace(
-            "image_feature_encoders.0.module.lc", "image_feature_encoders.0.lc"
-        )
-        return key
-
     def build(self):
         self.image_feature_dim = 2048
         self._build_word_embedding()

mmf/models/pythia.py

Lines changed: 1 addition & 5 deletions
@@ -30,11 +30,7 @@ def config_path(cls):

     @classmethod
     def format_state_key(cls, key):
-        key = key.replace("fa_history", "fa_context")
-        key = key.replace(
-            "image_feature_encoders.0.module.lc", "image_feature_encoders.0.lc"
-        )
-        return key
+        return key.replace("fa_history", "fa_context")

     def build(self):
         self._build_word_embedding()

mmf/modules/encoders.py

Lines changed: 26 additions & 0 deletions
@@ -125,6 +125,32 @@ def __init__(self, config: Config, *args, **kwargs):
         self.lc.bias.data.copy_(torch.from_numpy(bias))
         self.out_dim = out_dim

+    def _load_from_state_dict(
+        self,
+        state_dict,
+        prefix,
+        local_metadata,
+        strict,
+        missing_keys,
+        unexpected_keys,
+        error_msgs,
+    ):
+        old_prefix = prefix + "module."
+        for k in list(state_dict.keys()):
+            if k.startswith(old_prefix):
+                new_k = k.replace(old_prefix, prefix)
+                state_dict[new_k] = state_dict.pop(k)
+
+        super()._load_from_state_dict(
+            state_dict,
+            prefix,
+            local_metadata,
+            strict,
+            missing_keys,
+            unexpected_keys,
+            error_msgs,
+        )
+
     def forward(self, image):
         i2 = self.lc(image)
         i3 = nn.functional.relu(i2)
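This hook is what lets the per-model `format_state_key` overrides in m4c.py and movie_mcan.py above be deleted: checkpoints whose fc7 weights were saved under the old `*.module.lc.*` layout now remap to `*.lc.*` inside the encoder itself. The summary also mentions `_register_load_state_dict_pre_hook`; an equivalent formulation of the same remap using that API would look roughly like this (a sketch with assumed names, not the repo's code):

import torch
from torch import nn

class Fc7Like(nn.Module):
    def __init__(self):
        super().__init__()
        self.lc = nn.Linear(8, 8)
        # The hook runs at the start of this module's _load_from_state_dict,
        # so it sees the same per-module prefix as the override above.
        self._register_load_state_dict_pre_hook(self._remap_module_prefix)

    def _remap_module_prefix(self, state_dict, prefix, local_metadata,
                             strict, missing_keys, unexpected_keys, error_msgs):
        old_prefix = prefix + "module."
        for k in list(state_dict.keys()):
            if k.startswith(old_prefix):
                state_dict[k.replace(old_prefix, prefix)] = state_dict.pop(k)

enc = Fc7Like()
enc.load_state_dict({"module.lc.weight": torch.zeros(8, 8),
                     "module.lc.bias": torch.zeros(8)})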

mmf/utils/checkpoint.py

Lines changed: 5 additions & 4 deletions
@@ -321,11 +321,12 @@ def _load_pretrained(self, ckpt):
             key += "."
             value += "."
             for attr in ckpt:
+                if hasattr(model, "format_state_key"):
+                    formatted_attr = model.format_state_key(attr)
+                else:
+                    formatted_attr = attr
+
                 for own_attr in own_state:
-                    if hasattr(model, "format_state_key"):
-                        formatted_attr = model.format_state_key(attr)
-                    else:
-                        formatted_attr = attr
                     if (
                         key in own_attr
                         and value in formatted_attr
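This change is a loop-invariant hoist: `formatted_attr` depends only on `attr`, so it can be computed once per checkpoint key instead of once per (attr, own_attr) pair, without changing behavior. A self-contained sketch of the resulting shape (names assumed, not the MMF code):

def match_keys(ckpt_keys, own_keys, format_state_key=lambda k: k):
    matches = []
    for attr in ckpt_keys:
        # Loop-invariant: depends only on attr, so compute it once here
        # rather than inside the inner loop over the model's own keys.
        formatted_attr = format_state_key(attr)
        for own_attr in own_keys:
            if own_attr == formatted_attr:
                matches.append((own_attr, attr))
    return matches

print(match_keys(["fa_history.w"], ["fa_context.w"],
                 lambda k: k.replace("fa_history", "fa_context")))
# [('fa_context.w', 'fa_history.w')]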
