Add unit tests to ensure valid example configs (#120)

jon-tow · web-flow · commit b60f05e5b162 · 2022-12-03T14:14:29.000-05:00
diff --git a/configs/ppo_gptj.yml b/configs/ppo_gptj.yml
@@ -35,6 +35,10 @@ method:
   cliprange: 0.2  # clip range
   cliprange_value: 0.2  # clip range
   vf_coef: 0.2  # value term weight
+  scale_reward: False  # False | "ref" | "running" estimate against which to scale rewards
+  ref_mean: null
+  ref_std: null  # rescale rewards with this deviation
+  cliprange_reward: 10
   gen_kwargs:
     max_length: 48  # LM max sample gen length
     min_length: 48  # LM min sample gen length
diff --git a/examples/experiments/grounded_program_synthesis/configs/trlx_ppo_config.yml b/examples/experiments/grounded_program_synthesis/configs/trlx_ppo_config.yml
@@ -10,12 +10,11 @@ train:
   total_steps: 80000  # Train for max(epochs, total_steps)
   batch_size: 8  # batch size
 
-  lr_ramp_steps: 100  # learning rate warm up
-  lr_decay_steps: 79000  # learning rate decay
-  weight_decay: 1.0e-6  # weight decay param
-  learning_rate_init: 1.412e-4  # init learning rate
-  learning_rate_target: 1.412e-4  # target final learning rate
+  lr_init: 1.412e-4  # init learning rate
+  lr_target: 1.412e-4  # target final learning rate
   opt_betas: [0.9, 0.95] # adam betas
+  opt_eps: 1.0e-8  # adam eps
+  weight_decay: 1.0e-6  # weight decay param
 
   checkpoint_interval: 1000000  # checkpoint interval
   eval_interval: 16  # eval interval
@@ -36,6 +35,10 @@ method:
   cliprange: 0.2  # clip range
   cliprange_value: 0.2  # clip range
   vf_coef: 0.2  # value term weight
+  scale_reward: False # False|"ref"|"running" estimate against which to scale rewards
+  cliprange_reward: 10
+  ref_mean: null
+  ref_std: null
   gen_kwargs:
     max_length: 256  # LM max sample gen length
     min_length: 48  # LM min sample gen length
diff --git a/examples/experiments/grounded_program_synthesis/lang.py b/examples/experiments/grounded_program_synthesis/lang.py
@@ -1,8 +1,10 @@
-import random
 import copy
+import json
+import random
+from pathlib import Path
 from pprint import pprint
+
 from tqdm import tqdm
-import json
 from transformers import AutoTokenizer
 
 
@@ -388,5 +390,6 @@ def basic_stats(dataset, tokenizer):
     test_data = create_synthetic_dataset(2_000)
     print(f"Train data size: {len(train_data)}")
     print(f"Test data size: {len(test_data)}")
+    Path("dataset").mkdir(parents=True, exist_ok=True)
     write_to_json(train_data, "dataset/train.json")
     write_to_json(test_data, "dataset/test.json")
diff --git a/examples/experiments/grounded_program_synthesis/train_trlx.py b/examples/experiments/grounded_program_synthesis/train_trlx.py
@@ -49,7 +49,7 @@ def reward_fn(samples):
     return reward_list
 
 
-default_config = yaml.safe_load(open("config/trlx_ppo_config.yml"))
+default_config = yaml.safe_load(open("configs/trlx_ppo_config.yml"))
 
 
 def main(hparams={}):
@@ -60,7 +60,6 @@ def main(hparams={}):
     train_prompts = list(dataset.load_datapoints(split="train"))[:1000]
 
     model = trlx.train(
-        "reshinthadith/codegen_350M_list_manip_5_len",
         reward_fn=reward_fn,
         prompts=train_prompts,
         config=config,
diff --git a/tests/test_configs.py b/tests/test_configs.py
@@ -0,0 +1,36 @@
+import os
+
+from trlx.data.configs import TRLConfig
+from typing import List
+
+
+def _get_config_dirs(dir: str, config_dir_name: str = "configs") -> List[str]:
+    """Walks `dir` recursively; returns each sub-directory named `config_dir_name`."""
+    # Sibling names are unique, so one membership test per walked directory suffices.
+    return [
+        os.path.join(parent, config_dir_name)
+        for parent, subdirs, _ in os.walk(dir)
+        if config_dir_name in subdirs
+    ]
+
+
+def _get_yaml_filepaths(dir: str) -> List[str]:
+    """Lists entries directly inside `dir` whose names end in `.yml`, joined onto `dir`."""
+    return [
+        os.path.join(dir, entry)
+        for entry in os.listdir(dir)
+        if entry.endswith(".yml")
+    ]
+
+
+def test_repo_trl_configs():
+    """Ensures every `.yml` in `configs` and in `examples/**/configs` loads as a `TRLConfig`."""
+    config_dirs = ["configs", *_get_config_dirs("examples")]  # relative to the working directory
+    config_files = [path for d in config_dirs for path in _get_yaml_filepaths(d)]
+    for file in config_files:
+        assert os.path.isfile(file), f"Config file {file} does not exist."
+        assert file.endswith(".yml"), f"Config file {file} is not a yaml file."
+        try:
+            TRLConfig.load_yaml(file)
+        except Exception as e:  # raise, not `assert False`: asserts vanish under `python -O`
+            raise AssertionError(f"Failed to load config file `{file}` with error `{e}`") from e