CarperAI
diff --git a/configs/ilql_config.yml b/configs/ilql_config.yml
Lines changed: 0 additions & 1 deletion
diff --git a/configs/nemo_ilql_config.yml b/configs/nemo_ilql_config.yml
Lines changed: 0 additions & 1 deletion
diff --git a/configs/ppo_config.yml b/configs/ppo_config.yml
Lines changed: 0 additions & 1 deletion
diff --git a/configs/ppo_gptj.yml b/configs/ppo_gptj.yml
Lines changed: 0 additions & 1 deletion
diff --git a/configs/sft_config.yml b/configs/sft_config.yml
Lines changed: 0 additions & 1 deletion
diff --git a/configs/test_config.yml b/configs/test_config.yml
Lines changed: 1 addition & 2 deletions
diff --git a/docs/source/index.rst b/docs/source/index.rst
Lines changed: 0 additions & 1 deletion
diff --git a/docs/source/orchestrator.rst b/docs/source/orchestrator.rst
Lines changed: 0 additions & 23 deletions
diff --git a/docs/source/pipeline.rst b/docs/source/pipeline.rst
Lines changed: 1 addition & 1 deletion
diff --git a/examples/experiments/grounded_program_synthesis/configs/trlx_ppo_config.yml b/examples/experiments/grounded_program_synthesis/configs/trlx_ppo_config.yml
Lines changed: 0 additions & 1 deletion
@@ -8,7 +8,6 @@ train:
   eval_interval: 100
 
   pipeline: "PromptPipeline"
-  orchestrator: "OfflineOrchestrator"
   trainer: "AccelerateILQLTrainer"
   seed: 1000
 
 
@@ -7,7 +7,6 @@ train:
   eval_interval: 20
 
   pipeline: "PromptPipeline"
-  orchestrator: "OfflineOrchestrator"
   trainer: "NeMoILQLTrainer"
   trainer_kwargs:
     pretrained_model: "/mnt/nvme/home/uwu/nemo-megatron-gpt-20B/"
 
@@ -8,7 +8,6 @@ train:
   eval_interval: 100
 
   pipeline: "PromptPipeline"
-  orchestrator: "PPOOrchestrator"
   trainer: "AcceleratePPOTrainer"
 
 model:
 
@@ -8,7 +8,6 @@ train:
   eval_interval: 16
 
   pipeline: "PromptPipeline"
-  orchestrator: "PPOOrchestrator"
   trainer: "AcceleratePPOTrainer"
 
 model:
 
@@ -8,7 +8,6 @@ train:
   eval_interval: 100
 
   pipeline: "PromptPipeline"
-  orchestrator: "PPOOrchestrator"
   trainer: "AccelerateSFTTrainer"
 
 model:
 
@@ -8,7 +8,6 @@ train:
   eval_interval: 128 # eval interval
 
   pipeline: "PromptPipeline" # prompt pipeline to load
-  orchestrator: "PPOOrchestrator" # orchestrator to load
   trainer: "AcceleratePPOTrainer" # Name of model trainer to load
 
 model:
@@ -36,7 +35,7 @@ scheduler:
 method:
   name: "ppoconfig" # Name of RL method config
   num_rollouts: 128 # Number of rollouts to collect per epoch
-  chunk_size: 128 # Number of rollouts to collect in one loop of orchestrator
+  chunk_size: 128 # Number of rollouts to collect in one loop
   ppo_epochs: 4 # Number of ppo epochs
   init_kl_coef: 0.2 # init kl coefficient
   target: 6 # target kl coefficient, set None for fixed kl coef
 
@@ -14,7 +14,6 @@ currently supports training using PPO or ILQL for models up to 20B using Acceler
 
    data
    models
-   orchestrator
    configs
    pipeline
    examples
 
@@ -4,7 +4,7 @@ Pipelines
 ************************
 
 Pipelines are how you read from a dataset with trlX. Rollout stores are how models store experiences created
-for them by the orchestrator. It is these experiences in their rollout store that they are trained on.
+for them. It is these experiences in their rollout store that they are trained on.
 
 **General**
 
 
@@ -8,7 +8,6 @@ train:
   eval_interval: 16
 
   pipeline: "PromptPipeline"
-  orchestrator: "PPOOrchestrator"
   trainer: "AcceleratePPOTrainer"
 
 model: