|
1 | 1 | import argparse |
2 | 2 | import logging |
| 3 | +from pprint import pformat |
3 | 4 |
|
| 5 | +import kubeflow.katib as katib |
4 | 6 | from kubeflow.katib import KatibClient, search |
5 | 7 | from kubeflow.katib.types.types import TrainerResources |
6 | 8 | from kubernetes import client |
|
12 | 14 | # The default logging config. |
13 | 15 | logging.basicConfig(level=logging.INFO) |
14 | 16 |
|
15 | | - |
16 | 17 | def run_e2e_experiment_create_by_tune( |
17 | 18 | katib_client: KatibClient, |
18 | 19 | exp_name: str, |
@@ -53,9 +54,8 @@ def objective(parameters): |
53 | 54 | verify_experiment_results(katib_client, experiment, exp_name, exp_namespace) |
54 | 55 |
|
55 | 56 | # Print the Experiment and Suggestion. |
56 | | - logging.debug(katib_client.get_experiment(exp_name, exp_namespace)) |
57 | | - logging.debug(katib_client.get_suggestion(exp_name, exp_namespace)) |
58 | | - |
| 57 | + logging.debug("Experiment:\n%s", pformat(katib_client.get_experiment(exp_name, exp_namespace))) |
| 58 | + logging.debug("Suggestion:\n%s", pformat(katib_client.get_suggestion(exp_name, exp_namespace))) |
59 | 59 |
|
60 | 60 | def run_e2e_experiment_create_by_tune_pytorchjob( |
61 | 61 | katib_client: KatibClient, |
@@ -115,9 +115,85 @@ def objective(parameters): |
115 | 115 | verify_experiment_results(katib_client, experiment, exp_name, exp_namespace) |
116 | 116 |
|
117 | 117 | # Print the Experiment and Suggestion. |
118 | | - logging.debug(katib_client.get_experiment(exp_name, exp_namespace)) |
119 | | - logging.debug(katib_client.get_suggestion(exp_name, exp_namespace)) |
| 118 | + logging.debug("Experiment:\n%s", pformat(katib_client.get_experiment(exp_name, exp_namespace))) |
| 119 | + logging.debug("Suggestion:\n%s", pformat(katib_client.get_suggestion(exp_name, exp_namespace))) |
| 120 | + |
def run_e2e_experiment_create_by_tune_with_llm_optimization(
    katib_client: KatibClient,
    exp_name: str,
    exp_namespace: str,
):
    """Run an E2E test that creates an Experiment via ``tune()`` for LLM optimization.

    Fine-tunes a BERT sequence-classification model on a tiny slice of the Yelp
    dataset, searching over the learning rate and the LoRA rank, waits for the
    Experiment to reach the Succeeded condition, and verifies its results.

    Args:
        katib_client: Katib client used to create and inspect the Experiment.
        exp_name: Name of the Experiment to create.
        exp_namespace: Namespace in which to create the Experiment.
    """
    # Heavy third-party dependencies are imported lazily so the rest of the
    # test module stays importable without them.
    import transformers
    from kubeflow.storage_initializer.hugging_face import (
        HuggingFaceDatasetParams,
        HuggingFaceModelParams,
        HuggingFaceTrainerParams,
    )
    from peft import LoraConfig

    # Create Katib Experiment and wait until it is finished.
    logging.debug("Creating Experiment: {}/{}".format(exp_namespace, exp_name))

    # Use the test case from the fine-tuning API tutorial:
    # https://www.kubeflow.org/docs/components/training/user-guides/fine-tuning/
    katib_client.tune(
        name=exp_name,
        namespace=exp_namespace,
        # BERT model URI and type of Transformer to train it.
        model_provider_parameters=HuggingFaceModelParams(
            model_uri="hf://google-bert/bert-base-cased",
            transformer_type=transformers.AutoModelForSequenceClassification,
            num_labels=5,
        ),
        # In order to save test time, use 8 samples from the Yelp dataset.
        dataset_provider_parameters=HuggingFaceDatasetParams(
            repo_id="yelp_review_full",
            split="train[:8]",
        ),
        # Specify HuggingFace Trainer parameters.
        trainer_parameters=HuggingFaceTrainerParams(
            training_parameters=transformers.TrainingArguments(
                output_dir="test_tune_api",
                save_strategy="no",
                learning_rate=search.double(min=1e-05, max=5e-05),
                num_train_epochs=1,
            ),
            # Set LoRA config to reduce number of trainable model parameters.
            lora_config=LoraConfig(
                r=search.int(min=8, max=32),
                lora_alpha=8,
                lora_dropout=0.1,
                bias="none",
            ),
        ),
        objective_metric_name="train_loss",
        objective_type="minimize",
        algorithm_name="random",
        max_trial_count=1,
        parallel_trial_count=1,
        # Use the TrainerResources name imported at the top of the file,
        # consistent with the other tests in this module.
        resources_per_trial=TrainerResources(
            num_workers=1,
            num_procs_per_worker=1,
            resources_per_worker={"cpu": "2", "memory": "10G"},
        ),
        storage_config={
            "size": "10Gi",
            "access_modes": ["ReadWriteOnce"],
        },
        retain_trials=True,
    )
    # Wait until the Experiment reaches the Succeeded condition.
    experiment = katib_client.wait_for_experiment_condition(
        exp_name, exp_namespace, timeout=EXPERIMENT_TIMEOUT
    )

    # Verify the Experiment results.
    verify_experiment_results(katib_client, experiment, exp_name, exp_namespace)

    # Print the Experiment and Suggestion.
    logging.debug("Experiment:\n%s", pformat(katib_client.get_experiment(exp_name, exp_namespace)))
    logging.debug("Suggestion:\n%s", pformat(katib_client.get_suggestion(exp_name, exp_namespace)))
121 | 197 |
|
122 | 198 | if __name__ == "__main__": |
123 | 199 | parser = argparse.ArgumentParser() |
@@ -189,3 +265,19 @@ def objective(parameters): |
189 | 265 | logging.info("---------------------------------------------------------------") |
190 | 266 | logging.info("---------------------------------------------------------------") |
191 | 267 | katib_client.delete_experiment(exp_name, exp_namespace) |
| 268 | + |
| 269 | + exp_name = "tune-example-llm-optimization" |
| 270 | + exp_namespace = args.namespace |
| 271 | + try: |
| 272 | + run_e2e_experiment_create_by_tune_with_llm_optimization(katib_client, exp_name, exp_namespace) |
| 273 | + logging.info("---------------------------------------------------------------") |
| 274 | + logging.info(f"E2E is succeeded for Experiment created by tune: {exp_namespace}/{exp_name}") |
| 275 | + except Exception as e: |
| 276 | + logging.info("---------------------------------------------------------------") |
| 277 | + logging.info(f"E2E is failed for Experiment created by tune: {exp_namespace}/{exp_name}") |
| 278 | + raise e |
| 279 | + finally: |
| 280 | + # Delete the Experiment. |
| 281 | + logging.info("---------------------------------------------------------------") |
| 282 | + logging.info("---------------------------------------------------------------") |
| 283 | + katib_client.delete_experiment(exp_name, exp_namespace) |
0 commit comments