|
3 | 3 | import os
|
4 | 4 | import signal
|
5 | 5 | import sys
|
| 6 | +from pathlib import Path |
6 | 7 | from typing import Optional
|
7 | 8 |
|
8 | 9 | from openai import OpenAI
|
@@ -49,6 +50,19 @@ def interactive_cli(args: argparse.Namespace) -> None:
|
49 | 50 | chat(args.system_prompt, model_name, openai_client)
|
50 | 51 |
|
51 | 52 |
|
def _parse_bool_flag(value) -> bool:
    """Interpret a command-line flag value as a boolean.

    The ``--auto_convert`` option is declared without a ``type``, so a value
    given on the command line arrives as a string and ``"false"`` would be
    truthy. Booleans (e.g. the parser default) pass through unchanged.
    Unrecognised strings stay truthy, matching the previous behaviour.
    """
    if isinstance(value, str):
        return value.strip().lower() not in ("", "0", "false", "no", "off")
    return bool(value)


def tgis_cli(args: argparse.Namespace) -> None:
    """Dispatch the weight/tokenizer sub-commands.

    Installs the signal handlers, then routes on ``args.command`` to
    ``download_weights``, ``convert_to_safetensors`` or
    ``convert_to_fast_tokenizer``. Any other command value is ignored.

    Args:
        args: Parsed command-line namespace. Every command reads
            ``model_name`` and ``revision``; ``download-weights`` also reads
            ``token``, ``extension`` and ``auto_convert``;
            ``convert-to-fast-tokenizer`` also reads ``output_path``.
    """
    registrer_signal_handlers()

    if args.command == "download-weights":
        # Coerce here so that "--auto_convert false" really disables
        # conversion instead of being treated as a non-empty (truthy) string.
        download_weights(args.model_name, args.revision, args.token,
                         args.extension,
                         _parse_bool_flag(args.auto_convert))
    elif args.command == "convert-to-safetensors":
        convert_to_safetensors(args.model_name, args.revision)
    elif args.command == "convert-to-fast-tokenizer":
        convert_to_fast_tokenizer(args.model_name, args.revision,
                                  args.output_path)
| 64 | + |
| 65 | + |
52 | 66 | def complete(model_name: str, client: OpenAI) -> None:
|
53 | 67 | print("Please enter prompt to complete:")
|
54 | 68 | while True:
|
@@ -82,6 +96,151 @@ def chat(system_prompt: Optional[str], model_name: str,
|
82 | 96 | print(output)
|
83 | 97 |
|
84 | 98 |
|
def download_weights(
    model_name: str,
    revision: Optional[str] = None,
    token: Optional[str] = None,
    extension: str = ".safetensors",
    auto_convert: bool = True,
) -> None:
    """Download model files and optionally convert them after download.

    Args:
        model_name: Model identifier passed to ``hub.download_weights``.
        revision: Optional model revision.
        token: Optional auth token forwarded as ``auth_token``.
        extension: Comma-separated list of file extensions to download.
            When a single non-metadata extension is given, the metadata
            extensions (``.json``, ``.py``, ``.model``, ``.md``) are
            downloaded as well.
        auto_convert: When true, convert pytorch ``.bin`` weights to
            ``.safetensors`` if ``.safetensors`` was requested but none are
            present locally, and then run ``convert_to_fast_tokenizer``.
    """
    from vllm.tgis_utils import hub

    # NOTE(review): echoes the raw extension argument; looks like leftover
    # debug output, kept so the command's stdout is otherwise unchanged.
    print(extension)
    meta_exts = [".json", ".py", ".model", ".md"]

    extensions = extension.split(",")

    if len(extensions) == 1 and extensions[0] not in meta_exts:
        extensions.extend(meta_exts)

    files = hub.download_weights(model_name,
                                 extensions,
                                 revision=revision,
                                 auth_token=token)

    if auto_convert and ".safetensors" in extensions:
        if not hub.local_weight_files(hub.get_model_path(model_name, revision),
                                      ".safetensors"):
            if ".bin" not in extensions:
                # Messages use adjacent string literals rather than a
                # backslash inside the literal, which would embed the next
                # line's indentation in the printed text.
                print(".safetensors weights not found, "
                      "downloading pytorch weights to convert...")
                hub.download_weights(model_name,
                                     ".bin",
                                     revision=revision,
                                     auth_token=token)

            print(".safetensors weights not found, "
                  "converting from pytorch weights...")
            convert_to_safetensors(model_name, revision)
        elif not any(f.endswith(".safetensors") for f in files):
            # Local .safetensors files exist, but this download returned none.
            print(".safetensors weights not found on hub, "
                  "but were found locally. Remove them first to re-convert")
    if auto_convert:
        convert_to_fast_tokenizer(model_name, revision)
| 140 | + |
| 141 | + |
def convert_to_safetensors(
    model_name: str,
    revision: Optional[str] = None,
):
    """Convert locally stored pytorch ``.bin`` weights to ``.safetensors``.

    Prints a message and returns without converting when more than one
    ``.bin`` index file is found, when there are no ``.bin`` files, or when
    any of the target ``.safetensors`` files (or the target index file)
    already exists.

    Args:
        model_name: Model identifier used to locate the local files and to
            load the model config.
        revision: Optional model revision.
    """
    from vllm.tgis_utils import hub

    # Get local pytorch file paths
    model_path = hub.get_model_path(model_name, revision)
    local_pt_files = hub.local_weight_files(model_path, ".bin")
    local_pt_index_files = hub.local_index_files(model_path, ".bin")
    if len(local_pt_index_files) > 1:
        print(
            f"Found more than one .bin.index.json file: {local_pt_index_files}"
        )
        return
    if not local_pt_files:
        print("No pytorch .bin files found to convert")
        return

    local_pt_files = [Path(f) for f in local_pt_files]
    local_pt_index_file = local_pt_index_files[
        0] if local_pt_index_files else None

    # Safetensors final filenames
    local_st_files = [
        p.parent / f"{p.stem.removeprefix('pytorch_')}.safetensors"
        for p in local_pt_files
    ]

    if any(os.path.exists(p) for p in local_st_files):
        print("Existing .safetensors weights found, "
              "remove them first to reconvert")
        return

    try:
        import transformers

        config = transformers.AutoConfig.from_pretrained(
            model_name,
            revision=revision,
        )
        architecture = config.architectures[0]

        class_ = getattr(transformers, architecture)

        # Name for this variable depends on transformers version.
        # Copy into a fresh list: extending the attribute itself would
        # mutate the model class. Either attribute may be absent or None,
        # so fall back to an empty list instead of failing.
        discard_names = list(getattr(class_, "_tied_weights_keys", None) or [])
        discard_names.extend(
            getattr(class_, "_keys_to_ignore_on_load_missing", None) or [])

    except Exception as exc:
        # Best effort: conversion still proceeds, with nothing discarded.
        print(f"Could not determine tensor names to discard ({exc!r}), "
              "continuing with an empty discard list")
        discard_names = []

    if local_pt_index_file:
        local_pt_index_file = Path(local_pt_index_file)
        # e.g. "pytorch_model.bin.index.json" -> "model.safetensors.index.json"
        st_prefix = local_pt_index_file.stem.removeprefix(
            "pytorch_").removesuffix(".bin.index")
        local_st_index_file = (local_pt_index_file.parent /
                               f"{st_prefix}.safetensors.index.json")

        if os.path.exists(local_st_index_file):
            print("Existing .safetensors.index.json file found, "
                  "remove it first to reconvert")
            return

        hub.convert_index_file(local_pt_index_file, local_st_index_file,
                               local_pt_files, local_st_files)

    # Convert pytorch weights to safetensors
    hub.convert_files(local_pt_files, local_st_files, discard_names)
| 212 | + |
| 213 | + |
def convert_to_fast_tokenizer(
    model_name: str,
    revision: Optional[str] = None,
    output_path: Optional[str] = None,
):
    """Load the model's tokenizer and save it to a directory.

    Does nothing beyond printing a message when the local model directory
    already contains ``tokenizer.json``, or when ``output_path`` is given but
    is not an existing directory.

    Args:
        model_name: Model identifier used to locate the local model
            directory and to load the tokenizer.
        revision: Optional model revision.
        output_path: Directory to save into; defaults to the local model
            directory when omitted.
    """
    from vllm.tgis_utils import hub

    local_model_dir = hub.get_model_path(model_name, revision)

    # Skip the conversion if "tokenizer.json" is already present.
    fast_tokenizer_file = os.path.join(local_model_dir, "tokenizer.json")
    if os.path.exists(fast_tokenizer_file):
        print(f"Model {model_name} already has a fast tokenizer")
        return

    if output_path is None:
        output_path = local_model_dir
    elif not os.path.isdir(output_path):
        print(f"Output path {output_path} must exist and be a directory")
        return

    from transformers import AutoTokenizer

    loaded_tokenizer = AutoTokenizer.from_pretrained(model_name,
                                                     revision=revision)
    loaded_tokenizer.save_pretrained(output_path)

    print(f"Saved tokenizer to {output_path}")
| 242 | + |
| 243 | + |
85 | 244 | def _add_query_options(
|
86 | 245 | parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
|
87 | 246 | parser.add_argument(
|
@@ -142,6 +301,37 @@ def main():
|
142 | 301 | "used for models that support system prompts."))
|
143 | 302 | chat_parser.set_defaults(dispatch_function=interactive_cli, command="chat")
|
144 | 303 |
|
| 304 | + download_weights_parser = subparsers.add_parser( |
| 305 | + "download-weights", |
| 306 | + help=("Download the weights of a given model"), |
| 307 | + usage="vllm download-weights <model_name> [options]") |
| 308 | + download_weights_parser.add_argument("model_name") |
| 309 | + download_weights_parser.add_argument("--revision") |
| 310 | + download_weights_parser.add_argument("--token") |
| 311 | + download_weights_parser.add_argument("--extension", default=".safetensors") |
| 312 | + download_weights_parser.add_argument("--auto_convert", default=True) |
| 313 | + download_weights_parser.set_defaults(dispatch_function=tgis_cli, |
| 314 | + command="download-weights") |
| 315 | + |
| 316 | + convert_to_safetensors_parser = subparsers.add_parser( |
| 317 | + "convert-to-safetensors", |
| 318 | + help=("Convert model weights to safetensors"), |
| 319 | + usage="vllm convert-to-safetensors <model_name> [options]") |
| 320 | + convert_to_safetensors_parser.add_argument("model_name") |
| 321 | + convert_to_safetensors_parser.add_argument("--revision") |
| 322 | + convert_to_safetensors_parser.set_defaults( |
| 323 | + dispatch_function=tgis_cli, command="convert-to-safetensors") |
| 324 | + |
| 325 | + convert_to_fast_tokenizer_parser = subparsers.add_parser( |
| 326 | + "convert-to-fast-tokenizer", |
| 327 | + help=("Convert to fast tokenizer"), |
| 328 | + usage="vllm convert-to-fast-tokenizer <model_name> [options]") |
| 329 | + convert_to_fast_tokenizer_parser.add_argument("model_name") |
| 330 | + convert_to_fast_tokenizer_parser.add_argument("--revision") |
| 331 | + convert_to_fast_tokenizer_parser.add_argument("--output_path") |
| 332 | + convert_to_fast_tokenizer_parser.set_defaults( |
| 333 | + dispatch_function=tgis_cli, command="convert-to-fast-tokenizer") |
| 334 | + |
145 | 335 | args = parser.parse_args()
|
146 | 336 | # One of the sub commands should be executed.
|
147 | 337 | if hasattr(args, "dispatch_function"):
|
|
0 commit comments