4 changes: 2 additions & 2 deletions README.md
@@ -108,11 +108,11 @@ pip install -r requirements.txt
1) Replace the last command above with

```
pip install -r requirements_nocuda.txt
pip install -r requirements_nowheels.txt
```

2) Manually install llama-cpp-python using the appropriate command for your hardware: [Installation from PyPI](https://github.com/abetlen/llama-cpp-python#installation-from-pypi).

3) Do the same for CTransformers: [Installation](https://github.com/marella/ctransformers#installation).

4) AMD: Manually install AutoGPTQ: [Installation](https://github.com/PanQiWei/AutoGPTQ#installation).
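
For readers following the updated steps above, here is a rough illustration of what the manual installs can look like on a CUDA-capable Linux machine, written in the same shell-out style as one_click.py. The `LLAMA_CUBLAS`/`FORCE_CMAKE` toggles and the `ctransformers[cuda]` extra are taken from the linked upstream READMEs and may change; treat them as assumptions, not commands pinned by this PR.

```
# Illustration only: rough equivalent of steps 1-3 on a CUDA-capable Linux box.
import os
import subprocess

# Step 1: base requirements without the prebuilt wheels
subprocess.run("python -m pip install -r requirements_nowheels.txt",
               shell=True, check=True)

# Step 2: llama-cpp-python compiled with CUDA (cuBLAS) support; the env vars
# come from the upstream README and are an assumption here
env = dict(os.environ, CMAKE_ARGS="-DLLAMA_CUBLAS=on", FORCE_CMAKE="1")
subprocess.run("python -m pip install llama-cpp-python --no-cache-dir",
               shell=True, check=True, env=env)

# Step 3: ctransformers with CUDA support, per its README
subprocess.run('python -m pip install "ctransformers[cuda]"',
               shell=True, check=True)
```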
18 changes: 9 additions & 9 deletions modules/llamacpp_hf.py
@@ -10,19 +10,19 @@
from modules import RoPE, shared
from modules.logging_colors import logger

import llama_cpp

if torch.cuda.is_available() and not torch.version.hip:
try:
import llama_cpp_cuda
except:
llama_cpp_cuda = None
else:
try:
import llama_cpp
except:
llama_cpp = None

try:
import llama_cpp_cuda
except:
llama_cpp_cuda = None


def llama_cpp_lib():
if shared.args.cpu or llama_cpp_cuda is None:
if (shared.args.cpu and llama_cpp is not None) or llama_cpp_cuda is None:
return llama_cpp
else:
return llama_cpp_cuda
18 changes: 9 additions & 9 deletions modules/llamacpp_model.py
@@ -9,19 +9,19 @@
from modules.logging_colors import logger
from modules.text_generation import get_max_prompt_length

import llama_cpp

if torch.cuda.is_available() and not torch.version.hip:
try:
import llama_cpp_cuda
except:
llama_cpp_cuda = None
else:
try:
import llama_cpp
except:
llama_cpp = None

try:
import llama_cpp_cuda
except:
llama_cpp_cuda = None


def llama_cpp_lib():
if shared.args.cpu or llama_cpp_cuda is None:
if (shared.args.cpu and llama_cpp is not None) or llama_cpp_cuda is None:
return llama_cpp
else:
return llama_cpp_cuda
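
The two hunks above make the same change in llamacpp_hf.py and llamacpp_model.py: both backends are now imported defensively, and `llama_cpp_lib()` only honors `--cpu` when the CPU build is actually importable. A minimal sketch of the resulting fallback (illustrative only, with the optional modules passed in explicitly; not the project's API):

```
def pick_backend(cpu_flag, llama_cpp, llama_cpp_cuda):
    # Mirrors the new condition in llama_cpp_lib()
    if (cpu_flag and llama_cpp is not None) or llama_cpp_cuda is None:
        return llama_cpp
    return llama_cpp_cuda

# CUDA-only install plus --cpu: the old check returned the missing CPU module
# (None); the new check falls back to the CUDA build instead.
assert pick_backend(True, None, "llama_cpp_cuda") == "llama_cpp_cuda"
# CPU-only install: the CUDA module is absent, so the CPU build is used.
assert pick_backend(False, "llama_cpp", None) == "llama_cpp"
```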
86 changes: 53 additions & 33 deletions one_click.py
@@ -1,6 +1,7 @@
import argparse
import glob
import os
import platform
import site
import subprocess
import sys
@@ -34,6 +35,25 @@ def is_macos():
return sys.platform.startswith("darwin")


def is_x86_64():
return platform.machine() == "x86_64"


def cpu_has_avx2():
import cpuinfo

info = cpuinfo.get_cpu_info()
if 'avx2' in info['flags']:
return True

return False


def torch_version():
from torch import __version__ as torver
return torver


def is_installed():
for sitedir in site.getsitepackages():
if "site-packages" in sitedir and conda_env_path in sitedir:
@@ -137,7 +157,7 @@ def install_webui():
install_pytorch = "python -m pip install torch==2.0.1a0 torchvision==0.15.2a0 intel_extension_for_pytorch==2.0.110+xpu -f https://developer.intel.com/ipex-whl-stable-xpu"

# Install Git and then Pytorch
run_cmd(f"{install_git} && {install_pytorch}", assert_success=True, environment=True)
run_cmd(f"{install_git} && {install_pytorch} && python -m pip install py-cpuinfo", assert_success=True, environment=True)

# Install the webui requirements
update_requirements(initial_installation=True)
@@ -162,7 +182,37 @@ def update_requirements(initial_installation=False):
if os.path.exists(extension_req_path):
run_cmd("python -m pip install -r " + extension_req_path + " --upgrade", assert_success=True, environment=True)

textgen_requirements = open("requirements.txt").read().splitlines()
# Detect the PyTorch version
torver = torch_version()
is_cuda = '+cu' in torver # 2.0.1+cu117
is_rocm = '+rocm' in torver # 2.0.1+rocm5.4.2
is_intel = '+cxx11' in torver # 2.0.1a0+cxx11.abi
is_cpu = '+cpu' in torver # 2.0.1+cpu

if is_rocm:
if cpu_has_avx2():
requirements_file = "requirements_amd.txt"
else:
requirements_file = "requirements_amd_noavx2.txt"
elif is_cpu:
if cpu_has_avx2():
requirements_file = "requirements_cpu_only.txt"
else:
requirements_file = "requirements_cpu_only_noavx2.txt"
elif is_macos():
if is_x86_64():
requirements_file = "requirements_apple_intel.txt"
else:
requirements_file = "requirements_apple_silicon.txt"
else:
if cpu_has_avx2():
requirements_file = "requirements.txt"
else:
requirements_file = "requirements_noavx2.txt"

print(f"Using the following requirements file: {requirements_file}")

textgen_requirements = open(requirements_file).read().splitlines()

# Workaround for git+ packages not updating properly. Also store requirements.txt for later use
git_requirements = [req for req in textgen_requirements if req.startswith("git+")]
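
For reference, here is how the branch above resolves for a few representative torch builds, assuming an x86_64 Linux machine with AVX2 (note that `is_intel` is computed but, in the hunk shown, falls through to the default branch):

```
# Illustrative mapping derived from the branch above; assumes x86_64 Linux
# with AVX2 support.
expected = {
    "2.0.1+cu117": "requirements.txt",           # CUDA build -> default branch
    "2.0.1+rocm5.4.2": "requirements_amd.txt",   # ROCm build
    "2.0.1+cpu": "requirements_cpu_only.txt",    # CPU-only build
    "2.0.1a0+cxx11.abi": "requirements.txt",     # Intel XPU build: detected but
                                                 # not branched on in this hunk
}
```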
@@ -178,14 +228,7 @@ def update_requirements(initial_installation=False):
print(f"Uninstalled {package_name}")

# Install/update the project requirements
run_cmd("python -m pip install -r requirements.txt --upgrade", assert_success=True, environment=True)

# The following requirements are for CUDA, not CPU
# Parse output of 'pip show torch' to determine torch version
torver_cmd = run_cmd("python -m pip show torch", assert_success=True, environment=True, capture_output=True)
torver = [v.split()[1] for v in torver_cmd.stdout.decode('utf-8').splitlines() if 'Version:' in v][0]
is_cuda = '+cu' in torver
is_rocm = '+rocm' in torver
run_cmd(f"python -m pip install -r {requirements_file} --upgrade", assert_success=True, environment=True)

# Check for '+cu' or '+rocm' in version string to determine if torch uses CUDA or ROCm. Check for pytorch-cuda as well for backwards compatibility
if not any((is_cuda, is_rocm)) and run_cmd("conda list -f pytorch-cuda | grep pytorch-cuda", environment=True, capture_output=True).returncode == 1:
@@ -216,29 +259,6 @@ def update_requirements(initial_installation=False):
# Install the correct version of g++
run_cmd("conda install -y -k conda-forge::gxx_linux-64=11.2.0", environment=True)

if is_rocm:
# Pre-installed ExLlama module does not support AMD GPU
run_cmd("python -m pip uninstall -y exllama", environment=True)
# Get download URL for latest ExLlama ROCm wheel
exllama_rocm = run_cmd('curl -s https://api.github.com/repos/jllllll/exllama/releases/latest | grep browser_download_url | grep rocm5.4.2-cp310-cp310-linux_x86_64.whl | cut -d : -f 2,3 | tr -d \'"\'', environment=True, capture_output=True).stdout.decode('utf-8')
if 'rocm5.4.2-cp310-cp310-linux_x86_64.whl' in exllama_rocm:
run_cmd("python -m pip install " + exllama_rocm, environment=True)

# Install/Update ROCm AutoGPTQ for AMD GPUs
auto_gptq_version = [req for req in textgen_requirements if req.startswith('https://github.com/PanQiWei/AutoGPTQ/releases/download/')][0].split('/')[7]
auto_gptq_wheel = run_cmd(f'curl -s https://api.github.com/repos/PanQiWei/AutoGPTQ/releases/tags/{auto_gptq_version} | grep browser_download_url | grep rocm5.4.2-cp310-cp310-linux_x86_64.whl | cut -d : -f 2,3 | tr -d \'"\'', environment=True, capture_output=True).stdout.decode('utf-8')
if not auto_gptq_wheel and run_cmd(f"python -m pip install {auto_gptq_wheel} --force-reinstall --no-deps", environment=True).returncode != 0:
print_big_message("ERROR: AutoGPTQ wheel installation failed!\n You will not be able to use GPTQ-based models with AutoGPTQ.")

# Install GPTQ-for-LLaMa for ROCm
gptq_wheel = run_cmd('curl -s https://api.github.com/repos/jllllll/GPTQ-for-LLaMa-CUDA/releases/latest | grep browser_download_url | grep rocm5.4.2-cp310-cp310-linux_x86_64.whl | cut -d : -f 2,3 | tr -d \'"\'', environment=True, capture_output=True).stdout.decode('utf-8')
install_gptq = run_cmd("python -m pip install " + gptq_wheel, environment=True).returncode == 0
if install_gptq:
print("Wheel installation success!")
else:
print("ERROR: GPTQ wheel installation failed.")
print("You will not be able to use GPTQ-based models with GPTQ-for-LLaMa.")

clear_cache()


18 changes: 5 additions & 13 deletions requirements.txt
@@ -29,25 +29,17 @@ wandb
bitsandbytes==0.41.1; platform_system != "Windows"
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"

# AutoGPTQ
# llama-cpp-python (CPU only, AVX2)
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.6/llama_cpp_python-0.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.6/llama_cpp_python-0.2.6-cp310-cp310-win_amd64.whl; platform_system == "Windows"

# CUDA wheels
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

# ExLlama
https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

# llama-cpp-python without GPU support
llama-cpp-python==0.2.6; platform_system != "Windows"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.6/llama_cpp_python-0.2.6-cp310-cp310-win_amd64.whl; platform_system == "Windows"

# llama-cpp-python with CUDA support
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.6+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.6+cu117-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

# GPTQ-for-LLaMa
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

# ctransformers
https://github.com/jllllll/ctransformers-cuBLAS-wheels/releases/download/AVX2/ctransformers-0.2.27+cu117-py3-none-any.whl
40 changes: 40 additions & 0 deletions requirements_amd.txt
@@ -0,0 +1,40 @@
aiofiles==23.1.0
fastapi==0.95.2
gradio_client==0.2.5
gradio==3.33.1
pydantic==1.10.12

accelerate==0.23.*
colorama
datasets
einops
exllamav2==0.0.3
markdown
numpy==1.24
optimum==1.13.1
pandas
peft==0.5.*
Pillow>=9.5.0
pyyaml
requests
safetensors==0.3.2
transformers==4.33.*
scipy
sentencepiece
tensorboard
tqdm
wandb

# bitsandbytes
bitsandbytes==0.41.1; platform_system != "Windows"
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"

# llama-cpp-python (CPU only, AVX2)
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.6/llama_cpp_python-0.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.6/llama_cpp_python-0.2.6-cp310-cp310-win_amd64.whl; platform_system == "Windows"

# AMD wheels
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+rocm5.4.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+rocm5.4.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.6+rocm5.4.2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+rocm5.4.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
39 changes: 39 additions & 0 deletions requirements_amd_noavx2.txt
@@ -0,0 +1,39 @@
aiofiles==23.1.0
fastapi==0.95.2
gradio_client==0.2.5
gradio==3.33.1
pydantic==1.10.12

accelerate==0.23.*
colorama
datasets
einops
exllamav2==0.0.3
markdown
numpy==1.24
optimum==1.13.1
pandas
peft==0.5.*
Pillow>=9.5.0
pyyaml
requests
safetensors==0.3.2
transformers==4.33.*
scipy
sentencepiece
tensorboard
tqdm
wandb

# bitsandbytes
bitsandbytes==0.41.1; platform_system != "Windows"
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"

# llama-cpp-python (CPU only)
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/basic/llama_cpp_python-0.2.6+cu117-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/basic/llama_cpp_python-0.2.6+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"

# AMD wheels
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+rocm5.4.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+rocm5.4.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+rocm5.4.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
33 changes: 33 additions & 0 deletions requirements_apple_intel.txt
@@ -0,0 +1,33 @@
aiofiles==23.1.0
fastapi==0.95.2
gradio_client==0.2.5
gradio==3.33.1
pydantic==1.10.12

accelerate==0.23.*
colorama
datasets
einops
exllamav2==0.0.3
markdown
numpy==1.24
optimum==1.13.1
pandas
peft==0.5.*
Pillow>=9.5.0
pyyaml
requests
safetensors==0.3.2
transformers==4.33.*
scipy
sentencepiece
tensorboard
tqdm
wandb

# bitsandbytes
bitsandbytes==0.41.1; platform_system != "Windows"
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"

# Mac wheels
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.6-cp310-cp310-macosx_11_0_x86_64.whl
33 changes: 33 additions & 0 deletions requirements_apple_silicon.txt
@@ -0,0 +1,33 @@
aiofiles==23.1.0
fastapi==0.95.2
gradio_client==0.2.5
gradio==3.33.1
pydantic==1.10.12

accelerate==0.23.*
colorama
datasets
einops
exllamav2==0.0.3
markdown
numpy==1.24
optimum==1.13.1
pandas
peft==0.5.*
Pillow>=9.5.0
pyyaml
requests
safetensors==0.3.2
transformers==4.33.*
scipy
sentencepiece
tensorboard
tqdm
wandb

# bitsandbytes
bitsandbytes==0.41.1; platform_system != "Windows"
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"

# Mac wheels
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.6-cp310-cp310-macosx_11_0_arm64.whl