Merged

106 commits
12f97c8
use exact model name
tajimagrp Nov 26, 2024
c4cb50b
Update save.py
danielhanchen Dec 27, 2024
75e4756
Update _utils.py
danielhanchen Dec 27, 2024
e86b18f
Update _utils.py
danielhanchen Dec 27, 2024
f565ccf
Update _utils.py
danielhanchen Dec 27, 2024
c5d0aa9
Update _utils.py
danielhanchen Dec 27, 2024
af7d6cc
print
danielhanchen Dec 27, 2024
281cb73
Update _utils.py
danielhanchen Dec 27, 2024
b60acda
Update _utils.py
danielhanchen Dec 27, 2024
855d0f8
Update llama.py
danielhanchen Dec 27, 2024
fe4e9b8
Update _utils.py
danielhanchen Dec 27, 2024
48161a2
Update vision.py
danielhanchen Dec 27, 2024
52b2451
Update _utils.py
danielhanchen Dec 27, 2024
8d39e73
Update _utils.py
danielhanchen Dec 27, 2024
a7e5803
Update _utils.py
danielhanchen Dec 27, 2024
5038ba7
Update _utils.py
danielhanchen Dec 27, 2024
0882287
Update _utils.py
danielhanchen Dec 27, 2024
ab71dce
Update _utils.py
danielhanchen Dec 27, 2024
dd054c3
Update _utils.py
danielhanchen Dec 27, 2024
6c80d0f
Update _utils.py
danielhanchen Dec 27, 2024
ea8e8a2
Update loader.py
danielhanchen Dec 27, 2024
33ed089
accurate_accumulation
danielhanchen Dec 28, 2024
c3b41b8
Update loader.py
danielhanchen Dec 28, 2024
142f026
Update loader.py
danielhanchen Dec 28, 2024
eecab40
Update _utils.py
danielhanchen Dec 28, 2024
8cec2fa
Update loader.py
danielhanchen Dec 28, 2024
c68007c
Update loader.py
danielhanchen Dec 29, 2024
5495311
Update loader.py
danielhanchen Dec 29, 2024
ea2c647
Update loader.py
danielhanchen Dec 29, 2024
f1da2a6
Update pyproject.toml
danielhanchen Dec 29, 2024
dc9efb4
Merge branch 'main' into nightly
danielhanchen Dec 29, 2024
3e1dbab
Update __init__.py
danielhanchen Dec 30, 2024
a0d39ff
Update pyproject.toml
danielhanchen Dec 30, 2024
c3d4e18
Update __init__.py
danielhanchen Dec 30, 2024
7d7a1b0
Update __init__.py
danielhanchen Dec 30, 2024
bfce3d4
Fix Triton heuristics
danielhanchen Dec 30, 2024
743106e
Update __init__.py
danielhanchen Dec 30, 2024
4e0986f
Update __init__.py
danielhanchen Dec 31, 2024
abebd11
Update __init__.py
danielhanchen Dec 31, 2024
f021609
Update __init__.py
danielhanchen Dec 31, 2024
512773e
Xformers
danielhanchen Jan 1, 2025
b4549cd
Update loader.py
danielhanchen Jan 1, 2025
6760499
Update loader.py
danielhanchen Jan 2, 2025
c25f20c
Rewind
danielhanchen Jan 2, 2025
c90b3bf
Update _utils.py
danielhanchen Jan 2, 2025
9379522
Update _utils.py
danielhanchen Jan 2, 2025
9a66c6f
requires grad
danielhanchen Jan 3, 2025
bb9ab04
Update loader.py
danielhanchen Jan 3, 2025
3e096ac
Update _utils.py
danielhanchen Jan 4, 2025
99898da
Update loader.py
danielhanchen Jan 5, 2025
3f251f5
Merge branch 'pr/1339' into nightly
danielhanchen Jan 5, 2025
86ab9f1
changing model to base_model if peft model is already used
mosama1994 Jan 6, 2025
7134fee
Merge branch 'main' into nightly
danielhanchen Jan 7, 2025
039a507
Improve debugging experience (#1512)
Erland366 Jan 7, 2025
f40558f
Update loader.py
danielhanchen Jan 7, 2025
a229db5
Update llama.py
danielhanchen Jan 7, 2025
b7ddf96
Update llama.py
danielhanchen Jan 7, 2025
2b5d470
Revert "Update llama.py"
danielhanchen Jan 7, 2025
52d2895
Update llama.py
danielhanchen Jan 7, 2025
777f209
Merge branch 'pr/1509' into nightly
danielhanchen Jan 7, 2025
1e8cf02
Update llama.py
danielhanchen Jan 7, 2025
cef7e58
Update llama.py
danielhanchen Jan 7, 2025
ca8e92c
Update llama.py
danielhanchen Jan 7, 2025
dbef42d
Update llama.py
danielhanchen Jan 7, 2025
0dd136d
Update llama.py
danielhanchen Jan 7, 2025
3369f00
Update llama.py
danielhanchen Jan 7, 2025
61ecb22
Update llama.py
danielhanchen Jan 7, 2025
ec03332
Update llama.py
danielhanchen Jan 7, 2025
fa02ce1
Update llama.py
danielhanchen Jan 7, 2025
06d4057
Update llama.py
danielhanchen Jan 7, 2025
5004796
Update llama.py
danielhanchen Jan 7, 2025
2608fe4
Update llama.py
danielhanchen Jan 7, 2025
2b3391f
Auto change is_bfloat16_supported
danielhanchen Jan 7, 2025
a1b897e
Update llama.py
danielhanchen Jan 7, 2025
ce84095
Force data-type
danielhanchen Jan 7, 2025
ad31cb6
Update llama.py
danielhanchen Jan 7, 2025
d7a2057
All attention refactor fix (#1491)
KareemMusleh Jan 7, 2025
0cb9c5f
Update llama.py
danielhanchen Jan 7, 2025
e3a92e0
Update llama.py
danielhanchen Jan 7, 2025
422c033
Update granite to work with latest post_patch methods (#1502)
Datta0 Jan 7, 2025
83b48a8
Minor fixes for granite models (#1503)
CoffeeVampir3 Jan 7, 2025
e0ccfaf
support modelscope models and datasets (#1481)
tastelikefeet Jan 7, 2025
63ad366
Merge branch 'main' into nightly
danielhanchen Jan 8, 2025
a7d7838
Phi 4
danielhanchen Jan 8, 2025
62f074c
Merge branch 'main' into nightly
danielhanchen Jan 8, 2025
8d28389
Merge branch 'main' into nightly
danielhanchen Jan 15, 2025
2ced650
Update llama.py
danielhanchen Jan 15, 2025
a76953a
Merge branch 'main' into nightly
danielhanchen Jan 16, 2025
dd9b4e1
Torch.Cuda Is Available Condition and Warning (#1545)
aminwhat Jan 16, 2025
bc37b7a
Update mistral.py
danielhanchen Jan 16, 2025
2e7a886
Update mistral.py
danielhanchen Jan 16, 2025
15e6036
Update _utils.py
danielhanchen Jan 16, 2025
0b6bb12
Update _utils.py
danielhanchen Jan 16, 2025
76403f9
Update _utils.py
danielhanchen Jan 16, 2025
3c4ef99
Update _utils.py
danielhanchen Jan 16, 2025
b4c0b02
Update _utils.py
danielhanchen Jan 16, 2025
24a24bf
Fix
danielhanchen Jan 16, 2025
a953bfc
Bug fixes
danielhanchen Jan 16, 2025
e6d677b
Update mapper.py
danielhanchen Jan 19, 2025
d8d8bdc
Add dropout to granite to match HF's implementation (#1557)
Datta0 Jan 19, 2025
aa53ed4
Merge branch 'nightly' of https://github.com/unslothai/unsloth into n…
danielhanchen Jan 19, 2025
f42d0e9
Update llama.py
danielhanchen Jan 19, 2025
a2b55ef
Merge branch 'main' into nightly
danielhanchen Jan 20, 2025
b667bc6
Update llama.py
danielhanchen Jan 20, 2025
1ce40ce
Bug fixes
danielhanchen Jan 20, 2025
cdb3259
fix: flash_attn_detection_error (#1556)
Zzhiter Jan 20, 2025
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -39,7 +39,7 @@ triton = [
"triton @ https://github.com/woct0rdho/triton-windows/releases/download/v3.1.0-windows.post5/triton-3.1.0-cp312-cp312-win_amd64.whl ; python_version=='3.12' and platform_system == 'Windows'",
]
huggingface = [
"unsloth_zoo>=2025.1.2",
"unsloth_zoo>=2025.1.4",
"packaging",
"tyro",
"transformers>=4.46.1,!=4.47.0",
@@ -285,7 +285,7 @@ colab-ampere-torch220 = [
"flash-attn>=2.6.3",
]
colab-new = [
"unsloth_zoo>=2025.1.2",
"unsloth_zoo>=2025.1.4",
"packaging",
"tyro",
"transformers>=4.46.1,!=4.47.0",
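Note: the same 2025.1.4 floor is enforced at import time (see the unsloth/__init__.py hunk below). A minimal standalone sketch of that check, assuming only importlib.metadata and packaging; the upgrade command shown is illustrative:

from importlib.metadata import PackageNotFoundError, version as importlib_version
from packaging.version import Version

MIN_UNSLOTH_ZOO = Version("2025.1.4")  # floor pinned in pyproject.toml above

try:
    installed = Version(importlib_version("unsloth_zoo"))
except PackageNotFoundError:
    raise RuntimeError("unsloth_zoo is not installed")

if installed < MIN_UNSLOTH_ZOO:
    # Mirrors the auto-upgrade path taken in unsloth/__init__.py
    print("Upgrade with: pip install --upgrade --no-cache-dir --no-deps unsloth_zoo")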
6 changes: 5 additions & 1 deletion unsloth/__init__.py
@@ -86,6 +86,10 @@
del os.environ["PYTORCH_CUDA_ALLOC_CONF"]
pass

# First check if CUDA is available ie a NVIDIA GPU is seen
if not torch.cuda.is_available():
raise NotImplementedError("Unsloth: No NVIDIA GPU found? Unsloth currently only supports GPUs!")

# Fix Xformers performance issues since 0.0.25
import importlib.util
from pathlib import Path
@@ -194,7 +198,7 @@ def is_bf16_supported(): return SUPPORTS_BFLOAT16
# Check for unsloth_zoo
try:
unsloth_zoo_version = importlib_version("unsloth_zoo")
if Version(unsloth_zoo_version) < Version("2025.1.2"):
if Version(unsloth_zoo_version) < Version("2025.1.4"):
try:
os.system("pip install --upgrade --no-cache-dir --no-deps unsloth_zoo")
except:
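For reference, the new import-time guard in isolation, as a minimal sketch; the error message is copied from the hunk above, and the motivation (failing fast before any CUDA-dependent setup runs) is an assumption based on #1545:

import torch

# Raise a clear error when no NVIDIA GPU is visible, rather than letting
# later GPU-only code paths fail with harder-to-read errors.
if not torch.cuda.is_available():
    raise NotImplementedError("Unsloth: No NVIDIA GPU found? Unsloth currently only supports GPUs!")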
12 changes: 9 additions & 3 deletions unsloth/models/_utils.py
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = "2025.1.5"
__version__ = "2025.1.6"

__all__ = [
"SUPPORTS_BFLOAT16",
@@ -285,7 +285,11 @@ def _is_openai_available(): return False
if _is_package_available("flash_attn"):
# Check for CUDA linking errors "undefined symbol: _ZNK3c106SymIntltEl"
try:
from flash_attn.flash_attn_interface import flash_attn_cuda
try:
# See https://github.com/unslothai/unsloth/issues/1437
from flash_attn.flash_attn_interface import flash_attn_gpu
except:
from flash_attn.flash_attn_interface import flash_attn_cuda
HAS_FLASH_ATTENTION = True

# Also check for softcapping
@@ -843,7 +847,9 @@ def patch_linear_scaling(
"self.rotary_emb = .+?\)", function,
flags = re.DOTALL | re.MULTILINE,
)
if len(rotary_emb) == 0: return None, function
if len(rotary_emb) == 0:
return None, exec_code + "\n\n" + function

rotary_emb = rotary_emb[0]
function = function.replace(rotary_emb, fix_rope_function, 1)
function = exec_code + "\n\n" + function
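A condensed sketch of the flash-attn detection changed above: newer flash-attn builds renamed the compiled extension (see unsloth issue #1437), so the new name is tried first with a fallback to the old one, and any failure leaves flash attention disabled:

HAS_FLASH_ATTENTION = False
try:
    try:
        # Newer flash-attn builds expose flash_attn_gpu (issue #1437)
        from flash_attn.flash_attn_interface import flash_attn_gpu
    except ImportError:
        # Older builds still expose flash_attn_cuda
        from flash_attn.flash_attn_interface import flash_attn_cuda
    HAS_FLASH_ATTENTION = True
except Exception:
    # e.g. CUDA linking errors such as "undefined symbol: _ZNK3c106SymIntltEl"
    HAS_FLASH_ATTENTION = False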
7 changes: 4 additions & 3 deletions unsloth/models/granite.py
@@ -89,6 +89,7 @@ def GraniteAttention_fast_forward(
n_groups = self.num_key_value_groups
n_kv_heads = self.config.num_key_value_heads
head_dim = self.head_dim
dropout_p = self.config.attention_dropout if self.training else 0
assert(n_kv_heads * n_groups == n_heads)

Q, K, V = self.apply_qkv(self, hidden_states)
@@ -135,15 +136,15 @@ def GraniteAttention_fast_forward(
Q = Q.view(bsz, q_len, n_kv_heads, n_groups, head_dim)
pass

A = xformers_attention(Q, K, V, attn_bias = causal_mask, scale=self.scaling)
A = xformers_attention(Q, K, V, attn_bias = causal_mask, scale=self.scaling, p=dropout_p)
A = A.view(bsz, q_len, n_heads, head_dim)

elif HAS_FLASH_ATTENTION and attention_mask is None:
Q = Q.transpose(1, 2)
K = K.transpose(1, 2)
V = V.transpose(1, 2)
window = (kv_seq_len, kv_seq_len)
A = flash_attn_func(Q, K, V, causal = True, window_size = window, softmax_scale=self.scaling)
A = flash_attn_func(Q, K, V, causal = True, window_size = window, softmax_scale=self.scaling, dropout_p=dropout_p)
else:
# Grouped query attention
# if n_groups != 1:
@@ -157,7 +158,7 @@
Q, K, V = Q.contiguous(), K.contiguous(), V.contiguous()
# Needs (batch_size, n_heads, seq_len, head_dim)
# is_casual and attention_mask must not be both set!
A = scaled_dot_product_attention(Q, K, V, attn_mask = attention_mask, scale = self.scaling, is_causal = False)
A = scaled_dot_product_attention(Q, K, V, attn_mask = attention_mask, scale = self.scaling, is_causal = False, dropout_p=dropout_p)
# Go back to (batch_size, seq_len, n_heads, head_dim)
A = A.transpose(1, 2).contiguous()
pass
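The dropout plumbing above, condensed into a hedged sketch around torch's SDPA path; the helper name, signature, and tensor shapes are illustrative. The key point is that config.attention_dropout is applied only while training, matching HF's Granite implementation:

import torch.nn.functional as F

def granite_style_attention(Q, K, V, config, training, scaling, attention_mask=None):
    # Dropout is active only in training mode, zero at inference
    dropout_p = config.attention_dropout if training else 0.0
    # Q, K, V: (batch_size, n_heads, seq_len, head_dim)
    return F.scaled_dot_product_attention(
        Q, K, V,
        attn_mask = attention_mask,
        dropout_p = dropout_p,
        scale     = scaling,
        is_causal = False,
    )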
8 changes: 6 additions & 2 deletions unsloth/models/llama.py
@@ -636,6 +636,7 @@ def LlamaModel_fast_forward(
IS_GEMMA2 = self.config.model_type.startswith("gemma2")
IS_COHERE = self.config.model_type.startswith("cohere")
IS_GRANITE = self.config.model_type.startswith("granite")

train_embed_tokens = self.embed_tokens.weight.requires_grad

if IS_GEMMA:
@@ -664,7 +665,7 @@

# Fix up attention mask by setting elements to 0
# Specifically for DPO
if self._has_no_labels and (attention_mask is not None) and (past_key_values is None) and \
if getattr(self, "_has_no_labels", False) is True and (attention_mask is not None) and (past_key_values is None) and \
(not train_embed_tokens):
# Careful for inference the attention_mask is size (1, kv_seq_len)
# Whilst the input_embeds is size (1, 1, 4096)
@@ -792,9 +793,12 @@
pass
pass

if IS_ATTENTION_REFACTOR and not hasattr(self.layers[0].self_attn, "rotary_emb"):
if (IS_ATTENTION_REFACTOR and (hasattr(self, "rotary_emb") or not hasattr(self.layers[0].self_attn, "rotary_emb"))) or IS_GRANITE:
# Transformers main has made it mandatory to pass position_embeddings
# https://github.com/huggingface/transformers/pull/34858
# Also, transformers 4.45.0 supports granite but with the attention refactor (it always had the refactor)
# unsloth's check for granite too has "version >= 4.45.0 (rightly so)".
# so let granite always use the attention refactor implementation.
position_embeddings = self.rotary_emb(hidden_states, position_ids, self.config.max_position_embeddings)
else:
position_embeddings = None
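Since the transformers attention refactor (huggingface/transformers#34858), the per-layer rotary module may no longer exist, so the model computes the (cos, sin) pair once and hands it to every decoder layer. A hedged sketch of the standard rotary application that this tuple feeds, not unsloth's exact kernel; the shapes are assumptions:

import torch

def rotate_half(x):
    x1, x2 = x.chunk(2, dim = -1)
    return torch.cat((-x2, x1), dim = -1)

def apply_rotary_pos_emb(q, k, cos, sin):
    # q, k: (batch, n_heads, seq_len, head_dim); cos, sin: (batch, seq_len, head_dim)
    cos = cos.unsqueeze(1)  # broadcast across the heads dimension
    sin = sin.unsqueeze(1)
    q_embed = (q * cos) + (rotate_half(q) * sin)
    k_embed = (k * cos) + (rotate_half(k) * sin)
    return q_embed, k_embed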
6 changes: 2 additions & 4 deletions unsloth/models/mapper.py
@@ -471,20 +471,18 @@
"meta-llama/Llama-3.2-11B-Vision-Instruct",
"unsloth/Llama-3.2-11B-Vision-Instruct-bnb-4bit",
),
"unsloth/Llama-3.2-90B-Vision-Instruct-unsloth-bnb-4bit" : (
"unsloth/Llama-3.2-90B-Vision-Instruct-bnb-4bit" : (
"unsloth/Llama-3.2-90B-Vision-Instruct",
"meta-llama/Llama-3.2-90B-Vision-Instruct",
"unsloth/Llama-3.2-90B-Vision-Instruct-bnb-4bit",
),
"unsloth/Llama-3.2-11B-Vision-unsloth-bnb-4bit" : (
"unsloth/Llama-3.2-11B-Vision",
"meta-llama/Llama-3.2-11B-Vision",
"unsloth/Llama-3.2-11B-Vision-bnb-4bit",
),
"unsloth/Llama-3.2-90B-Vision-unsloth-bnb-4bit" : (
"unsloth/Llama-3.2-90B-Vision-bnb-4bit" : (
"unsloth/Llama-3.2-90B-Vision",
"meta-llama/Llama-3.2-90B-Vision",
"unsloth/Llama-3.2-90B-Vision-bnb-4bit",
),
"unsloth/Pixtral-12B-2409-unsloth-bnb-4bit" : (
"unsloth/Pixtral-12B-2409",
2 changes: 1 addition & 1 deletion unsloth/models/mistral.py
@@ -304,7 +304,7 @@ def pre_patch():
attention_module = MistralAttention,
)
# Just for Mistral Nemo models!
if function is not None:
if function is not None and init_name is not None:
function = patch_mistral_nemo_attention(function)
# if True:#init_name is not None:
exec(function, globals())