
Commit 961c27d

max tokens (#15)
1 parent a36a40c commit 961c27d

File tree: 6 files changed, +176 −148 lines


poetry.lock

Lines changed: 140 additions & 146 deletions
Some generated files are not rendered by default.

pyproject.toml

Lines changed: 2 additions & 2 deletions
```diff
@@ -1,6 +1,6 @@
 [project]
 name = "not-again-ai"
-version = "0.16.0"
+version = "0.16.1"
 description = "Designed to once and for all collect all the little things that come up over and over again in AI projects and put them in one place."
 authors = [
     { name = "DaveCoDev", email = "[email protected]" }
@@ -40,7 +40,7 @@ poetry-plugin-export = ">=1.8"
 
 [project.optional-dependencies]
 data = [
-    "playwright>=1.49",
+    "playwright>=1.50",
     "pytest-playwright>=0.7"
 ]
 llm = [
```

src/not_again_ai/llm/chat_completion/providers/ollama_api.py

Lines changed: 5 additions & 0 deletions
```diff
@@ -28,6 +28,7 @@
     "logit_bias": None,
     "top_logprobs": None,
     "presence_penalty": None,
+    "max_tokens": "num_predict",
 }
 
 
@@ -45,6 +46,10 @@ def validate(request: ChatCompletionRequest) -> None:
         logger.warning("Parameter 'stop' needs to be a string and not a list. It will be ignored.")
         request.stop = None
 
+    # Raise an error if both "max_tokens" and "max_completion_tokens" are provided
+    if request.max_tokens is not None and request.max_completion_tokens is not None:
+        raise ValueError("`max_tokens` and `max_completion_tokens` cannot both be provided.")
+
 
 def ollama_chat_completion(
     request: ChatCompletionRequest,
```
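
The new `"max_tokens": "num_predict"` entry extends what appears to be a mapping from request field names to Ollama option names, where entries mapped to `None` look like unsupported parameters that get dropped. The translation code itself is not part of this diff, so the following is only a minimal, self-contained sketch of how such a map could be applied; `PARAMETER_MAP` and `translate_options` are illustrative names, not the project's actual identifiers.

```python
# Hypothetical illustration: not the project's actual translation code.
PARAMETER_MAP: dict[str, str | None] = {
    "logit_bias": None,           # None: parameter not supported by Ollama, dropped
    "top_logprobs": None,
    "presence_penalty": None,
    "max_tokens": "num_predict",  # renamed to Ollama's num_predict option
}


def translate_options(request_fields: dict[str, object]) -> dict[str, object]:
    """Rename or drop request fields according to PARAMETER_MAP."""
    options: dict[str, object] = {}
    for name, value in request_fields.items():
        if value is None:
            continue  # unset fields are not forwarded
        target = PARAMETER_MAP.get(name, name)  # unmapped fields pass through unchanged
        if target is None:
            continue  # unsupported parameter, silently dropped
        options[target] = value
    return options


print(translate_options({"max_tokens": 100, "presence_penalty": 0.5, "temperature": 0.2}))
# -> {'num_predict': 100, 'temperature': 0.2}
```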

src/not_again_ai/llm/chat_completion/providers/openai_api.py

Lines changed: 4 additions & 0 deletions
```diff
@@ -31,6 +31,10 @@ def validate(request: ChatCompletionRequest) -> None:
     if request.json_mode and request.structured_outputs is not None:
         raise ValueError("json_schema and json_mode cannot be used together.")
 
+    # Raise an error if both "max_tokens" and "max_completion_tokens" are provided
+    if request.max_tokens is not None and request.max_completion_tokens is not None:
+        raise ValueError("`max_tokens` and `max_completion_tokens` cannot both be provided.")
+
 
 def openai_chat_completion(
     request: ChatCompletionRequest,
```
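
Both providers now apply the same guard: a request that sets `max_tokens` and `max_completion_tokens` together is rejected before any API call is made. A tiny standalone sketch of the caller-facing effect; the check is reproduced here rather than imported, so nothing about the package's entry points is assumed.

```python
def check_token_limits(max_tokens: int | None, max_completion_tokens: int | None) -> None:
    # Mirrors the guard added to validate() in both providers.
    if max_tokens is not None and max_completion_tokens is not None:
        raise ValueError("`max_tokens` and `max_completion_tokens` cannot both be provided.")


check_token_limits(max_tokens=100, max_completion_tokens=None)  # accepted
try:
    check_token_limits(max_tokens=100, max_completion_tokens=100)
except ValueError as err:
    print(err)  # `max_tokens` and `max_completion_tokens` cannot both be provided.
```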

src/not_again_ai/llm/chat_completion/types.py

Lines changed: 5 additions & 0 deletions
```diff
@@ -118,6 +118,11 @@ class ChatCompletionRequest(BaseModel):
     top_k: int | None = Field(default=None)
     min_p: float | None = Field(default=None)
 
+    max_tokens: int | None = Field(
+        default=None,
+        description="Sometimes `max_completion_tokens` is not correctly supported so we provide this as a fallback.",
+    )
+
 
 class ChatCompletionChoice(BaseModel):
     message: AssistantMessage
```
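
With this field in place, callers can cap output length through `max_tokens` when a provider does not honor `max_completion_tokens`; per the validation added above, only one of the two should be set. A sketch of constructing such a request, using the same values as the tests below; the import path is inferred from the file location (`src/not_again_ai/llm/chat_completion/types.py`), and the assumption that `UserMessage` lives in the same module is not confirmed by this diff.

```python
# Import path inferred from the module layout shown in this commit;
# UserMessage is assumed to be defined alongside ChatCompletionRequest.
from not_again_ai.llm.chat_completion.types import ChatCompletionRequest, UserMessage

request = ChatCompletionRequest(
    model="gpt-4o-mini-2024-07-18",
    messages=[UserMessage(content="What is the capital of France?")],
    max_tokens=100,  # fallback cap; leave max_completion_tokens unset
)
```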

tests/llm/chat_completion/test_chat_completion.py

Lines changed: 20 additions & 0 deletions
```diff
@@ -789,6 +789,16 @@ def test_chat_completion_invalid_params(openai_aoai_client_fixture: Callable[...
     print(response.model_dump(mode="json", exclude_none=True))
 
 
+def test_chat_completion_max_tokens(openai_aoai_client_fixture: Callable[..., Any]) -> None:
+    request = ChatCompletionRequest(
+        model="gpt-4o-mini-2024-07-18",
+        messages=[UserMessage(content="What is the capital of France?")],
+        max_tokens=100,
+    )
+    response = chat_completion(request, "openai", openai_aoai_client_fixture)
+    print(response.model_dump(mode="json", exclude_none=True))
+
+
 # region OpenAI
 @pytest.fixture(
     params=[
@@ -1059,4 +1069,14 @@ def test_chat_completion_ollama_vision_multiple_messages(ollama_client_fixture:
     print(response.model_dump(mode="json", exclude_none=True))
 
 
+def test_chat_completion_ollama_max_tokens(ollama_client_fixture: Callable[..., Any]) -> None:
+    request = ChatCompletionRequest(
+        model="llama3.2-vision:11b-instruct-q4_K_M",
+        messages=[UserMessage(content="What is the capital of France?")],
+        max_tokens=100,
+    )
+    response = chat_completion(request, "ollama", ollama_client_fixture)
+    print(response.model_dump(mode="json", exclude_none=True))
+
+
 # endregion
```
