Skip to content

Conversation

@ritikpandey33
Copy link
Contributor

Description

Adds support for Fireworks DeepSeek V3.1 model by adding it to the DEEPSEEK_MODELS dictionary with the correct context window size (131,072 tokens).

Fixes #20006

Type of Change

  • Bug fix (non-breaking change which fixes an issue)

How Has This Been Tested?

  • I believe this change is already covered by existing unit tests
  • Tested locally with the change - model works as expected

Checklist:

  • I have performed a self-review of my own code
  • My changes generate no new warnings
  • New and existing unit tests pass locally with my changes

@dosubot (bot) added the `size:XS` label ("This PR changes 0-9 lines, ignoring generated files.") on Oct 6, 2025
@logan-markewich logan-markewich merged commit c424bf6 into run-llama:main Oct 6, 2025
10 checks passed
@ani0075saha
Copy link

ani0075saha commented Oct 7, 2025

The context length needs to be updated to 163840 (see `contextLength` in the API response below). @ritikpandey33 @logan-markewich

curl --request GET \
  --url https://api.fireworks.ai/v1/accounts/fireworks/models/deepseek-v3p1 \
  --header 'Authorization: Bearer <FIREWORKS_API_KEY>'
{
  "baseModelDetails": {
    "checkpointFormat": "HUGGINGFACE",
    "defaultPrecision": "FP8_MM",
    "modelType": "deepseek_v3",
    "moe": true,
    "parameterCount": "671026419200",
    "supportsFireattention": true,
    "supportsMtp": false,
    "tunable": false,
    "worldSize": 1
  },
  "calibrated": true,
  "cluster": "",
  "contextLength": 163840,
  "conversationConfig": {
    "style": "jinja",
    "system": "",
    "template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% if not thinking is defined %}{% set thinking = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- if tools is defined and tools and tools|length > 0 %}{{'\\n\\n'}}## Tools{{'\\n'}}You have access to the following tools:{%- for tool in tools %}{% set f = tool['function'] if tool['function'] is defined else tool %}{{'\\n\\n'}}### {{ f['name'] }}{{'\\n'}}Description: {{ f['description'] | default('') }}{{'\\n\\n'}}Parameters: {{ f['parameters'] | tojson }}{%- endfor %}{{'\\n\\n'}}IMPORTANT: ALWAYS adhere to this exact format for tool use:{{'\\n'}}<|tool▁calls▁begin|><|tool▁call▁begin|>tool_call_name<|tool▁sep|>tool_call_arguments<|tool▁call▁end|>{% raw %}{additional_tool_calls}{% endraw %}<|tool▁calls▁end|>{{'\\n\\n'}}Where:{{'\\n'}}- `tool_call_name` must be an exact match to one of the available tools{{'\\n'}}- `tool_call_arguments` must be valid JSON that strictly follows the tool's Parameters Schema{{'\\n'}}- For multiple tool calls, chain them directly without separators or spaces{%- endif %}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- if ns.is_last_user %}{{'<|Assistant|></think>'}}{%- endif %}{%- set ns.is_last_user = false -%}{%- set ns.is_first = 
false %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- else %}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'<|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}{%- if ns.is_last_user %}{{'<|Assistant|>'}}{%- if message['prefix'] is defined and message['prefix'] and thinking %}{{'<think>'}}  {%- else %}{{'</think>'}}{%- endif %}{%- endif %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{%- set content = message['content'] -%}{%- if '</think>' in content %}{%- set content = content.split('</think>', 1)[1] -%}{%- endif %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endfor -%}{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool %}{{'<|Assistant|>'}}{%- if not thinking %}{{'</think>'}}{%- else %}{{'<think>'}}{%- endif %}{% endif %}"
  },
  "createTime": "2025-08-21T06:49:48.289944Z",
  "defaultDraftModel": "",
  "defaultDraftTokenCount": 0,
  "defaultSamplingParams": {},
  "deployedModelRefs": [
    {
      "default": false,
      "deployment": "accounts/fireworks/deployments/f5ojqawg",
      "name": "accounts/fireworks/deployedModels/deepseek-v3p1-kpzimxal",
      "public": true,
      "state": "DEPLOYED"
    },
    {
      "default": true,
      "deployment": "accounts/fireworks/deployments/qjeezpms",
      "name": "accounts/fireworks/deployedModels/deepseek-v3p1-vmvkddt4",
      "public": true,
      "state": "DEPLOYED"
    },
    {
      "default": false,
      "deployment": "accounts/fireworks/deployments/ckm3gncc",
      "name": "accounts/fireworks/deployedModels/deepseek-v3p1-je4zk2xr",
      "public": true,
      "state": "DEPLOYED"
    }
  ],
  "deprecationDate": null,
  "description": "DeepSeek-V3.1 is post-trained on the top of DeepSeek-V3.1-Base, which is built upon the original V3 base checkpoint through a two-phase long context extension approach, following the methodology outlined in the original DeepSeek-V3 report. We have expanded our dataset by collecting additional long documents and substantially extending both training phases. The 32K extension phase has been increased 10-fold to 630B tokens, while the 128K extension phase has been extended by 3.3x to 209B tokens. Additionally, DeepSeek-V3.1 is trained using the UE8M0 FP8 scale data format to ensure compatibility with microscaling data formats.",
  "displayName": "DeepSeek V3.1",
  "fineTuningJob": "",
  "githubUrl": "",
  "huggingFaceUrl": "https://huggingface.co/deepseek-ai/DeepSeek-V3.1",
  "importedFrom": "",
  "kind": "HF_BASE_MODEL",
  "name": "accounts/fireworks/models/deepseek-v3p1",
  "peftDetails": {
    "baseModel": "",
    "baseModelType": "",
    "mergeAddonModelName": "",
    "r": 0,
    "targetModules": []
  },
  "public": true,
  "rlTunable": false,
  "snapshotType": "FULL_SNAPSHOT",
  "state": "READY",
  "status": {
    "code": "OK",
    "message": ""
  },
  "supportedPrecisions": [
    "FP8_MM",
    "FP8_MM_V2",
    "PRECISION_UNSPECIFIED",
    "PRECISION_UNSPECIFIED"
  ],
  "supportedPrecisionsWithCalibration": [
    "FP4",
    "FP4_BLOCKSCALED_MM",
    "PRECISION_UNSPECIFIED"
  ],
  "supportsImageInput": false,
  "supportsLora": false,
  "supportsTools": true,
  "teftDetails": null,
  "trainingContextLength": 65536,
  "tunable": true,
  "updateTime": "2025-09-18T21:01:18.824589Z",
  "useHfApplyChatTemplate": false
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

size:XS This PR changes 0-9 lines, ignoring generated files.

Projects

None yet

Development

Successfully merging this pull request may close these issues.

[Bug]: Cannot use Fireworks Deepseek V3.1

3 participants