Skip to content

chat template file from model causes failure #1202

@engelmi

Description

@engelmi

Issue Description

The new model store implicitly detects the chat template from the model (e.g. embedded in the GGUF file or provided by Ollama) and passes it to llama-run and llama-serve. However, this has recently caused issues and even failures (see steps to reproduce).
This seems to be independent of the model store itself; the problem appears to be the `--chat-template-file` option that is passed to llama.cpp.

Steps to reproduce the issue

Steps to reproduce the issue:
Simply run ramalama with the `--use-model-store` option for a model with a custom chat template, such as smollm:135m or granite-code. This leads to the chat template file being passed to llama.cpp via `--chat-template-file`.

Describe the results you received

Using the chat template from the model (e.g. embedded in the GGUF file) causes failures:

$ ramalama run smollm:135m
Failed to infer a tool call example (possible template bug)
🦭 > hi
A classic problem in physics that has puzzled scientists for centuries!

The problem of time dilation is one of the most mind-bending and intriguing mysteries in all of physics. It's a fundamental concept in modern physics, and I'll try to break it down for you in simple terms.

**What is time dilation?**
...
$ 🦭 > next
decode: n_tokens == 0
llama_decode: failed to decode, ret = -1
failed to decode
failed to generate response

Describe the results you expected

Not passing the chat template to llama-run yields far better results:

$ ramalama run smollm:135m
🦭 > hi
Hello! How can I help you today?
🦭 > 

ramalama info output

{
    "Accelerator": "none",
    "Engine": {
        "Info": {
            "host": {
                "arch": "amd64",
                "buildahVersion": "1.39.2",
                "cgroupControllers": [
                    "cpu",
                    "io",
                    "memory",
                    "pids"
                ],
                "cgroupManager": "systemd",
                "cgroupVersion": "v2",
                "conmon": {
                    "package": "conmon-2.1.13-1.fc41.x86_64",
                    "path": "/usr/bin/conmon",
                    "version": "conmon version 2.1.13, commit: "
                },
                "cpuUtilization": {
                    "idlePercent": 95.67,
                    "systemPercent": 0.82,
                    "userPercent": 3.51
                },
                "cpus": 16,
                "databaseBackend": "sqlite",
                "distribution": {
                    "distribution": "fedora",
                    "variant": "workstation",
                    "version": "41"
                },
                "eventLogger": "journald",
                "freeLocks": 2032,
                "hostname": "fedora",
                "idMappings": {
                    "gidmap": [
                        {
                            "container_id": 0,
                            "host_id": 1000,
                            "size": 1
                        },
                        {
                            "container_id": 1,
                            "host_id": 524288,
                            "size": 65536
                        }
                    ],
                    "uidmap": [
                        {
                            "container_id": 0,
                            "host_id": 1000,
                            "size": 1
                        },
                        {
                            "container_id": 1,
                            "host_id": 524288,
                            "size": 65536
                        }
                    ]
                },
                "kernel": "6.13.9-200.fc41.x86_64",
                "linkmode": "dynamic",
                "logDriver": "journald",
                "memFree": 11474231296,
                "memTotal": 65965858816,
                "networkBackend": "netavark",
                "networkBackendInfo": {
                    "backend": "netavark",
                    "dns": {
                        "package": "aardvark-dns-1.14.0-1.fc41.x86_64",
                        "path": "/usr/libexec/podman/aardvark-dns",
                        "version": "aardvark-dns 1.14.0"
                    },
                    "package": "netavark-1.14.1-1.fc41.x86_64",
                    "path": "/usr/libexec/podman/netavark",
                    "version": "netavark 1.14.1"
                },
                "ociRuntime": {
                    "name": "crun",
                    "package": "crun-1.20-2.fc41.x86_64",
                    "path": "/usr/bin/crun",
                    "version": "crun version 1.20\ncommit: 9c9a76ac11994701dd666c4f0b869ceffb599a66\nrundir: /run/user/1000/crun\nspec: 1.0.0\n+SYSTEMD +SELINUX +APPARMOR +CAP +SECCOMP +EBPF +CRIU +LIBKRUN +WASM:wasmedge +YAJL"
                },
                "os": "linux",
                "pasta": {
                    "executable": "/usr/bin/pasta",
                    "package": "passt-0^20250320.g32f6212-2.fc41.x86_64",
                    "version": ""
                },
                "remoteSocket": {
                    "exists": true,
                    "path": "/run/user/1000/podman/podman.sock"
                },
                "rootlessNetworkCmd": "pasta",
                "security": {
                    "apparmorEnabled": false,
                    "capabilities": "CAP_CHOWN,CAP_DAC_OVERRIDE,CAP_FOWNER,CAP_FSETID,CAP_KILL,CAP_NET_BIND_SERVICE,CAP_SETFCAP,CAP_SETGID,CAP_SETPCAP,CAP_SETUID,CAP_SYS_CHROOT",
                    "rootless": true,
                    "seccompEnabled": true,
                    "seccompProfilePath": "/usr/share/containers/seccomp.json",
                    "selinuxEnabled": true
                },
                "serviceIsRemote": false,
                "slirp4netns": {
                    "executable": "",
                    "package": "",
                    "version": ""
                },
                "swapFree": 8589930496,
                "swapTotal": 8589930496,
                "uptime": "31h 45m 4.00s (Approximately 1.29 days)",
                "variant": ""
            },
            "plugins": {
                "authorization": null,
                "log": [
                    "k8s-file",
                    "none",
                    "passthrough",
                    "journald"
                ],
                "network": [
                    "bridge",
                    "macvlan",
                    "ipvlan"
                ],
                "volume": [
                    "local"
                ]
            },
            "registries": {
                "search": [
                    "registry.fedoraproject.org",
                    "registry.access.redhat.com",
                    "docker.io"
                ]
            },
            "store": {
                "configFile": "/home/mengel/.config/containers/storage.conf",
                "containerStore": {
                    "number": 0,
                    "paused": 0,
                    "running": 0,
                    "stopped": 0
                },
                "graphDriverName": "overlay",
                "graphOptions": {},
                "graphRoot": "/home/mengel/.local/share/containers/storage",
                "graphRootAllocated": 1022488477696,
                "graphRootUsed": 82620403712,
                "graphStatus": {
                    "Backing Filesystem": "btrfs",
                    "Native Overlay Diff": "true",
                    "Supports d_type": "true",
                    "Supports shifting": "false",
                    "Supports volatile": "true",
                    "Using metacopy": "false"
                },
                "imageCopyTmpDir": "/var/tmp",
                "imageStore": {
                    "number": 48
                },
                "runRoot": "/run/user/1000/containers",
                "transientStore": false,
                "volumePath": "/home/mengel/.local/share/containers/storage/volumes"
            },
            "version": {
                "APIVersion": "5.4.1",
                "BuildOrigin": "Fedora Project",
                "Built": 1741651200,
                "BuiltTime": "Tue Mar 11 01:00:00 2025",
                "GitCommit": "b79bc8afe796cba51dd906270a7e1056ccdfcf9e",
                "GoVersion": "go1.23.7",
                "Os": "linux",
                "OsArch": "linux/amd64",
                "Version": "5.4.1"
            }
        },
        "Name": "podman"
    },
    "Image": "quay.io/ramalama/ramalama:0.7",
    "Runtime": "llama.cpp",
    "Store": "/home/mengel/.local/share/ramalama",
    "UseContainer": true,
    "Version": "0.7.4"
}

Upstream Latest Release

Yes

Additional environment details

Additional environment details

Additional information

Additional information like issue happens only occasionally or issue happens with a particular architecture or on a particular setting

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions