Issue Description
Looking at ggml-org/llama.cpp#15971, that fix may be needed to make smolvlm work again:
$ ramalama serve --image=quay.io/ramalama/intel-gpu:latest smolvlm
[...]
load_hparams: model size: 103.73 MiB
load_hparams: metadata size: 0.07 MiB
load_tensors: ffn up/down are swapped
alloc_compute_meta: SYCL0 compute buffer size = 60.00 MiB
alloc_compute_meta: CPU compute buffer size = 3.00 MiB
srv load_model: loaded multimodal model, '/mnt/models/mmproj-SmolVLM-500M-Instruct-Q8_0.gguf'
srv load_model: cache_reuse is not supported by multimodal, it will be disabled
srv init: initializing slots, n_slots = 1
slot init: id 0 | task -1 | new slot n_ctx_slot = 4096
/lib64/libggml-base.so(+0x2888) [0x7fdedad72888]
/lib64/libggml-base.so(ggml_print_backtrace+0x285) [0x7fdedad72865]
/lib64/libggml-base.so(+0x19d76) [0x7fdedad89d76]
/lib64/libstdc++.so.6(+0x1eb9c) [0x7fdeda61eb9c]
/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7fdeda608d3a]
/lib64/libstdc++.so.6(+0x1ee48) [0x7fdeda61ee48]
llama-server() [0x5bb4cb]
llama-server() [0x5a9570]
llama-server() [0x5a2144]
llama-server() [0x57d038]
llama-server() [0x57c689]
llama-server() [0x56ee59]
llama-server() [0x563b6e]
llama-server() [0x563a67]
llama-server() [0x43a60a]
llama-server() [0x409614]
/lib64/libc.so.6(+0x3575) [0x7fdeda411575]
/lib64/libc.so.6(__libc_start_main+0x88) [0x7fdeda411628]
llama-server() [0x4064f5]
terminate called after throwing an instance of 'std::runtime_error'
what(): Value is not callable: null at row 1, column 72:
<|im_start|>{% for message in messages %}{{message['role'] | capitalize}}{% if message['content'][0]['type'] == 'image' %}{{':'}}{% else %}{{': '}}{% endif %}{% for line in message['content'] %}{% if line['type'] == 'text' %}{{line['text']}}{% elif line['type'] == 'image' %}{{ '<image>' }}{% endif %}{% endfor %}<end_of_utterance>
^
{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}
at row 1, column 42:
<|im_start|>{% for message in messages %}{{message['role'] | capitalize}}{% if message['content'][0]['type'] == 'image' %}{{':'}}{% else %}{{': '}}{% endif %}{% for line in message['content'] %}{% if line['type'] == 'text' %}{{line['text']}}{% elif line['type'] == 'image' %}{{ '<image>' }}{% endif %}{% endfor %}<end_of_utterance>
^
{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}
at row 1, column 42:
<|im_start|>{% for message in messages %}{{message['role'] | capitalize}}{% if message['content'][0]['type'] == 'image' %}{{':'}}{% else %}{{': '}}{% endif %}{% for line in message['content'] %}{% if line['type'] == 'text' %}{{line['text']}}{% elif line['type'] == 'image' %}{{ '<image>' }}{% endif %}{% endfor %}<end_of_utterance>
^
{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}
at row 1, column 13:
<|im_start|>{% for message in messages %}{{message['role'] | capitalize}}{% if message['content'][0]['type'] == 'image' %}{{':'}}{% else %}{{': '}}{% endif %}{% for line in message['content'] %}{% if line['type'] == 'text' %}{{line['text']}}{% elif line['type'] == 'image' %}{{ '<image>' }}{% endif %}{% endfor %}<end_of_utterance>
^
{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}
at row 1, column 1:
<|im_start|>{% for message in messages %}{{message['role'] | capitalize}}{% if message['content'][0]['type'] == 'image' %}{{':'}}{% else %}{{': '}}{% endif %}{% for line in message['content'] %}{% if line['type'] == 'text' %}{{line['text']}}{% elif line['type'] == 'image' %}{{ '<image>' }}{% endif %}{% endfor %}<end_of_utterance>
^
{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}
$
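For reference, the error points at the capitalize filter in the SmolVLM chat template quoted in the backtrace. The following is a minimal sketch, not part of the original report, that renders the same template with the reference Jinja2 engine (assumes the jinja2 Python package; the messages payload is made up). If it renders cleanly there, that would suggest the "Value is not callable: null" failure comes from llama-server's bundled template handling rather than from the template text itself:

# Hypothetical reproduction sketch (not from the report): render the exact chat
# template shown in the backtrace with the reference Jinja2 engine.
# Assumes the `jinja2` package; the `messages` payload below is illustrative.
from jinja2 import Template

CHAT_TEMPLATE = (
    "<|im_start|>{% for message in messages %}"
    "{{message['role'] | capitalize}}"
    "{% if message['content'][0]['type'] == 'image' %}{{':'}}{% else %}{{': '}}{% endif %}"
    "{% for line in message['content'] %}"
    "{% if line['type'] == 'text' %}{{line['text']}}"
    "{% elif line['type'] == 'image' %}{{ '<image>' }}{% endif %}"
    "{% endfor %}<end_of_utterance>\n"
    "{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}"
)

messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": "Describe the image."},
    ]},
]

# Prints "<|im_start|>User:<image>Describe the image.<end_of_utterance>"
# followed by "Assistant:" when the template is well-formed Jinja.
print(Template(CHAT_TEMPLATE).render(messages=messages, add_generation_prompt=True))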
Steps to reproduce the issue
Try to run smolvlm:
ramalama serve smolvlm
Describe the results you received
llama-server terminates (see above)
Describe the results you expected
smolvlm runs, as documented in https://developers.redhat.com/articles/2025/06/20/unleashing-multimodal-magic-ramalama#multimodal
ramalama info output
{
"Accelerator": "intel",
"Config": {},
"Engine": {
"Info": {
"host": {
"arch": "amd64",
"buildahVersion": "1.41.4",
"cgroupControllers": [
"cpu",
"io",
"memory",
"pids"
],
"cgroupManager": "systemd",
"cgroupVersion": "v2",
"conmon": {
"package": "conmon-2.1.13-1.fc42.x86_64",
"path": "/usr/bin/conmon",
"version": "conmon version 2.1.13, commit: "
},
"cpuUtilization": {
"idlePercent": 98.88,
"systemPercent": 0.49,
"userPercent": 0.63
},
"cpus": 20,
"databaseBackend": "boltdb",
"distribution": {
"distribution": "fedora",
"variant": "workstation",
"version": "42"
},
"emulatedArchitectures": [
"linux/arm",
"linux/arm64",
"linux/arm64be",
"linux/loong64",
"linux/mips",
"linux/mips64",
"linux/ppc",
"linux/ppc64",
"linux/ppc64le",
"linux/riscv32",
"linux/riscv64",
"linux/s390x"
],
"eventLogger": "journald",
"freeLocks": 1978,
"hostname": "xxx",
"idMappings": {
"gidmap": [
{
"container_id": 0,
"host_id": 8001,
"size": 1
},
{
"container_id": 1,
"host_id": 100000,
"size": 65536
}
],
"uidmap": [
{
"container_id": 0,
"host_id": 8001,
"size": 1
},
{
"container_id": 1,
"host_id": 100000,
"size": 65536
}
]
},
"kernel": "6.16.7-200.fc42.x86_64",
"linkmode": "dynamic",
"logDriver": "journald",
"memFree": 1524236288,
"memTotal": 33003933696,
"networkBackend": "netavark",
"networkBackendInfo": {
"backend": "netavark",
"dns": {
"package": "aardvark-dns-1.16.0-1.fc42.x86_64",
"path": "/usr/libexec/podman/aardvark-dns",
"version": "aardvark-dns 1.16.0"
},
"package": "netavark-1.16.1-1.fc42.x86_64",
"path": "/usr/libexec/podman/netavark",
"version": "netavark 1.16.1"
},
"ociRuntime": {
"name": "crun",
"package": "crun-1.24-1.fc42.x86_64",
"path": "/usr/bin/crun",
"version": "crun version 1.24\ncommit: 54693209039e5e04cbe3c8b1cd5fe2301219f0a1\nrundir: /run/user/8001/crun\nspec: 1.0.0\n+SYSTEMD +SELINUX +APPARMOR +CAP +SECCOMP +EBPF +CRIU +LIBKRUN +WASM:wasmedge +YAJL"
},
"os": "linux",
"pasta": {
"executable": "/usr/bin/pasta",
"package": "passt-0^20250919.g623dbf6-1.fc42.x86_64",
"version": "pasta 0^20250919.g623dbf6-1.fc42.x86_64\nCopyright Red Hat\nGNU General Public License, version 2 or later\n <https://www.gnu.org/licenses/old-licenses/gpl-2.0.html>\nThis is free software: you are free to change and redistribute it.\nThere is NO WARRANTY, to the extent permitted by law.\n"
},
"remoteSocket": {
"exists": true,
"path": "/run/user/8001/podman/podman.sock"
},
"rootlessNetworkCmd": "pasta",
"security": {
"apparmorEnabled": false,
"capabilities": "CAP_CHOWN,CAP_DAC_OVERRIDE,CAP_FOWNER,CAP_FSETID,CAP_KILL,CAP_NET_BIND_SERVICE,CAP_SETFCAP,CAP_SETGID,CAP_SETPCAP,CAP_SETUID,CAP_SYS_CHROOT",
"rootless": true,
"seccompEnabled": true,
"seccompProfilePath": "/usr/share/containers/seccomp.json",
"selinuxEnabled": true
},
"serviceIsRemote": false,
"slirp4netns": {
"executable": "/usr/bin/slirp4netns",
"package": "slirp4netns-1.3.1-2.fc42.x86_64",
"version": "slirp4netns version 1.3.1\ncommit: e5e368c4f5db6ae75c2fce786e31eef9da6bf236\nlibslirp: 4.8.0\nSLIRP_CONFIG_VERSION_MAX: 5\nlibseccomp: 2.5.5"
},
"swapFree": 4490276864,
"swapTotal": 8589930496,
"uptime": "208h 44m 17.00s (Approximately 8.67 days)",
"variant": ""
},
"plugins": {
"authorization": null,
"log": [
"k8s-file",
"none",
"passthrough",
"journald"
],
"network": [
"bridge",
"macvlan",
"ipvlan"
],
"volume": [
"local"
]
},
"registries": {
"search": [
"registry.fedoraproject.org",
"registry.access.redhat.com",
"docker.io"
]
},
"store": {
"configFile": "/home/xxx/.config/containers/storage.conf",
"containerStore": {
"number": 64,
"paused": 0,
"running": 0,
"stopped": 64
},
"graphDriverName": "overlay",
"graphOptions": {},
"graphRoot": "/home/xxx/.local/share/containers/storage",
"graphRootAllocated": 3131945598976,
"graphRootUsed": 1522821898240,
"graphStatus": {
"Backing Filesystem": "extfs",
"Native Overlay Diff": "false",
"Supports d_type": "true",
"Supports shifting": "true",
"Supports volatile": "true",
"Using metacopy": "false"
},
"imageCopyTmpDir": "/var/tmp",
"imageStore": {
"number": 37
},
"runRoot": "/run/user/8001/containers",
"transientStore": false,
"volumePath": "/home/xxx/.local/share/containers/storage/volumes"
},
"version": {
"APIVersion": "5.6.1",
"BuildOrigin": "Fedora Project",
"Built": 1756944000,
"BuiltTime": "Wed Sep 3 20:00:00 2025",
"GitCommit": "1e2b2315150b2ffa0971596fb5da8cd83f3ce0e1",
"GoVersion": "go1.24.6",
"Os": "linux",
"OsArch": "linux/amd64",
"Version": "5.6.1"
}
},
"Name": "podman"
},
"Image": "quay.io/ramalama/intel-gpu:latest",
"Runtime": "llama.cpp",
"Selinux": false,
"Shortnames": {
"Files": [
"/home/xxx/.local/share/uv/tools/ramalama/share/ramalama/shortnames.conf"
],
"Names": {
"cerebrum": "huggingface://froggeric/Cerebrum-1.0-7b-GGUF/Cerebrum-1.0-7b-Q4_KS.gguf",
"deepseek": "ollama://deepseek-r1",
"dragon": "huggingface://llmware/dragon-mistral-7b-v0/dragon-mistral-7b-q4_k_m.gguf",
"gemma3": "hf://ggml-org/gemma-3-4b-it-GGUF",
"gemma3:12b": "hf://ggml-org/gemma-3-12b-it-GGUF",
"gemma3:1b": "hf://ggml-org/gemma-3-1b-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
"gemma3:27b": "hf://ggml-org/gemma-3-27b-it-GGUF",
"gemma3:4b": "hf://ggml-org/gemma-3-4b-it-GGUF",
"gemma3n": "hf://ggml-org/gemma-3n-E4B-it-GGUF/gemma-3n-E4B-it-Q8_0.gguf",
"gemma3n:e2b": "hf://ggml-org/gemma-3n-E2B-it-GGUF/gemma-3n-E2B-it-Q8_0.gguf",
"gemma3n:e2b-it-f16": "hf://ggml-org/gemma-3n-E2B-it-GGUF/gemma-3n-E2B-it-f16.gguf",
"gemma3n:e2b-it-q8_0": "hf://ggml-org/gemma-3n-E2B-it-GGUF/gemma-3n-E2B-it-Q8_0.gguf",
"gemma3n:e4b": "hf://ggml-org/gemma-3n-E4B-it-GGUF/gemma-3n-E4B-it-Q8_0.gguf",
"gemma3n:e4b-it-f16": "hf://ggml-org/gemma-3n-E4B-it-GGUF/gemma-3n-E4B-it-f16.gguf",
"gemma3n:e4b-it-q8_0": "hf://ggml-org/gemma-3n-E4B-it-GGUF/gemma-3n-E4B-it-Q8_0.gguf",
"gpt-oss": "hf://ggml-org/gpt-oss-20b-GGUF",
"gpt-oss:120b": "hf://ggml-org/gpt-oss-120b-GGUF",
"gpt-oss:20b": "hf://ggml-org/gpt-oss-20b-GGUF",
"granite": "ollama://granite3.1-dense",
"granite-be-3.0:1b": "hf://taronaeo/Granite-3.0-1B-A400M-Instruct-BE-GGUF/granite-3.0-1b-a400m-instruct-be.Q2_K.gguf",
"granite-be-3.3:2b": "hf://taronaeo/Granite-3.3-2B-Instruct-BE-GGUF/granite-3.3-2b-instruct-be.Q4_K_M.gguf",
"granite-lab-7b": "huggingface://instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf",
"granite-lab-8b": "huggingface://ibm-granite/granite-3.3-8b-instruct-GGUF/granite-3.3-8b-instruct-Q4_K_M.gguf",
"granite-lab:7b": "huggingface://instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf",
"granite:2b": "ollama://granite3.1-dense:2b",
"granite:7b": "huggingface://instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf",
"granite:8b": "ollama://granite3.1-dense:8b",
"hermes": "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf",
"ibm/granite": "ollama://granite3.1-dense:8b",
"ibm/granite:2b": "ollama://granite3.1-dense:2b",
"ibm/granite:7b": "huggingface://instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf",
"ibm/granite:8b": "ollama://granite3.1-dense:8b",
"merlinite": "huggingface://instructlab/merlinite-7b-lab-GGUF/merlinite-7b-lab-Q4_K_M.gguf",
"merlinite-lab-7b": "huggingface://instructlab/merlinite-7b-lab-GGUF/merlinite-7b-lab-Q4_K_M.gguf",
"merlinite-lab:7b": "huggingface://instructlab/merlinite-7b-lab-GGUF/merlinite-7b-lab-Q4_K_M.gguf",
"merlinite:7b": "huggingface://instructlab/merlinite-7b-lab-GGUF/merlinite-7b-lab-Q4_K_M.gguf",
"mistral": "hf://lmstudio-community/Mistral-7B-Instruct-v0.3-GGUF/Mistral-7B-Instruct-v0.3-Q4_K_M.gguf",
"mistral-small3.1": "hf://bartowski/mistralai_Mistral-Small-3.1-24B-Instruct-2503-GGUF/mistralai_Mistral-Small-3.1-24B-Instruct-2503-IQ2_M.gguf",
"mistral-small3.1:24b": "hf://bartowski/mistralai_Mistral-Small-3.1-24B-Instruct-2503-GGUF/mistralai_Mistral-Small-3.1-24B-Instruct-2503-IQ2_M.gguf",
"mistral:7b": "hf://lmstudio-community/Mistral-7B-Instruct-v0.3-GGUF/Mistral-7B-Instruct-v0.3-Q4_K_M.gguf",
"mistral:7b-v1": "huggingface://TheBloke/Mistral-7B-Instruct-v0.1-GGUF/mistral-7b-instruct-v0.1.Q5_K_M.gguf",
"mistral:7b-v2": "huggingface://TheBloke/Mistral-7B-Instruct-v0.2-GGUF/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
"mistral:7b-v3": "hf://lmstudio-community/Mistral-7B-Instruct-v0.3-GGUF/Mistral-7B-Instruct-v0.3-Q4_K_M.gguf",
"mistral_code_16k": "huggingface://TheBloke/Mistral-7B-Code-16K-qlora-GGUF/mistral-7b-code-16k-qlora.Q4_K_M.gguf",
"mistral_codealpaca": "huggingface://TheBloke/Mistral-7B-codealpaca-lora-GGUF/mistral-7b-codealpaca-lora.Q4_K_M.gguf",
"mixtao": "huggingface://MaziyarPanahi/MixTAO-7Bx2-MoE-Instruct-v7.0-GGUF/MixTAO-7Bx2-MoE-Instruct-v7.0.Q4_K_M.gguf",
"openchat": "huggingface://TheBloke/openchat-3.5-0106-GGUF/openchat-3.5-0106.Q4_K_M.gguf",
"openorca": "huggingface://TheBloke/Mistral-7B-OpenOrca-GGUF/mistral-7b-openorca.Q4_K_M.gguf",
"phi2": "huggingface://MaziyarPanahi/phi-2-GGUF/phi-2.Q4_K_M.gguf",
"qwen2.5vl": "hf://ggml-org/Qwen2.5-VL-32B-Instruct-GGUF",
"qwen2.5vl:2b": "hf://ggml-org/Qwen2.5-VL-2B-Instruct-GGUF",
"qwen2.5vl:32b": "hf://ggml-org/Qwen2.5-VL-32B-Instruct-GGUF",
"qwen2.5vl:3b": "hf://ggml-org/Qwen2.5-VL-3B-Instruct-GGUF",
"qwen2.5vl:7b": "hf://ggml-org/Qwen2.5-VL-7B-Instruct-GGUF",
"smollm:135m": "hf://HuggingFaceTB/smollm-135M-instruct-v0.2-Q8_0-GGUF",
"smolvlm": "hf://ggml-org/SmolVLM-500M-Instruct-GGUF",
"smolvlm:256m": "hf://ggml-org/SmolVLM-256M-Instruct-GGUF",
"smolvlm:2b": "hf://ggml-org/SmolVLM-Instruct-GGUF",
"smolvlm:500m": "hf://ggml-org/SmolVLM-500M-Instruct-GGUF",
"stories-be:260k": "hf://taronaeo/tinyllamas-BE/stories260K-be.gguf",
"tiny": "hf://TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
"tinyllama": "hf://TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
}
},
"Store": "/home/xxx/.local/share/ramalama",
"UseContainer": true,
"Version": "0.12.3"
}Upstream Latest Release
No
Additional environment details
Using podman 5.6.1 from Fedora 42 (latest is 5.6.2)
Additional information
No response