Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
5d246a7
convert : minor fixes for numpy 2.x (#23571)
CISC May 24, 2026
549b9d8
ci : update build-self-hosted.yml (#23616)
ggerganov May 24, 2026
28123a3
ci : move most slim jobs to self-hosted runners (#23619)
ggerganov May 25, 2026
6d57c26
perplexity : fix even more integer overflows (#23623)
fairydreaming May 25, 2026
e2ef8fe
server: fix checkpoints creation (#22929)
jacekpoplawski May 25, 2026
9627d0f
vendor : update cpp-httplib to 0.45.1 (#23639)
cabelo May 25, 2026
b964876
ui: media attachments before text (#23467)
sfallah May 25, 2026
826539c
ggml : Parallelize quant LUT init (#23595)
jeffbolznv May 25, 2026
d55fb97
ci : install host compiler on android-ndk build (#23630)
aldehir May 25, 2026
314e729
llama : document that only one on-device state can be saved per seque…
TimNN May 25, 2026
062d311
ci : fix pre-tokenizer-hashes check (#23651)
CISC May 25, 2026
5fdf07e
ci : update spacemit toolchain url and enhance curl command (#23642)
alex-spacemit May 25, 2026
6c4cbdc
server: MTP layer kv-cache should respect draft type ctk (#23646)
am17an May 25, 2026
66efd13
ggml: `gguf_init_from_callback` and `gguf_init_from_buffer` (#22341)
giladgd May 25, 2026
ae251b5
TP: fix ggml context size calculation (#22616)
JohannesGaessler May 25, 2026
fa97041
ggml-alloc: fix out-of-bounds read in ggml_dyn_tallocr_remove_block (…
Dev-X25874 May 21, 2026
b251f74
ggml.h: correct ggml_silu_back arg docstring (a=dy, b=x) (ggml/1500)
OriPekelman May 21, 2026
ce5890b
ggml : bump version to 0.12.1 (ggml/1508)
ggerganov May 25, 2026
22307b3
sync : ggml
ggerganov May 25, 2026
45158f4
ggml : bump version to 0.13.0 (ggml/1510)
ggerganov May 25, 2026
d161ea7
sync : ggml
ggerganov May 25, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/actions/linux-setup-spacemit/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@ runs:
id: setup
uses: ./.github/actions/unarchive-tar
with:
url: https://archive.spacemit.com/toolchain/spacemit-toolchain-linux-glibc-x86_64-v${{ inputs.version }}.tar.xz
url: https://github.com/spacemit-com/toolchain/releases/download/v${{ inputs.version }}/spacemit-toolchain-linux-glibc-x86_64-v${{ inputs.version }}.tar.xz
path: ${{ inputs.path }}
strip: 1
2 changes: 1 addition & 1 deletion .github/actions/unarchive-tar/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@ runs:
run: |
mkdir -p ${{ inputs.path }}
cd ${{ inputs.path }}
curl --no-progress-meter ${{ inputs.url }} | tar -${{ inputs.type }}x --strip-components=${{ inputs.strip }}
curl --no-progress-meter -L ${{ inputs.url }} | tar -${{ inputs.type }}x --strip-components=${{ inputs.strip }}
5 changes: 5 additions & 0 deletions .github/workflows/build-android.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@ jobs:
fetch-depth: 0
lfs: false

- name: Dependencies
run: |
apt-get update
apt-get install -y build-essential
- name: Build
id: ndk_build
run: |
Expand Down
7 changes: 1 addition & 6 deletions .github/workflows/build-cmake-pkg.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,12 @@ on:

jobs:
linux:
runs-on: ubuntu-slim
runs-on: [self-hosted, Linux, CPU]
steps:
- uses: actions/checkout@v6
with:
fetch-depth: 0

- name: Install dependencies
run: |
sudo apt update
sudo apt install -y build-essential tcl cmake
- name: Build
run: |
PREFIX="$(pwd)"/inst
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build-cross.yml
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ jobs:

env:
# Make sure this is in sync with build-cache.yml
SPACEMIT_IME_TOOLCHAIN_VERSION: "1.1.2"
SPACEMIT_IME_TOOLCHAIN_VERSION: "1.2.4"

steps:
- uses: actions/checkout@v6
Expand Down
118 changes: 67 additions & 51 deletions .github/workflows/build-self-hosted.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,24 +55,7 @@ env:
LLAMA_LOG_TIMESTAMPS: 1

jobs:
determine-tag:
name: Determine tag name
runs-on: ubuntu-slim
outputs:
tag_name: ${{ steps.tag.outputs.name }}
steps:
- name: Clone
uses: actions/checkout@v6
with:
fetch-depth: 0
- name: Determine tag name
id: tag
uses: ./.github/actions/get-tag-name
env:
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}

ggml-ci-nvidia-cuda:
needs: determine-tag
runs-on: [self-hosted, Linux, NVIDIA]

steps:
Expand All @@ -82,14 +65,11 @@ jobs:

- name: Test
id: ggml-ci
env:
HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
run: |
nvidia-smi
GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

ggml-ci-nvidia-vulkan-cm:
needs: determine-tag
runs-on: [self-hosted, Linux, NVIDIA]

steps:
Expand All @@ -99,14 +79,11 @@ jobs:

- name: Test
id: ggml-ci
env:
HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
run: |
vulkaninfo --summary
GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

ggml-ci-nvidia-vulkan-cm2:
needs: determine-tag
runs-on: [self-hosted, Linux, NVIDIA, COOPMAT2]

steps:
Expand All @@ -116,14 +93,12 @@ jobs:

- name: Test
id: ggml-ci
env:
HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
run: |
vulkaninfo --summary
GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

ggml-ci-nvidia-webgpu:
runs-on: [self-hosted, Linux, NVIDIA]
runs-on: [self-hosted, Linux, NVIDIA, X64]

steps:
- name: Clone
Expand All @@ -149,7 +124,7 @@ jobs:
GG_BUILD_WEBGPU=1 \
GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \
GG_BUILD_WEBGPU_DAWN_DIR="$GITHUB_WORKSPACE/dawn/lib64/cmake/Dawn" \
bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

# TODO: provision AMX-compatible machine
#ggml-ci-cpu-amx:
Expand All @@ -163,7 +138,7 @@ jobs:
# - name: Test
# id: ggml-ci
# run: |
# bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
# bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

# TODO: provision AMD GPU machine
# ggml-ci-amd-vulkan:
Expand All @@ -178,7 +153,7 @@ jobs:
# id: ggml-ci
# run: |
# vulkaninfo --summary
# GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
# GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

# TODO: provision AMD GPU machine
# ggml-ci-amd-rocm:
Expand All @@ -193,10 +168,9 @@ jobs:
# id: ggml-ci
# run: |
# amd-smi static
# GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
# GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

ggml-ci-mac-metal:
needs: determine-tag
runs-on: [self-hosted, macOS, ARM64]

steps:
Expand All @@ -206,13 +180,10 @@ jobs:

- name: Test
id: ggml-ci
env:
HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
run: |
GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

ggml-ci-mac-webgpu:
needs: determine-tag
runs-on: [self-hosted, macOS, ARM64]

steps:
Expand All @@ -235,14 +206,11 @@ jobs:

- name: Test
id: ggml-ci
env:
HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
run: |
GG_BUILD_WEBGPU=1 GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \
bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

ggml-ci-mac-vulkan:
needs: determine-tag
runs-on: [self-hosted, macOS, ARM64]

steps:
Expand All @@ -252,14 +220,11 @@ jobs:

- name: Test
id: ggml-ci
env:
HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
run: |
vulkaninfo --summary
GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

ggml-ci-linux-intel-vulkan:
needs: determine-tag
runs-on: [self-hosted, Linux, Intel]

steps:
Expand All @@ -271,14 +236,11 @@ jobs:

- name: Test
id: ggml-ci
env:
HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
run: |
vulkaninfo --summary
GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

ggml-ci-win-intel-vulkan:
needs: determine-tag
runs-on: [self-hosted, Windows, X64, Intel]

steps:
Expand All @@ -293,15 +255,13 @@ jobs:
MSYSTEM: UCRT64
CHERE_INVOKING: 1
PATH: C:\msys64\ucrt64\bin;C:\msys64\usr\bin;C:\Windows\System32;${{ env.PATH }}
HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
run: |
vulkaninfo --summary
# Skip python related tests with GG_BUILD_LOW_PERF=1 since Windows MSYS2 UCRT64 currently fails to create
# a valid python environment for testing
LLAMA_FATAL_WARNINGS=OFF GG_BUILD_NINJA=1 GG_BUILD_VULKAN=1 GG_BUILD_LOW_PERF=1 ./ci/run.sh ./results/llama.cpp ./mnt/llama.cpp

ggml-ci-intel-openvino-gpu-low-perf:
needs: determine-tag
runs-on: [self-hosted, Linux, Intel, OpenVINO]

concurrency:
Expand Down Expand Up @@ -333,8 +293,64 @@ jobs:

- name: Test
id: ggml-ci
env:
HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
run: |
source ./openvino_toolkit/setupvars.sh
GG_BUILD_OPENVINO=1 GGML_OPENVINO_DEVICE=GPU GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
GG_BUILD_OPENVINO=1 GGML_OPENVINO_DEVICE=GPU GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

ggml-ci-arm64-cpu-low-perf:
runs-on: [self-hosted, Linux, ARM64, CPU]

steps:
- name: Clone
id: checkout
uses: actions/checkout@v6

- name: Test
id: ggml-ci
run: |
LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

ggml-ci-arm64-cpu-high-perf:
runs-on: [self-hosted, Linux, ARM64, CPU]

steps:
- name: Clone
id: checkout
uses: actions/checkout@v6

- name: Test
id: ggml-ci
run: |
LLAMA_ARG_THREADS=$(nproc) GG_BUILD_HIGH_PERF=1 GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

# TODO: not sure how to detect ARM flags on DGX Spark. currently get this error during cmake:
# CMake Warning at ggml/src/ggml-cpu/CMakeLists.txt:147 (message):
# ARM -march/-mcpu not found, -mcpu=native will be used
#
# if we resolve this, we should be able to offload these jobs to the self-hosted runners
#
# ggml-ci-arm64-cpu-high-perf-sve:
# runs-on: [self-hosted, Linux, ARM64, CPU]
#
# steps:
# - name: Clone
# id: checkout
# uses: actions/checkout@v6
#
# - name: Test
# id: ggml-ci
# run: |
# LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
#
# ggml-ci-arm64-cpu-kleidiai:
# runs-on: [self-hosted, Linux, ARM64, CPU]
#
# steps:
# - name: Clone
# id: checkout
# uses: actions/checkout@v6
#
# - name: Test
# id: ggml-ci
# run: |
# GG_BUILD_KLEIDIAI=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
Loading
Loading