Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
136 commits
Select commit Hold shift + click to select a range
e38efb9
feat: add diskann index
richyreachy Apr 22, 2026
43c9db5
fix: add libaio dependency
richyreachy Apr 23, 2026
043456d
Merge branch 'main' into feat/diskann_index
richyreachy Apr 23, 2026
dae465c
fix: fix initialization
richyreachy Apr 23, 2026
1202e42
Merge branch 'feat/diskann_index' of github.com:richyreachy/zvec into…
richyreachy Apr 23, 2026
d9503d3
Merge branch 'main' into feat/diskann_index
richyreachy Apr 23, 2026
c24cd82
refactor: fix ut
richyreachy Apr 24, 2026
87a1b8c
Merge branch 'feat/diskann_index' of github.com:richyreachy/zvec into…
richyreachy Apr 24, 2026
4e668da
fix: fix seed value
richyreachy Apr 25, 2026
fe60376
fix: fix seed value
richyreachy Apr 26, 2026
dc3d0a8
fix: update ut
richyreachy Apr 27, 2026
071d275
Merge branch 'main' into feat/diskann_index
richyreachy Apr 27, 2026
78eea04
fix: fix ut
richyreachy Apr 27, 2026
41f893d
Merge branch 'main' into feat/diskann_index
richyreachy Apr 28, 2026
4cc1ecb
fix: fix according to pr comments
richyreachy Apr 28, 2026
35192e7
Merge branch 'feat/diskann_index' of github.com:richyreachy/zvec into…
richyreachy Apr 28, 2026
53f9ef9
fix: fix on comments
richyreachy Apr 29, 2026
f3ee4ca
fix: fix yaml
richyreachy Apr 29, 2026
fd71a06
feat: support dynamic load
richyreachy Apr 29, 2026
17570d7
feat: add dynamic load
richyreachy Apr 29, 2026
9ac2c3f
Merge branch 'main' into feat/diskann_index
richyreachy Apr 29, 2026
b9a19d1
fix: plugin
richyreachy Apr 29, 2026
9712bd5
fix: plugin
richyreachy Apr 29, 2026
e7bceb4
fix: plugin
richyreachy Apr 29, 2026
9c91f11
fix: fix yaml
richyreachy Apr 29, 2026
1c486f3
fix: plugin
richyreachy Apr 29, 2026
56fcc95
fix: plugin
richyreachy Apr 29, 2026
32954a1
fix: fix plugin
richyreachy Apr 29, 2026
c0b2a8f
Revert "fix: fix plugin"
richyreachy Apr 29, 2026
916c870
Merge branch 'main' into feat/diskann_dynamic_load
richyreachy Apr 29, 2026
f78ff5d
fix: fix ut
richyreachy Apr 29, 2026
7fec86c
Merge branch 'feat/diskann_index' into feat/diskann_dynamic_load
richyreachy Apr 29, 2026
ac1bad7
fix: fix windows build
richyreachy Apr 29, 2026
bf6365d
Merge branch 'main' into feat/diskann_dynamic_load
richyreachy Apr 29, 2026
a2fa6f2
fix: fix build
richyreachy Apr 30, 2026
9d0a63b
feat: merge with main
richyreachy May 7, 2026
7e07c0b
feat: merge with main
richyreachy May 7, 2026
1e23806
fix: fix buffer storage
richyreachy May 7, 2026
9b0e226
Merge branch 'feat/diskann_index' into feat/diskann_dynamic_load
richyreachy May 7, 2026
5d5634a
refactor: use silent invokation
richyreachy May 7, 2026
2b561f9
Merge branch 'main' into feat/diskann_dynamic_load
richyreachy May 7, 2026
12ae920
Merge branch 'main' into feat/diskann_index
richyreachy May 7, 2026
08736db
fix: fix build
richyreachy May 8, 2026
58bbc07
Merge branch 'main' into feat/diskann_dynamic_load
richyreachy May 8, 2026
0caea47
fix: fix python
richyreachy May 11, 2026
e8706a8
Merge branch 'feat/diskann_dynamic_load' of github.com:richyreachy/zv…
richyreachy May 11, 2026
259d69b
refactor: change macro
richyreachy May 12, 2026
5951ecb
Merge branch 'main' into feat/diskann_index
richyreachy May 12, 2026
93a6ec9
fix: fix symbol
richyreachy May 12, 2026
0beaa3d
fix: fix symbol
richyreachy May 12, 2026
2a7bfd1
fix: fix symbol
richyreachy May 12, 2026
e723a03
feat: add ut
richyreachy May 12, 2026
c18cdc1
Merge branch 'feat/diskann_dynamic_load' into feat/diskann_index
richyreachy May 18, 2026
ca60517
Merge branch 'main' into feat/diskann_index
richyreachy May 18, 2026
7221594
fix: fix clang tidy
richyreachy May 18, 2026
ef06317
fix: add lib
richyreachy May 18, 2026
55268ef
Merge branch 'main' into feat/diskann_index
richyreachy May 18, 2026
76ca655
fix: deprecation warning
richyreachy May 19, 2026
689d987
Merge branch 'main' into feat/diskann_index
richyreachy May 19, 2026
c991124
Merge branch 'main' into feat/diskann_index
richyreachy May 19, 2026
1b19ee2
fix: fix win ut
richyreachy May 21, 2026
5190f9a
Merge branch 'feat/diskann_index' of github.com:richyreachy/zvec into…
richyreachy May 21, 2026
71d6701
Merge branch 'main' into feat/diskann_index
richyreachy May 21, 2026
9423d60
Merge branch 'main' into feat/diskann_index
richyreachy May 21, 2026
1875fed
Merge branch 'main' into feat/diskann_index
richyreachy May 27, 2026
48a56ed
fix: fix ut for merge
richyreachy May 27, 2026
764465e
fix: fix ut
richyreachy May 27, 2026
7dbc8b9
Merge branch 'main' into feat/diskann_index
richyreachy May 27, 2026
b477a05
fix: add provider
richyreachy May 27, 2026
9a59894
Merge branch 'feat/diskann_index' of github.com:richyreachy/zvec into…
richyreachy May 27, 2026
f908213
fix: fix clang tidy
richyreachy May 27, 2026
e155770
Merge branch 'main' into feat/diskann_index
richyreachy May 28, 2026
20ec830
fix: fix ut
richyreachy May 28, 2026
1af68bc
Merge branch 'feat/diskann_index' of github.com:richyreachy/zvec into…
richyreachy May 28, 2026
8a513d1
fix: fix ut
richyreachy May 28, 2026
aaa3b8a
fix: update code
richyreachy May 28, 2026
c9238f9
fix: update code
richyreachy May 28, 2026
15ea014
fix: remove unused comments
richyreachy May 28, 2026
f42f4ec
fix: update code
richyreachy May 28, 2026
e64912a
fix: fix code according to comments
richyreachy May 28, 2026
f8f950d
fix: update code
richyreachy May 29, 2026
8044f8f
fix: update code
richyreachy May 29, 2026
6513565
fix: update code
richyreachy May 29, 2026
4f1ffb4
fix: fix cmake
richyreachy May 29, 2026
91fb9e3
fix: fix update
richyreachy May 29, 2026
60ba001
fix: update code
richyreachy May 29, 2026
34cadcc
Merge branch 'main' into feat/diskann_index
richyreachy May 29, 2026
29e5f48
fix: add file
richyreachy May 29, 2026
7dd5898
Merge branch 'main' into feat/diskann_index
richyreachy May 29, 2026
812d9d3
Merge branch 'main' into feat/diskann_index
richyreachy May 29, 2026
9626d92
fix: update code
richyreachy May 29, 2026
9ec91de
Merge branch 'main' into feat/diskann_index
richyreachy Jun 1, 2026
e77fb03
fix: remove load
richyreachy Jun 1, 2026
fb36a81
fix: fix storage
richyreachy Jun 1, 2026
4434a5f
Merge branch 'main' into feat/diskann_index
richyreachy Jun 1, 2026
4461fe3
fix: update code
richyreachy Jun 1, 2026
38270a5
fix: fix format
richyreachy Jun 1, 2026
c92e33f
Merge branch 'main' into feat/diskann_index
richyreachy Jun 2, 2026
d5bff16
Merge branch 'main' into feat/diskann_index
richyreachy Jun 2, 2026
744f419
Merge branch 'main' into feat/diskann_index
richyreachy Jun 2, 2026
8f7e910
Merge branch 'main' into feat/diskann_index
richyreachy Jun 2, 2026
f272694
Merge branch 'main' into feat/diskann_index
richyreachy Jun 2, 2026
e2084f6
fix: judge nullptr
richyreachy Jun 2, 2026
8bf3c48
fix: minor problem
richyreachy Jun 2, 2026
022b9ff
fix: minor problem
richyreachy Jun 2, 2026
32280dd
fix: fix minor issue
richyreachy Jun 2, 2026
80836c1
fix: update code
richyreachy Jun 2, 2026
f070e87
Merge branch 'main' into feat/diskann_index
richyreachy Jun 2, 2026
a3ede62
fix: use call once
richyreachy Jun 2, 2026
4c378c8
fix: change naming
richyreachy Jun 2, 2026
d9120f0
Merge branch 'main' into feat/diskann_index
richyreachy Jun 2, 2026
0da4c18
fix: fix params
richyreachy Jun 2, 2026
6b78c9d
fix: fix param
richyreachy Jun 2, 2026
82b8459
fix: remove empty file
richyreachy Jun 2, 2026
8d49d54
fix: remove virtual
richyreachy Jun 2, 2026
610a5d3
fix: fix return value
richyreachy Jun 3, 2026
03cd778
fix: update
richyreachy Jun 3, 2026
8887ede
Merge branch 'main' into feat/diskann_index
richyreachy Jun 3, 2026
a664187
fix: update code
richyreachy Jun 3, 2026
ddc84d6
fix: update code
richyreachy Jun 3, 2026
75b565b
fix: update code
richyreachy Jun 3, 2026
14785c9
fix: update code
richyreachy Jun 3, 2026
91f1ef7
fix: update code
richyreachy Jun 3, 2026
22b42c4
fix: update code
richyreachy Jun 3, 2026
963bbae
fix: add fix
richyreachy Jun 3, 2026
aabc5eb
fix: update code
richyreachy Jun 3, 2026
95dc821
fix: update code
richyreachy Jun 3, 2026
ad00363
fix: update code
richyreachy Jun 3, 2026
4d32514
fix: update code
richyreachy Jun 3, 2026
40d8d4b
fix: update code
richyreachy Jun 3, 2026
fa32f79
fix: update code
richyreachy Jun 3, 2026
6895178
fix: update code
richyreachy Jun 3, 2026
c478c36
fix: update code
richyreachy Jun 3, 2026
cb4cf62
fix: update code
richyreachy Jun 3, 2026
028c61e
fix: update
richyreachy Jun 3, 2026
771cb1b
Merge branch 'main' into feat/diskann_index
richyreachy Jun 3, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion .github/workflows/03-macos-linux-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,14 @@ jobs:
sudo apt-get install -y clang libomp-dev
shell: bash

- name: Install AIO
if: runner.os == 'Linux' && runner.arch == 'X64'
run: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends \
libaio-dev
Comment thread
feihongxu0824 marked this conversation as resolved.
shell: bash

- name: Print CPU info
if: runner.os == 'Linux'
run: lscpu
Expand Down Expand Up @@ -97,7 +105,7 @@ jobs:
pytest-xdist \
scikit-build-core \
setuptools_scm
shell: bash
Comment thread
feihongxu0824 marked this conversation as resolved.
shell: bash

- name: Build from source
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/clang_tidy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ jobs:
if: steps.changed_files.outputs.any_changed == 'true'
run: |
sudo apt-get update
sudo apt-get install -y clang-tidy=1:18.0-59~exp2 cmake ninja-build libomp-dev
sudo apt-get install -y clang-tidy=1:18.0-59~exp2 cmake ninja-build libomp-dev libaio-dev

- name: Setup ccache
if: steps.changed_files.outputs.any_changed == 'true'
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@
[submodule "thirdparty/RaBitQ-Library/RaBitQ-Library-0.1"]
path = thirdparty/RaBitQ-Library/RaBitQ-Library-0.1
url = https://github.com/VectorDB-NTU/RaBitQ-Library.git
[submodule "thirdparty/aio/libaio-0.3"]
path = thirdparty/aio/libaio-0.3
url = https://github.com/yugabyte/libaio.git
[submodule "thirdparty/cppjieba/cppjieba-5.6.7"]
path = thirdparty/cppjieba/cppjieba-5.6.7
url = https://github.com/yanyiwu/cppjieba.git
Expand Down
22 changes: 22 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,17 @@ else()
endif()
message(STATUS "RABITQ_ARCH_FLAG: ${RABITQ_ARCH_FLAG}")

# DiskAnn support (Linux on x86 only: x86_64, i686 or i386; requires libaio)
if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i686|i386" AND NOT ANDROID AND NOT IOS)
set(DISKANN_SUPPORTED ON)
add_definitions(-DDISKANN_SUPPORTED=1)
else()
set(DISKANN_SUPPORTED OFF)
add_definitions(-DDISKANN_SUPPORTED=0)
message(STATUS "DiskAnn support disabled - only supported on Linux x86_64")
endif()
message(STATUS "DISKANN_SUPPORTED: ${DISKANN_SUPPORTED}")

option(USE_OSS_MIRROR "Use OSS mirror for faster third-party downloads" OFF)
if(DEFINED ENV{USE_OSS_MIRROR} AND NOT "$ENV{USE_OSS_MIRROR}" STREQUAL "")
set(USE_OSS_MIRROR "$ENV{USE_OSS_MIRROR}" CACHE BOOL "Use OSS mirror for faster third-party downloads" FORCE)
Expand Down Expand Up @@ -155,6 +166,17 @@ if(BUILD_PYTHON_BINDINGS)
message(STATUS "Zvec install path: ${ZVEC_PY_INSTALL_DIR}")
install(TARGETS _zvec LIBRARY DESTINATION ${ZVEC_PY_INSTALL_DIR})

# DiskAnn ships as a runtime-loaded shared module
# (libzvec_diskann_plugin.so) that is brought online implicitly the
# first time a DiskAnn index is created — users never call any load
# function. The Python extension resolves the module next to _zvec.so
# (see the $ORIGIN rpath in src/binding/python/CMakeLists.txt); the
# module must therefore be installed alongside _zvec.so in the wheel.
# The target exists only on platforms where DiskAnn is buildable
# (currently Linux x86_64 with libaio).
if(TARGET core_knn_diskann)
install(TARGETS core_knn_diskann LIBRARY DESTINATION ${ZVEC_PY_INSTALL_DIR})
endif()
# Bundle cppjieba's dictionary files so the `jieba` FTS tokenizer works
# out of the box. python/zvec/__init__.py resolves this directory via
# importlib.resources and registers it with set_default_jieba_dict_dir().
Expand Down
60 changes: 60 additions & 0 deletions python/tests/detail/fixture_helper.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,60 @@
import pytest
import logging
import platform

# True only when the tests run on Linux with an x86-family CPU, as reported by
# the standard-library ``platform`` module. Used below to decide whether
# DiskAnn test cases can run at all on this host.
DISKANN_SUPPORTED = (
    platform.system() == "Linux"
    and platform.machine() in {"x86_64", "AMD64", "i686", "i386"}
)

from typing import Any, Generator
from zvec.typing import DataType, StatusCode, MetricType, QuantizeType
import zvec


# Cache the DiskAnn plugin preload status so we pay the load cost once per
# test session. The plugin normally auto-loads on first DiskAnn use, but we
# preload it explicitly here so a missing libaio / misplaced plugin .so
# surfaces as a clear pytest skip instead of a confusing
# "Create vector column indexer failed" deep inside the collection code path.
_DISKANN_PRELOAD_REASON: str | None = None
_DISKANN_PRELOAD_DONE: bool = False


def _probe_diskann_runtime() -> str | None:
    """Run the DiskAnn availability checks once, without any caching.

    Returns:
        None when every check passes, otherwise a human-readable reason
        suitable for ``pytest.skip``.
    """
    if not DISKANN_SUPPORTED:
        return "DiskAnn only supported on Linux x86_64"

    if not zvec.is_libaio_available():
        return (
            "libaio is not available on this host; DiskAnn cannot run. "
            "Install libaio1 (or libaio1t64 on Ubuntu 24.04+) and retry."
        )

    status = zvec.load_diskann_plugin()
    if status != zvec.DISKANN_PLUGIN_OK:
        return (
            f"Failed to load DiskAnn plugin (status={status}); "
            "check that libzvec_diskann_plugin.so is installed alongside "
            "_zvec.so in the Python site-packages directory."
        )

    return None


def _ensure_diskann_runtime_or_reason() -> str | None:
    """Preload the DiskAnn plugin and report whether DiskAnn tests can run.

    The outcome is cached in the module-level ``_DISKANN_PRELOAD_DONE`` /
    ``_DISKANN_PRELOAD_REASON`` pair, so the checks run at most once per
    successful probe and later calls return the cached value.

    Returns:
        None on success, or a human-readable skip reason on failure.
    """
    global _DISKANN_PRELOAD_DONE, _DISKANN_PRELOAD_REASON
    if _DISKANN_PRELOAD_DONE:
        return _DISKANN_PRELOAD_REASON

    # Commit the cache only after the probe has finished. Marking it done up
    # front would leave the cached reason as None if a probe raised, so every
    # later call would wrongly report that DiskAnn is usable.
    reason = _probe_diskann_runtime()
    _DISKANN_PRELOAD_REASON = reason
    _DISKANN_PRELOAD_DONE = True
    return reason


from zvec import (
CollectionOption,
InvertIndexParam,
Expand Down Expand Up @@ -97,6 +148,15 @@ def full_schema_new(request) -> CollectionSchema:
else:
nullable, has_index, vector_index = True, False, HnswIndexParam()

# Skip DiskAnn tests on unsupported platforms or when the runtime cannot
# be brought up (missing libaio, plugin .so not installed, etc.).
from zvec.model.param import DiskAnnIndexParam

if isinstance(vector_index, DiskAnnIndexParam):
skip_reason = _ensure_diskann_runtime_or_reason()
if skip_reason is not None:
pytest.skip(skip_reason)

scalar_index_param = None
vector_index_param = None
if has_index:
Expand Down
96 changes: 80 additions & 16 deletions python/tests/detail/test_collection_recall.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@
HnswIndexParam,
FlatIndexParam,
IVFIndexParam,
DiskAnnIndexParam,
HnswQueryParam,
IVFQueryParam,
DiskAnnQueryParam,
)

from zvec.model.schema import FieldSchema, VectorSchema
Expand Down Expand Up @@ -179,10 +181,24 @@ def get_ground_truth_map(collection, test_docs, query_vectors_map, metric_type,
for field_name, query_vectors in query_vectors_map.items():
ground_truth_map[field_name] = {}

# Support per-field metric type: metric_type can be a dict mapping
# field_name -> MetricType, or a single MetricType applied to all fields.
if isinstance(metric_type, dict):
field_metric = metric_type.get(field_name, MetricType.IP)
else:
field_metric = metric_type

for i, query_vector in enumerate(query_vectors):
# Get the ground truth for this query
relevant_doc_ids_scores = get_ground_truth_for_vector_query(
collection, query_vector, field_name, test_docs, i, metric_type, k, True
collection,
query_vector,
field_name,
test_docs,
i,
field_metric,
k,
True,
)
ground_truth_map[field_name][i] = relevant_doc_ids_scores

Expand Down Expand Up @@ -292,6 +308,7 @@ class TestRecall:
[
(True, True, HnswIndexParam()),
(False, True, IVFIndexParam()),
(False, True, DiskAnnIndexParam()),
(False, True, FlatIndexParam()), # ——ok
(
True,
Expand Down Expand Up @@ -371,6 +388,19 @@ class TestRecall:
use_soar=False,
),
),
(
True,
True,
DiskAnnIndexParam(
metric_type=MetricType.IP,
max_degree=32,
),
),
(
True,
True,
DiskAnnIndexParam(metric_type=MetricType.L2, max_degree=32),
),
],
indirect=True,
)
Expand All @@ -388,10 +418,16 @@ def test_recall_with_single_vector_valid_500(
):
full_schema_params = request.getfixturevalue("full_schema_new")

# Build per-field metric type map so ground truth uses each field's
# actual index metric (fields may fall back to HnswIndexParam/IP).
field_metric_map = {}
for vector_para in full_schema_params.vectors:
if vector_para.name == "vector_fp32_field":
metric_type = vector_para.index_param.metric_type
break
if vector_para.index_param is not None:
field_metric_map[vector_para.name] = vector_para.index_param.metric_type
else:
field_metric_map[vector_para.name] = MetricType.IP

metric_type = field_metric_map.get("vector_fp32_field", MetricType.IP)

multiple_docs = [
generate_doc_recall(i, full_collection_new.schema) for i in range(doc_num)
Expand Down Expand Up @@ -438,9 +474,13 @@ def test_recall_with_single_vector_valid_500(
multiple_docs[i].vectors[field_name] for i in range(query_num)
]

# Get ground truth mapping
# Get ground truth mapping (pass per-field metric map)
ground_truth_map = get_ground_truth_map(
full_collection_new, multiple_docs, query_vectors_map, metric_type, top_k
full_collection_new,
multiple_docs,
query_vectors_map,
field_metric_map,
top_k,
)

# Validate ground truth mapping structure
Expand Down Expand Up @@ -479,8 +519,8 @@ def test_recall_with_single_vector_valid_500(

print("(recall_at_k_stats:\n")
print(recall_at_k_stats)
print("metric_type:")
print(metric_type)
print("field_metric_map:")
print(field_metric_map)
# Print Recall@K statistics
print(f"Recall@{top_k} using Ground Truth:")
for field_name, stats in recall_at_k_stats.items():
Expand Down Expand Up @@ -552,7 +592,21 @@ def test_recall_with_single_vector_valid_500(
use_soar=True,
),
),
# (True, True, IVFIndexParam(metric_type=MetricType.COSINE, n_list=150, n_iters=15, use_soar=False, )),
(
True,
True,
DiskAnnIndexParam(metric_type=MetricType.IP, max_degree=32),
),
(
True,
True,
DiskAnnIndexParam(metric_type=MetricType.L2, max_degree=32),
),
(
True,
True,
DiskAnnIndexParam(metric_type=MetricType.COSINE, max_degree=32),
),
],
indirect=True,
)
Expand All @@ -571,10 +625,16 @@ def test_recall_with_single_vector_valid_2000(
):
full_schema_params = request.getfixturevalue("full_schema_new")

# Build per-field metric type map so ground truth uses each field's
# actual index metric (fields may fall back to HnswIndexParam/IP).
field_metric_map = {}
for vector_para in full_schema_params.vectors:
if vector_para.name == "vector_fp32_field":
metric_type = vector_para.index_param.metric_type
break
if vector_para.index_param is not None:
field_metric_map[vector_para.name] = vector_para.index_param.metric_type
else:
field_metric_map[vector_para.name] = MetricType.IP

metric_type = field_metric_map.get("vector_fp32_field", MetricType.IP)

multiple_docs = [
generate_doc_recall(i, full_collection_new.schema) for i in range(doc_num)
Expand Down Expand Up @@ -621,9 +681,13 @@ def test_recall_with_single_vector_valid_2000(
multiple_docs[i].vectors[field_name] for i in range(query_num)
]

# Get ground truth mapping
# Get ground truth mapping (pass per-field metric map)
ground_truth_map = get_ground_truth_map(
full_collection_new, multiple_docs, query_vectors_map, metric_type, top_k
full_collection_new,
multiple_docs,
query_vectors_map,
field_metric_map,
top_k,
)

# Validate ground truth mapping structure
Expand Down Expand Up @@ -662,8 +726,8 @@ def test_recall_with_single_vector_valid_2000(

print("(recall_at_k_stats:\n")
print(recall_at_k_stats)
print("metric_type:")
print(metric_type)
print("field_metric_map:")
print(field_metric_map)
# Print Recall@K statistics
print(f"Recall@{top_k} using Ground Truth:")
for field_name, stats in recall_at_k_stats.items():
Expand Down
Loading
Loading