Skip to content
6 changes: 6 additions & 0 deletions nemo_retriever/src/nemo_retriever/pipeline/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
from nemo_retriever.params.models import BatchTuningParams
from nemo_retriever.utils.input_files import resolve_input_patterns
from nemo_retriever.utils.remote_auth import resolve_remote_api_key
from nemo_retriever.vdb.runtime import describe_vdb_runtime

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -1511,6 +1512,8 @@ def run(
"meta_join_key": meta_join_key,
}

vdb_runtime_summary = describe_vdb_runtime(resolved_vdb_op, resolved_vdb_kwargs)

remote_api_key = resolve_remote_api_key(api_key)
extract_remote_api_key = remote_api_key
embed_remote_api_key = remote_api_key
Expand Down Expand Up @@ -1765,6 +1768,7 @@ def run(
"evaluation_count": None,
"recall_details": bool(recall_details),
"vdb_op": str(resolved_vdb_op),
"vdb": vdb_runtime_summary,
"qa_sweep_exit_code": qa_code,
},
)
Expand Down Expand Up @@ -1847,6 +1851,7 @@ def run(
"evaluation_metrics": {},
"recall_details": bool(recall_details),
"vdb_op": str(resolved_vdb_op),
"vdb": vdb_runtime_summary,
},
)
if run_mode == "batch":
Expand Down Expand Up @@ -1880,6 +1885,7 @@ def run(
"evaluation_count": evaluation_query_count,
"recall_details": bool(recall_details),
"vdb_op": str(resolved_vdb_op),
"vdb": vdb_runtime_summary,
},
)

Expand Down
3 changes: 3 additions & 0 deletions nemo_retriever/src/nemo_retriever/retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,9 @@ def _execute_queries_graph(
text_col = str(embed_params.text_column)
df = pd.DataFrame({text_col: query_texts})

# Hybrid retrieval relies on these ordered query strings staying aligned
# with the embedded rows produced from ``df``. If this query graph grows
# distributed/shuffled stages, carry row-local query text or IDs instead.
graph = self._get_graph(embed_extra=embed_extra)
if not callable(getattr(graph, "resolve_for_local_execution", None)):
raise TypeError("graph must provide resolve_for_local_execution() (e.g. pipeline_graph.Graph)")
Expand Down
15 changes: 8 additions & 7 deletions nemo_retriever/src/nemo_retriever/vdb/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ When `vdb_op="lancedb"` (or `vdb=LanceDB(...)` is passed explicitly), `_construc
`LanceDB.run` (in `lancedb.py`) orchestrates:

1. **`create_index`** — connects with `lancedb.connect(self.uri)`, transforms ingestion batches into Arrow rows (`vector`, `text`, `metadata`, `source`), and **`db.create_table(...)`** with schema and `on_bad_vectors` policy.
2. **`write_to_index`** — builds the **vector index** (e.g. IVF/HNSW) and optionally an **FTS** index when `hybrid=True`.
2. **`write_to_index`** — builds the **vector index** (e.g. IVF/HNSW) and optionally an **FTS/BM25** index over the ingested `text` column when `hybrid=True`.

Common constructor arguments include:

Expand All @@ -82,7 +82,7 @@ Common constructor arguments include:
| `overwrite` | Table create mode vs append |
| `vector_dim` | Expected embedding dimension (default 2048) |
| `index_type` / `metric` / `num_partitions` / `num_sub_vectors` | Vector index tuning |
| `hybrid` | Also build FTS on `text` |
| `hybrid` | Also build the LanceDB FTS/BM25 index on ingested `text` |
| `on_bad_vectors`| `drop`, `fill`, `null`, or `error` |

---
Expand All @@ -93,16 +93,17 @@ Common constructor arguments include:

`RetrieveVdbOperator` wraps the same concrete **`VDB`** instance but calls **`retrieval(vectors, **kwargs)`** instead of `run`. It merges per-call kwargs with the operator’s stored `vdb_kwargs` and returns **`normalize_retrieval_results(...)`** output (see `operators.py`, `records.py`).

Important: retrieval here expects **`vectors`** — a list of query embedding vectors — **not** raw query strings. String queries are embedded elsewhere (e.g. in `Retriever`).
Important: retrieval here expects **`vectors`** — a list of query embedding vectors — as the primary input. String queries are embedded elsewhere (e.g. in `Retriever`). Hybrid backends that need raw text receive aligned `query_texts` as execution-only call context.

### LanceDB inside `RetrieveVdbOperator`

For `vdb_op="lancedb"`, **`LanceDB.retrieval`**:

- Opens the table with `lancedb.connect(table_path).open_table(table_name)`.
- For each query vector: **`table.search([vector], vector_column_name=..., **search_kwargs)`**, optional **`.where(where_clause)`** (Lance / DataFusion SQL; `metadata` / `source` are stored as JSON strings), then **`.limit(top_k).refine_factor(...).nprobes(...)`**.
- For dense retrieval, each query vector uses **`table.search([vector], vector_column_name=..., **search_kwargs)`**, optional **`.where(where_clause)`** (Lance / DataFusion SQL; `metadata` / `source` are stored as JSON strings), then **`.limit(top_k).refine_factor(...).nprobes(...)`**.
- For hybrid retrieval, callers pass `hybrid=True` plus `query_texts` aligned with the vectors. LanceDB uses **`table.search(query_type="hybrid", vector_column_name=..., fts_columns="text").vector(vector).text(query_text)`** before applying the same `where`, limit, refine, probe, and select handling.

Notable kwargs: `top_k`, `refine_factor`, `n_probe` / `nprobes`, `where` or `_filter`, `table_path`, `table_name`, `search_kwargs`. **Hybrid search with precomputed vectors is not implemented** in this path (`NotImplementedError` if `hybrid=True`).
Notable kwargs: `top_k`, `refine_factor`, `n_probe` / `nprobes`, `where` or `_filter`, `table_path`, `table_name`, `search_kwargs`, `hybrid`, and `query_texts`. `query_texts` is stripped from constructor kwargs and forwarded only for retrieval calls whose effective mode is hybrid.

Example of **direct** operator use (you supply vectors):

Expand Down Expand Up @@ -224,9 +225,9 @@ hits = filter_hits_by_content_metadata(

Each hit's `metadata` field is a JSON string. Use **`parse_hit_content_metadata(hit)`** to get a `dict` you can read directly (this is what `filter_hits_by_content_metadata` uses internally). Both helpers are exported from `nemo_retriever.vdb`.

### Not implemented in this path
### Hybrid retrieval

Hybrid search (`hybrid=True`) is not implemented for the precomputed-vector retrieval path — `LanceDB.retrieval` raises `NotImplementedError`. Filters above apply only to dense vector search.
Hybrid search (`hybrid=True`) is implemented for LanceDB's precomputed-vector retrieval path. It requires `query_texts` aligned one-to-one with the query vectors so the backend can combine the dense vector query with full-text search. Filters above apply to both dense and hybrid search.

---

Expand Down
2 changes: 2 additions & 0 deletions nemo_retriever/src/nemo_retriever/vdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from nemo_retriever.vdb.factory import get_vdb_op_cls
from nemo_retriever.vdb.operators import IngestVdbOperator, PutVdbOperator, RetrieveVdbOperator
from nemo_retriever.vdb.records import RetrievalHit, normalize_retrieval_results, to_client_vdb_records
from nemo_retriever.vdb.runtime import describe_vdb_runtime
from nemo_retriever.vdb.sidecar_metadata import (
apply_sidecar_metadata_to_client_batches,
build_sidecar_lookup,
Expand All @@ -27,6 +28,7 @@
"RetrieveVdbOperator",
"normalize_retrieval_results",
"to_client_vdb_records",
"describe_vdb_runtime",
"apply_sidecar_metadata_to_client_batches",
"build_sidecar_lookup",
"filter_hits_by_content_metadata",
Expand Down
22 changes: 15 additions & 7 deletions nemo_retriever/src/nemo_retriever/vdb/adt_vdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
ingestion of Nemo Retriever Library (NRL) record batches.
- `retrieval` — nearest-neighbor search over **precomputed query vectors**.
Query strings are embedded upstream (see `nemo_retriever.Retriever`);
the VDB only sees vectors.
the VDB search boundary receives vectors as the primary input. Backends
that combine dense and lexical evidence may also receive aligned
`query_texts` as execution-only retrieval context.

Methods accept `**kwargs` so backend-specific options (e.g. LanceDB's
`where` predicate for metadata filtering, refinement factors,
Expand Down Expand Up @@ -102,9 +104,11 @@ def retrieval(self, queries: list, **kwargs):

Despite the parameter name `queries` (kept for backward
compatibility), this method receives a list of embedding vectors,
one per query — *not* raw text. Query text is embedded upstream,
typically inside `nemo_retriever.Retriever`, before this method
is called.
one per query. Query text is embedded upstream, typically inside
`nemo_retriever.Retriever`, before this method is called. Backends
that need the raw query string for retrieval-time lexical matching
may additionally consume `query_texts` from `kwargs`; those strings
must be aligned one-to-one with the input vectors.

Implementations search the index, apply any post-filtering, and
return a list of hit lists aligned with the input (one inner list
Expand All @@ -120,6 +124,10 @@ def retrieval(self, queries: list, **kwargs):
serialized JSON, e.g.
`metadata LIKE '%"meta_a":"alpha"%'`.
The `_filter` alias is accepted in addition to `where`.
- query_texts: raw query strings aligned with the input vectors.
Dense retrieval backends should not require this. LanceDB hybrid
search requires it because the backend combines the precomputed
dense vector with a full-text query at search time.
- refine_factor / nprobes / search_kwargs: ANN tuning passed
through to the backend.

Expand All @@ -128,9 +136,9 @@ def retrieval(self, queries: list, **kwargs):
for the full filter cookbook (sidecar merge, server-side vs
client-side filtering, escaping).

Hybrid search with precomputed vectors is not implemented by the
reference `LanceDB` backend; passing `hybrid=True` raises
`NotImplementedError` on that path.
`query_texts` is execution-only context. Operators should avoid
persisting it in backend constructor kwargs and pass it only for
retrieval calls whose effective mode needs raw text.
"""
pass

Expand Down
49 changes: 44 additions & 5 deletions nemo_retriever/src/nemo_retriever/vdb/lancedb.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import os
import time

from collections.abc import Iterable, Sequence
from datetime import timedelta
from typing import Any, Final, FrozenSet

Expand Down Expand Up @@ -626,7 +627,7 @@ def put(
counts["put"] = len(rows)
return counts

def retrieval(self, vectors, **kwargs):
def retrieval(self, vectors: Iterable[Sequence[float]], **kwargs: Any) -> list[list[dict[str, Any]]]:
"""Search LanceDB with precomputed query vectors.

Keyword arguments
Expand All @@ -643,10 +644,12 @@ def retrieval(self, vectors, **kwargs):
``table.search`` (e.g. ``query_type``, ``fts_columns``). Do not
pass ``vector_column_name`` here; use the top-level
``vector_column_name`` retrieval argument instead.
query_texts:
Raw query strings aligned with ``vectors``. Required for
``hybrid=True`` and ignored for dense-only retrieval.
"""
hybrid = kwargs.pop("hybrid", self.hybrid)
if hybrid:
raise NotImplementedError("LanceDB hybrid retrieval with precomputed vectors is not implemented yet.")
query_texts = kwargs.pop("query_texts", None)
table_path = kwargs.pop("table_path", self.uri)
table_name = kwargs.pop("table_name", self.table_name)

Expand All @@ -664,6 +667,23 @@ def retrieval(self, vectors, **kwargs):
else:
search_kwargs = dict(search_kwargs_raw)

if hybrid:
if query_texts is None:
raise ValueError(
"LanceDB hybrid retrieval requires query_texts. Pass query_texts=your_queries "
"alongside vectors when calling retrieval() with hybrid=True."
)
query_type = search_kwargs.get("query_type")
if query_type is not None:
query_type_value = getattr(query_type, "value", query_type)
if str(query_type_value).lower() != "hybrid":
raise ValueError(
"LanceDB hybrid retrieval requires search_kwargs['query_type']='hybrid'; "
f"got {query_type!r}."
)
search_kwargs["query_type"] = "hybrid"
search_kwargs.setdefault("fts_columns", "text")

where_clause = kwargs.pop("where", None)
_filter_fallback = kwargs.pop("_filter", None)
if where_clause is None:
Expand All @@ -673,9 +693,28 @@ def retrieval(self, vectors, **kwargs):

table = lancedb.connect(uri=table_path).open_table(table_name)

if hybrid:
vectors_for_search = list(vectors)
query_texts_list = [query_texts] if isinstance(query_texts, str) else list(query_texts)
if len(query_texts_list) != len(vectors_for_search):
raise ValueError(
"LanceDB hybrid retrieval requires query_texts length to match vectors length; "
f"got query_texts={len(query_texts_list)} vectors={len(vectors_for_search)}."
)
else:
vectors_for_search = vectors
query_texts_list = []

search_results = []
for vector in vectors:
query = table.search([vector], vector_column_name=vector_column_name, **search_kwargs)
for idx, vector in enumerate(vectors_for_search):
if hybrid:
query = (
table.search(vector_column_name=vector_column_name, **search_kwargs)
.vector(vector)
.text(str(query_texts_list[idx]))
)
else:
query = table.search([vector], vector_column_name=vector_column_name, **search_kwargs)
if where_clause is not None:
query = query.where(where_clause)
query = query.limit(top_k).refine_factor(refine_factor).nprobes(n_probe)
Expand Down
7 changes: 7 additions & 0 deletions nemo_retriever/src/nemo_retriever/vdb/operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ def __init__(
) -> None:
merged = dict(vdb_kwargs or {})
clean_kwargs, _sidecar = split_sidecar_from_vdb_kwargs(merged)
clean_kwargs.pop("query_texts", None)
super().__init__(vdb=vdb, vdb_op=vdb_op, vdb_kwargs=clean_kwargs, explode_for_rerank=explode_for_rerank)
self._vdb_kwargs = clean_kwargs
self._retrieval_vdb_kwargs = clean_kwargs
Expand All @@ -216,6 +217,12 @@ def process(self, data: Any, **kwargs: Any) -> list[list[dict[str, Any]]]:
from nemo_retriever.retriever_graph_utils import filter_retrieval_kwargs

retrieval_kwargs = {**self._retrieval_vdb_kwargs, **filter_retrieval_kwargs(kwargs)}
if "hybrid" in retrieval_kwargs:
effective_hybrid = bool(retrieval_kwargs["hybrid"])
else:
effective_hybrid = bool(getattr(self._vdb, "hybrid", False))
if effective_hybrid and "query_texts" in kwargs:
retrieval_kwargs["query_texts"] = kwargs["query_texts"]
return normalize_retrieval_results(self._vdb.retrieval(data, **retrieval_kwargs))

def postprocess(self, data: Any, **kwargs: Any) -> Any:
Expand Down
Loading
Loading