NVIDIA · jioffe502 · May 14, 2026 · May 14, 2026 · May 18, 2026 · May 26, 2026
@@ -75,6 +75,7 @@
 from nemo_retriever.params.models import BatchTuningParams
 from nemo_retriever.utils.input_files import resolve_input_patterns
 from nemo_retriever.utils.remote_auth import resolve_remote_api_key
+from nemo_retriever.vdb.runtime import describe_vdb_runtime
 
 logger = logging.getLogger(__name__)
 
@@ -1511,6 +1512,8 @@ def run(
                 "meta_join_key": meta_join_key,
             }
 
+        vdb_runtime_summary = describe_vdb_runtime(resolved_vdb_op, resolved_vdb_kwargs)
+
         remote_api_key = resolve_remote_api_key(api_key)
         extract_remote_api_key = remote_api_key
         embed_remote_api_key = remote_api_key
@@ -1765,6 +1768,7 @@ def run(
                     "evaluation_count": None,
                     "recall_details": bool(recall_details),
                     "vdb_op": str(resolved_vdb_op),
+                    "vdb": vdb_runtime_summary,
                     "qa_sweep_exit_code": qa_code,
                 },
             )
@@ -1847,6 +1851,7 @@ def run(
                     "evaluation_metrics": {},
                     "recall_details": bool(recall_details),
                     "vdb_op": str(resolved_vdb_op),
+                    "vdb": vdb_runtime_summary,
                 },
             )
             if run_mode == "batch":
@@ -1880,6 +1885,7 @@ def run(
                 "evaluation_count": evaluation_query_count,
                 "recall_details": bool(recall_details),
                 "vdb_op": str(resolved_vdb_op),
+                "vdb": vdb_runtime_summary,
             },
         )
 

@@ -179,6 +179,9 @@ def _execute_queries_graph(
         text_col = str(embed_params.text_column)
         df = pd.DataFrame({text_col: query_texts})
 
+        # Hybrid retrieval relies on these ordered query strings staying aligned
+        # with the embedded rows produced from ``df``. If this query graph grows
+        # distributed/shuffled stages, carry row-local query text or IDs instead.
         graph = self._get_graph(embed_extra=embed_extra)
         if not callable(getattr(graph, "resolve_for_local_execution", None)):
             raise TypeError("graph must provide resolve_for_local_execution() (e.g. pipeline_graph.Graph)")

@@ -71,7 +71,7 @@ When `vdb_op="lancedb"` (or `vdb=LanceDB(...)` is passed explicitly), `_construc
 `LanceDB.run` (in `lancedb.py`) orchestrates:
 
 1. **`create_index`** — connects with `lancedb.connect(self.uri)`, transforms ingestion batches into Arrow rows (`vector`, `text`, `metadata`, `source`), and **`db.create_table(...)`** with schema and `on_bad_vectors` policy.
-2. **`write_to_index`** — builds the **vector index** (e.g. IVF/HNSW) and optionally an **FTS** index when `hybrid=True`.
+2. **`write_to_index`** — builds the **vector index** (e.g. IVF/HNSW) and optionally an **FTS/BM25** index over the ingested `text` column when `hybrid=True`.
 
 Common constructor arguments include:
 
@@ -82,7 +82,7 @@ Common constructor arguments include:
 | `overwrite`     | Table create mode vs append |
 | `vector_dim`    | Expected embedding dimension (default 2048) |
 | `index_type` / `metric` / `num_partitions` / `num_sub_vectors` | Vector index tuning |
-| `hybrid`        | Also build FTS on `text` |
+| `hybrid`        | Also build the LanceDB FTS/BM25 index on ingested `text` |
 | `on_bad_vectors`| `drop`, `fill`, `null`, or `error` |
 
 ---
@@ -93,16 +93,17 @@ Common constructor arguments include:
 
 `RetrieveVdbOperator` wraps the same concrete **`VDB`** instance but calls **`retrieval(vectors, **kwargs)`** instead of `run`. It merges per-call kwargs with the operator’s stored `vdb_kwargs` and returns **`normalize_retrieval_results(...)`** output (see `operators.py`, `records.py`).
 
-Important: retrieval here expects **`vectors`** — a list of query embedding vectors — **not** raw query strings. String queries are embedded elsewhere (e.g. in `Retriever`).
+Important: retrieval here expects **`vectors`** — a list of query embedding vectors — as the primary input. String queries are embedded elsewhere (e.g. in `Retriever`). Hybrid backends that need raw text receive aligned `query_texts` as execution-only call context.
 
 ### LanceDB inside `RetrieveVdbOperator`
 
 For `vdb_op="lancedb"`, **`LanceDB.retrieval`**:
 
 - Opens the table with `lancedb.connect(table_path).open_table(table_name)`.
-- For each query vector: **`table.search([vector], vector_column_name=..., **search_kwargs)`**, optional **`.where(where_clause)`** (Lance / DataFusion SQL; `metadata` / `source` are stored as JSON strings), then **`.limit(top_k).refine_factor(...).nprobes(...)`**.
+- For dense retrieval, each query vector uses **`table.search([vector], vector_column_name=..., **search_kwargs)`**, optional **`.where(where_clause)`** (Lance / DataFusion SQL; `metadata` / `source` are stored as JSON strings), then **`.limit(top_k).refine_factor(...).nprobes(...)`**.
+- For hybrid retrieval, callers pass `hybrid=True` plus `query_texts` aligned with the vectors. LanceDB uses **`table.search(query_type="hybrid", vector_column_name=..., fts_columns="text").vector(vector).text(query_text)`** before applying the same `where`, limit, refine, probe, and select handling.
 
-Notable kwargs: `top_k`, `refine_factor`, `n_probe` / `nprobes`, `where` or `_filter`, `table_path`, `table_name`, `search_kwargs`. **Hybrid search with precomputed vectors is not implemented** in this path (`NotImplementedError` if `hybrid=True`).
+Notable kwargs: `top_k`, `refine_factor`, `n_probe` / `nprobes`, `where` or `_filter`, `table_path`, `table_name`, `search_kwargs`, `hybrid`, and `query_texts`. `query_texts` is stripped from constructor kwargs and forwarded only for retrieval calls whose effective mode is hybrid.
 
 Example of **direct** operator use (you supply vectors):
 
@@ -224,9 +225,9 @@ hits = filter_hits_by_content_metadata(
 
 Each hit's `metadata` field is a JSON string. Use **`parse_hit_content_metadata(hit)`** to get a `dict` you can read directly (this is what `filter_hits_by_content_metadata` uses internally). Both helpers are exported from `nemo_retriever.vdb`.
 
-### Not implemented in this path
+### Hybrid retrieval
 
-Hybrid search (`hybrid=True`) is not implemented for the precomputed-vector retrieval path — `LanceDB.retrieval` raises `NotImplementedError`. Filters above apply only to dense vector search.
+Hybrid search (`hybrid=True`) is implemented for LanceDB's precomputed-vector retrieval path. It requires `query_texts` aligned one-to-one with the query vectors so the backend can combine the dense vector query with full-text search. Filters above apply to both dense and hybrid search.
 
 ---
 

@@ -8,6 +8,7 @@
 from nemo_retriever.vdb.factory import get_vdb_op_cls
 from nemo_retriever.vdb.operators import IngestVdbOperator, PutVdbOperator, RetrieveVdbOperator
 from nemo_retriever.vdb.records import RetrievalHit, normalize_retrieval_results, to_client_vdb_records
+from nemo_retriever.vdb.runtime import describe_vdb_runtime
 from nemo_retriever.vdb.sidecar_metadata import (
     apply_sidecar_metadata_to_client_batches,
     build_sidecar_lookup,
@@ -27,6 +28,7 @@
     "RetrieveVdbOperator",
     "normalize_retrieval_results",
     "to_client_vdb_records",
+    "describe_vdb_runtime",
     "apply_sidecar_metadata_to_client_batches",
     "build_sidecar_lookup",
     "filter_hits_by_content_metadata",

@@ -10,7 +10,9 @@
   ingestion of Nemo Retriever Library (NRL) record batches.
 - `retrieval` — nearest-neighbor search over **precomputed query vectors**.
   Query strings are embedded upstream (see `nemo_retriever.Retriever`);
-  the VDB only sees vectors.
+  the VDB search boundary receives vectors as the primary input. Backends
+  that combine dense and lexical evidence may also receive aligned
+  `query_texts` as execution-only retrieval context.
 
 Methods accept `**kwargs` so backend-specific options (e.g. LanceDB's
 `where` predicate for metadata filtering, refinement factors,
@@ -102,9 +104,11 @@ def retrieval(self, queries: list, **kwargs):
 
         Despite the parameter name `queries` (kept for backward
         compatibility), this method receives a list of embedding vectors,
-        one per query — *not* raw text. Query text is embedded upstream,
-        typically inside `nemo_retriever.Retriever`, before this method
-        is called.
+        one per query. Query text is embedded upstream, typically inside
+        `nemo_retriever.Retriever`, before this method is called. Backends
+        that need the raw query string for retrieval-time lexical matching
+        may additionally consume `query_texts` from `kwargs`; those strings
+        must be aligned one-to-one with the input vectors.
 
         Implementations search the index, apply any post-filtering, and
         return a list of hit lists aligned with the input (one inner list
@@ -120,6 +124,10 @@ def retrieval(self, queries: list, **kwargs):
           serialized JSON, e.g.
           `metadata LIKE '%"meta_a":"alpha"%'`.
           The `_filter` alias is accepted in addition to `where`.
+        - query_texts: raw query strings aligned with the input vectors.
+          Dense retrieval backends should not require this. LanceDB hybrid
+          search requires it because the backend combines the precomputed
+          dense vector with a full-text query at search time.
         - refine_factor / nprobes / search_kwargs: ANN tuning passed
           through to the backend.
 
@@ -128,9 +136,9 @@ def retrieval(self, queries: list, **kwargs):
         for the full filter cookbook (sidecar merge, server-side vs
         client-side filtering, escaping).
 
-        Hybrid search with precomputed vectors is not implemented by the
-        reference `LanceDB` backend; passing `hybrid=True` raises
-        `NotImplementedError` on that path.
+        `query_texts` is execution-only context. Operators should avoid
+        persisting it in backend constructor kwargs and pass it only for
+        retrieval calls whose effective mode needs raw text.
         """
         pass
 

@@ -7,6 +7,7 @@
 import os
 import time
 
+from collections.abc import Iterable, Sequence
 from datetime import timedelta
 from typing import Any, Final, FrozenSet
 
@@ -626,7 +627,7 @@ def put(
         counts["put"] = len(rows)
         return counts
 
-    def retrieval(self, vectors, **kwargs):
+    def retrieval(self, vectors: Iterable[Sequence[float]], **kwargs: Any) -> list[list[dict[str, Any]]]:
         """Search LanceDB with precomputed query vectors.
 
         Keyword arguments
@@ -643,10 +644,12 @@ def retrieval(self, vectors, **kwargs):
             ``table.search`` (e.g. ``query_type``, ``fts_columns``). Do not
             pass ``vector_column_name`` here; use the top-level
             ``vector_column_name`` retrieval argument instead.
+        query_texts:
+            Raw query strings aligned with ``vectors``. Required for
+            ``hybrid=True`` and ignored for dense-only retrieval.
         """
         hybrid = kwargs.pop("hybrid", self.hybrid)
-        if hybrid:
-            raise NotImplementedError("LanceDB hybrid retrieval with precomputed vectors is not implemented yet.")
+        query_texts = kwargs.pop("query_texts", None)
         table_path = kwargs.pop("table_path", self.uri)
         table_name = kwargs.pop("table_name", self.table_name)
 
@@ -664,6 +667,23 @@ def retrieval(self, vectors, **kwargs):
         else:
             search_kwargs = dict(search_kwargs_raw)
 
+        if hybrid:
+            if query_texts is None:
+                raise ValueError(
+                    "LanceDB hybrid retrieval requires query_texts. Pass query_texts=your_queries "
+                    "alongside vectors when calling retrieval() with hybrid=True."
+                )
+            query_type = search_kwargs.get("query_type")
+            if query_type is not None:
+                query_type_value = getattr(query_type, "value", query_type)
+                if str(query_type_value).lower() != "hybrid":
+                    raise ValueError(
+                        "LanceDB hybrid retrieval requires search_kwargs['query_type']='hybrid'; "
+                        f"got {query_type!r}."
+                    )
+            search_kwargs["query_type"] = "hybrid"
+            search_kwargs.setdefault("fts_columns", "text")
+
         where_clause = kwargs.pop("where", None)
         _filter_fallback = kwargs.pop("_filter", None)
         if where_clause is None:
@@ -673,9 +693,28 @@ def retrieval(self, vectors, **kwargs):
 
         table = lancedb.connect(uri=table_path).open_table(table_name)
 
+        if hybrid:
+            vectors_for_search = list(vectors)
+            query_texts_list = [query_texts] if isinstance(query_texts, str) else list(query_texts)
+            if len(query_texts_list) != len(vectors_for_search):
+                raise ValueError(
+                    "LanceDB hybrid retrieval requires query_texts length to match vectors length; "
+                    f"got query_texts={len(query_texts_list)} vectors={len(vectors_for_search)}."
+                )
+        else:
+            vectors_for_search = vectors
+            query_texts_list = []
+
         search_results = []
-        for vector in vectors:
-            query = table.search([vector], vector_column_name=vector_column_name, **search_kwargs)
+        for idx, vector in enumerate(vectors_for_search):
+            if hybrid:
+                query = (
+                    table.search(vector_column_name=vector_column_name, **search_kwargs)
+                    .vector(vector)
+                    .text(str(query_texts_list[idx]))
+                )
+            else:
+                query = table.search([vector], vector_column_name=vector_column_name, **search_kwargs)
             if where_clause is not None:
                 query = query.where(where_clause)
             query = query.limit(top_k).refine_factor(refine_factor).nprobes(n_probe)

@@ -201,6 +201,7 @@ def __init__(
     ) -> None:
         merged = dict(vdb_kwargs or {})
         clean_kwargs, _sidecar = split_sidecar_from_vdb_kwargs(merged)
+        clean_kwargs.pop("query_texts", None)
         super().__init__(vdb=vdb, vdb_op=vdb_op, vdb_kwargs=clean_kwargs, explode_for_rerank=explode_for_rerank)
         self._vdb_kwargs = clean_kwargs
         self._retrieval_vdb_kwargs = clean_kwargs
@@ -216,6 +217,12 @@ def process(self, data: Any, **kwargs: Any) -> list[list[dict[str, Any]]]:
         from nemo_retriever.retriever_graph_utils import filter_retrieval_kwargs
 
         retrieval_kwargs = {**self._retrieval_vdb_kwargs, **filter_retrieval_kwargs(kwargs)}
+        if "hybrid" in retrieval_kwargs:
+            effective_hybrid = bool(retrieval_kwargs["hybrid"])
+        else:
+            effective_hybrid = bool(getattr(self._vdb, "hybrid", False))
+        if effective_hybrid and "query_texts" in kwargs:
+            retrieval_kwargs["query_texts"] = kwargs["query_texts"]
         return normalize_retrieval_results(self._vdb.retrieval(data, **retrieval_kwargs))
 
     def postprocess(self, data: Any, **kwargs: Any) -> Any: