From 5bca863493ab56a55188301880ff8677d86aa0cf Mon Sep 17 00:00:00 2001 From: Keith Lee Date: Fri, 13 Feb 2026 13:15:21 +0000 Subject: [PATCH 1/4] Restructure client documentations, now using docusaurus # Conflicts: # bindings/python/README.md # docs/rust-client.md # Conflicts: # docs/rust-client.md --- .github/workflows/release_python.yml | 6 + .gitignore | 21 +- .licenserc.yaml | 3 +- DEVELOPMENT.md | 18 - README.md | 18 - bindings/cpp/README.md | 18 - bindings/python/DEVELOPMENT.md | 19 - bindings/python/README.md | 434 +--------- bindings/python/generate_readme.py | 104 +++ bindings/python/pyproject.toml | 2 +- crates/fluss/README.md | 18 - docs/creating-a-release.md | 18 - docs/generate-release-note.md | 18 - docs/rust-client.md | 755 ------------------ docs/verifying-a-release-candidate.md | 18 - justfile | 4 + website/babel.config.js | 3 + website/docs/developer-guide/_category_.json | 4 + website/docs/developer-guide/contributing.md | 126 +++ website/docs/developer-guide/release.md | 181 +++++ website/docs/index.md | 33 + website/docs/user-guide/_category_.json | 4 + website/docs/user-guide/cpp/_category_.json | 4 + website/docs/user-guide/cpp/api-reference.md | 486 +++++++++++ website/docs/user-guide/cpp/data-types.md | 102 +++ website/docs/user-guide/cpp/error-handling.md | 134 ++++ .../user-guide/cpp/example/_category_.json | 4 + .../cpp/example/admin-operations.md | 111 +++ .../user-guide/cpp/example/configuration.md | 49 ++ website/docs/user-guide/cpp/example/index.md | 70 ++ .../docs/user-guide/cpp/example/log-tables.md | 117 +++ .../cpp/example/partitioned-tables.md | 175 ++++ .../cpp/example/primary-key-tables.md | 135 ++++ website/docs/user-guide/cpp/installation.md | 107 +++ .../docs/user-guide/python/_category_.json | 4 + .../docs/user-guide/python/api-reference.md | 40 +- website/docs/user-guide/python/data-types.md | 21 + .../docs/user-guide/python/error-handling.md | 19 + .../user-guide/python/example/_category_.json | 4 + .../python/example/admin-operations.md | 77 ++ .../python/example/configuration.md | 34 + .../docs/user-guide/python/example/index.md | 46 ++ .../user-guide/python/example/log-tables.md | 110 +++ .../python/example/partitioned-tables.md | 96 +++ .../python/example/primary-key-tables.md | 61 ++ .../docs/user-guide/python/installation.md | 41 + website/docs/user-guide/rust/_category_.json | 4 + website/docs/user-guide/rust/api-reference.md | 427 ++++++++++ website/docs/user-guide/rust/data-types.md | 46 ++ .../docs/user-guide/rust/error-handling.md | 151 ++++ .../user-guide/rust/example/_category_.json | 4 + .../rust/example/admin-operations.md | 118 +++ .../user-guide/rust/example/configuration.md | 24 + website/docs/user-guide/rust/example/index.md | 56 ++ .../user-guide/rust/example/log-tables.md | 124 +++ .../rust/example/partitioned-tables.md | 215 +++++ .../rust/example/primary-key-tables.md | 114 +++ website/docs/user-guide/rust/installation.md | 76 ++ website/docusaurus.config.ts | 84 ++ website/package.json | 43 + website/sidebars.ts | 24 + website/src/css/custom.css | 209 +++++ website/static/img/logo/fluss_favicon.svg | 19 + website/static/img/logo/png/colored_logo.png | Bin 0 -> 45184 bytes website/static/img/logo/svg/colored_logo.svg | 19 + website/tsconfig.json | 6 + 66 files changed, 4264 insertions(+), 1371 deletions(-) create mode 100644 bindings/python/generate_readme.py create mode 100644 website/babel.config.js create mode 100644 website/docs/developer-guide/_category_.json create mode 100644 
website/docs/developer-guide/contributing.md create mode 100644 website/docs/developer-guide/release.md create mode 100644 website/docs/index.md create mode 100644 website/docs/user-guide/_category_.json create mode 100644 website/docs/user-guide/cpp/_category_.json create mode 100644 website/docs/user-guide/cpp/api-reference.md create mode 100644 website/docs/user-guide/cpp/data-types.md create mode 100644 website/docs/user-guide/cpp/error-handling.md create mode 100644 website/docs/user-guide/cpp/example/_category_.json create mode 100644 website/docs/user-guide/cpp/example/admin-operations.md create mode 100644 website/docs/user-guide/cpp/example/configuration.md create mode 100644 website/docs/user-guide/cpp/example/index.md create mode 100644 website/docs/user-guide/cpp/example/log-tables.md create mode 100644 website/docs/user-guide/cpp/example/partitioned-tables.md create mode 100644 website/docs/user-guide/cpp/example/primary-key-tables.md create mode 100644 website/docs/user-guide/cpp/installation.md create mode 100644 website/docs/user-guide/python/_category_.json rename bindings/python/API_REFERENCE.md => website/docs/user-guide/python/api-reference.md (87%) create mode 100644 website/docs/user-guide/python/data-types.md create mode 100644 website/docs/user-guide/python/error-handling.md create mode 100644 website/docs/user-guide/python/example/_category_.json create mode 100644 website/docs/user-guide/python/example/admin-operations.md create mode 100644 website/docs/user-guide/python/example/configuration.md create mode 100644 website/docs/user-guide/python/example/index.md create mode 100644 website/docs/user-guide/python/example/log-tables.md create mode 100644 website/docs/user-guide/python/example/partitioned-tables.md create mode 100644 website/docs/user-guide/python/example/primary-key-tables.md create mode 100644 website/docs/user-guide/python/installation.md create mode 100644 website/docs/user-guide/rust/_category_.json create mode 100644 website/docs/user-guide/rust/api-reference.md create mode 100644 website/docs/user-guide/rust/data-types.md create mode 100644 website/docs/user-guide/rust/error-handling.md create mode 100644 website/docs/user-guide/rust/example/_category_.json create mode 100644 website/docs/user-guide/rust/example/admin-operations.md create mode 100644 website/docs/user-guide/rust/example/configuration.md create mode 100644 website/docs/user-guide/rust/example/index.md create mode 100644 website/docs/user-guide/rust/example/log-tables.md create mode 100644 website/docs/user-guide/rust/example/partitioned-tables.md create mode 100644 website/docs/user-guide/rust/example/primary-key-tables.md create mode 100644 website/docs/user-guide/rust/installation.md create mode 100644 website/docusaurus.config.ts create mode 100644 website/package.json create mode 100644 website/sidebars.ts create mode 100644 website/src/css/custom.css create mode 100644 website/static/img/logo/fluss_favicon.svg create mode 100644 website/static/img/logo/png/colored_logo.png create mode 100644 website/static/img/logo/svg/colored_logo.svg create mode 100644 website/tsconfig.json diff --git a/.github/workflows/release_python.yml b/.github/workflows/release_python.yml index 59c8f8c9..d7bd04cc 100644 --- a/.github/workflows/release_python.yml +++ b/.github/workflows/release_python.yml @@ -49,6 +49,9 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Generate Python README + run: python bindings/python/generate_readme.py + - name: Install protoc run: sudo apt-get update && sudo 
apt-get install -y protobuf-compiler @@ -78,6 +81,9 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Generate Python README + run: python3 bindings/python/generate_readme.py + - name: Install protoc (Linux) if: runner.os == 'Linux' run: sudo apt-get update && sudo apt-get install -y protobuf-compiler diff --git a/.gitignore b/.gitignore index 9c585d86..476f84e9 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,7 @@ Cargo.lock .vscode/ # Python +bindings/python/GENERATED_README.md __pycache__/ *.py[cod] *$py.class @@ -29,4 +30,22 @@ dist/ build/ # CPP -*CMakeFiles/ \ No newline at end of file +*CMakeFiles/ + +# Website (Docusaurus) +website/node_modules +website/build +website/.docusaurus +website/.cache-loader +website/.env.local +website/.env.development.local +website/.env.test.local +website/.env.production.local +website/npm-debug.log* +website/yarn-debug.log* +website/yarn-error.log* +website/package-lock.json +website/versioned_docs +website/versioned_sidebars +website/versions.json +website/pnpm-lock.yaml \ No newline at end of file diff --git a/.licenserc.yaml b/.licenserc.yaml index a3cfcd14..d3238563 100644 --- a/.licenserc.yaml +++ b/.licenserc.yaml @@ -21,10 +21,11 @@ header: copyright-owner: Apache Software Foundation paths-ignore: - - '.github/PULL_REQUEST_TEMPLATE.md' - '.gitignore' - 'LICENSE' - 'NOTICE' - 'DISCLAIMER' - 'bindings/python/fluss/py.typed' + - 'website/**' + - '**/*.md' comment: on-failure diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index a0669a75..a1180d6f 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -1,21 +1,3 @@ - - # Development Guide Welcome to the development guide of `fluss-rust`! This project builds `fluss-rust` client and language specific bindings. diff --git a/README.md b/README.md index a42c0f36..dafe19c8 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,3 @@ - - # Apache Fluss™ Rust (Incubating) ![Experimental](https://img.shields.io/badge/status-experimental-orange) diff --git a/bindings/cpp/README.md b/bindings/cpp/README.md index 2556a4b6..539e3130 100644 --- a/bindings/cpp/README.md +++ b/bindings/cpp/README.md @@ -1,21 +1,3 @@ - - # Apache Fluss™ C++ Bindings (Incubating) C++ bindings for Fluss, built on top of the [fluss-rust](../../crates/fluss) client. The API is exposed via a C++ header ([include/fluss.hpp](include/fluss.hpp)) and implemented with Rust FFI. diff --git a/bindings/python/DEVELOPMENT.md b/bindings/python/DEVELOPMENT.md index e316f5e8..cccd0d1e 100644 --- a/bindings/python/DEVELOPMENT.md +++ b/bindings/python/DEVELOPMENT.md @@ -1,22 +1,3 @@ - - # Development ## Requirements diff --git a/bindings/python/README.md b/bindings/python/README.md index 20c5f552..54a167bc 100644 --- a/bindings/python/README.md +++ b/bindings/python/README.md @@ -18,436 +18,4 @@ # Fluss Python Client -This guide covers how to use the Fluss Python client for reading and writing data to log tables and primary key tables. - -The Python client is async-first, built on top of the Rust core via [PyO3](https://pyo3.rs/), and uses [PyArrow](https://arrow.apache.org/docs/python/) for schema definitions and data interchange. - -## Key Concepts - -- **Log table** — an append-only table (no primary key). Records are immutable once written. Use for event streams, logs, and audit trails. -- **Primary key (PK) table** — a table with a primary key. Supports upsert, delete, and point lookups. -- **Bucket** — the unit of parallelism within a table (similar to Kafka partitions). Each table has one or more buckets. 
Readers subscribe to individual buckets. -- **Partition** — a way to organize data by column values (e.g. by date or region). Each partition contains its own set of buckets. Partitions must be created explicitly before writing. -- **Offset** — the position of a record within a bucket. Used to track reading progress. Start from `EARLIEST_OFFSET` to read all data, or `LATEST_OFFSET` to only read new records. - -## Prerequisites - -You need a running Fluss cluster to use the Python client. See the [Quick-Start guide](../../README.md#quick-start) for how to start a local cluster. - -## Installation - -```bash -pip install pyfluss -``` - -To build from source instead, see the [Development Guide](DEVELOPMENT.md). - -## Quick Start - -A minimal end-to-end example: connect, create a table, write data, and read it back. Assumes a Fluss cluster is running on `localhost:9123`. - -```python -import asyncio -import pyarrow as pa -import fluss - -async def main(): - # Connect - config = fluss.Config({"bootstrap.servers": "127.0.0.1:9123"}) - conn = await fluss.FlussConnection.create(config) - admin = await conn.get_admin() - - # Create a log table - schema = fluss.Schema(pa.schema([ - pa.field("id", pa.int32()), - pa.field("name", pa.string()), - pa.field("score", pa.float32()), - ])) - table_path = fluss.TablePath("fluss", "quick_start") - await admin.create_table(table_path, fluss.TableDescriptor(schema), ignore_if_exists=True) - - # Write - table = await conn.get_table(table_path) - writer = table.new_append().create_writer() - writer.append({"id": 1, "name": "Alice", "score": 95.5}) - writer.append({"id": 2, "name": "Bob", "score": 87.0}) - await writer.flush() - - # Read - num_buckets = (await admin.get_table_info(table_path)).num_buckets - scanner = await table.new_scan().create_record_batch_log_scanner() - scanner.subscribe_buckets({i: fluss.EARLIEST_OFFSET for i in range(num_buckets)}) - print(scanner.to_pandas()) - - # Cleanup - await admin.drop_table(table_path, ignore_if_not_exists=True) - conn.close() - -asyncio.run(main()) -``` - -## Connection Setup - -```python -config = fluss.Config({"bootstrap.servers": "127.0.0.1:9123"}) -conn = await fluss.FlussConnection.create(config) -``` - -The connection also supports context managers: - -```python -with await fluss.FlussConnection.create(config) as conn: - ... 
-``` - -### Configuration Options - -| Key | Description | Default | -|-----|-------------|---------| -| `bootstrap.servers` | Coordinator server address | `127.0.0.1:9123` | -| `request.max.size` | Maximum request size in bytes | `10485760` (10 MB) | -| `writer.acks` | Acknowledgment setting (`all` waits for all replicas) | `all` | -| `writer.retries` | Number of retries on failure | `2147483647` | -| `writer.batch.size` | Batch size for writes in bytes | `2097152` (2 MB) | - -## Admin Operations - -```python -admin = await conn.get_admin() -``` - -### Databases - -```python -await admin.create_database("my_database", ignore_if_exists=True) -databases = await admin.list_databases() -exists = await admin.database_exists("my_database") -await admin.drop_database("my_database", ignore_if_not_exists=True, cascade=True) -``` - -### Tables - -Schemas are defined using PyArrow and wrapped in `fluss.Schema`: - -```python -import pyarrow as pa - -schema = fluss.Schema(pa.schema([ - pa.field("id", pa.int32()), - pa.field("name", pa.string()), - pa.field("amount", pa.int64()), -])) - -table_path = fluss.TablePath("my_database", "my_table") -await admin.create_table(table_path, fluss.TableDescriptor(schema), ignore_if_exists=True) - -table_info = await admin.get_table_info(table_path) -tables = await admin.list_tables("my_database") -await admin.drop_table(table_path, ignore_if_not_exists=True) -``` - -`TableDescriptor` accepts these optional parameters: - -| Parameter | Description | -|---|---| -| `partition_keys` | Column names to partition by (e.g. `["region"]`) | -| `bucket_count` | Number of buckets (parallelism units) for the table | -| `bucket_keys` | Columns used to determine bucket assignment | -| `comment` | Table comment / description | -| `log_format` | Log storage format: `"ARROW"` or `"INDEXED"` | -| `kv_format` | KV storage format for primary key tables: `"INDEXED"` or `"COMPACTED"` | -| `properties` | Table configuration properties as a dict (e.g. `{"table.replication.factor": "1"}`) | -| `custom_properties` | User-defined properties as a dict | - -### Offsets - -```python -# Latest offsets for buckets -offsets = await admin.list_offsets(table_path, bucket_ids=[0, 1], offset_type="latest") - -# By timestamp -offsets = await admin.list_offsets(table_path, bucket_ids=[0], offset_type="timestamp", timestamp=1704067200000) - -# Per-partition offsets -offsets = await admin.list_partition_offsets(table_path, partition_name="US", bucket_ids=[0], offset_type="latest") -``` - -## Log Tables - -Log tables are append-only tables without primary keys, suitable for event streaming. - -### Writing - -Rows can be appended as dicts, lists, or tuples. For bulk writes, use `write_arrow()`, `write_arrow_batch()`, or `write_pandas()`. - -Write methods like `append()` and `write_arrow_batch()` return a `WriteResultHandle`. You can ignore it for fire-and-forget semantics (flush at the end), or `await handle.wait()` to block until the server acknowledges that specific write. 
- -```python -table = await conn.get_table(table_path) -writer = table.new_append().create_writer() - -# Fire-and-forget: queue writes, flush at the end -writer.append({"id": 1, "name": "Alice", "score": 95.5}) -writer.append([2, "Bob", 87.0]) -await writer.flush() - -# Per-record acknowledgment -handle = writer.append({"id": 3, "name": "Charlie", "score": 91.0}) -await handle.wait() - -# Bulk writes -writer.write_arrow(pa_table) # PyArrow Table -writer.write_arrow_batch(record_batch) # PyArrow RecordBatch -writer.write_pandas(df) # Pandas DataFrame -await writer.flush() -``` - -### Reading - -There are two scanner types: -- **Batch scanner** (`create_record_batch_log_scanner()`) — returns Arrow Tables or DataFrames, best for analytics -- **Record scanner** (`create_log_scanner()`) — returns individual records with metadata (offset, timestamp, change type), best for streaming - -And two reading modes: -- **`to_arrow()` / `to_pandas()`** — reads all data from subscribed buckets up to the current latest offset, then returns. Best for one-shot batch reads. -- **`poll_arrow()` / `poll()` / `poll_record_batch()`** — returns whatever data is available within the timeout, then returns. Call in a loop for continuous streaming. - -#### Batch Read (One-Shot) - -```python -num_buckets = (await admin.get_table_info(table_path)).num_buckets - -scanner = await table.new_scan().create_record_batch_log_scanner() -scanner.subscribe_buckets({i: fluss.EARLIEST_OFFSET for i in range(num_buckets)}) - -# Reads everything up to current latest offset, then returns -arrow_table = scanner.to_arrow() -df = scanner.to_pandas() -``` - -#### Continuous Polling - -Use `poll_arrow()` or `poll()` in a loop for streaming consumption: - -```python -# Batch scanner: poll as Arrow Tables -scanner = await table.new_scan().create_record_batch_log_scanner() -scanner.subscribe(bucket_id=0, start_offset=fluss.EARLIEST_OFFSET) - -while True: - result = scanner.poll_arrow(timeout_ms=5000) - if result.num_rows > 0: - print(result.to_pandas()) - -# Record scanner: poll individual records with metadata -scanner = await table.new_scan().create_log_scanner() -scanner.subscribe_buckets({i: fluss.EARLIEST_OFFSET for i in range(num_buckets)}) - -while True: - for record in scanner.poll(timeout_ms=5000): - print(f"offset={record.offset}, change={record.change_type.short_string()}, row={record.row}") -``` - -#### Subscribe from Latest Offset - -To only consume new records (skip existing data), use `LATEST_OFFSET`: - -```python -scanner = await table.new_scan().create_record_batch_log_scanner() -scanner.subscribe(bucket_id=0, start_offset=fluss.LATEST_OFFSET) -``` - -### Column Projection - -```python -scanner = await table.new_scan().project([0, 2]).create_record_batch_log_scanner() -# or by name -scanner = await table.new_scan().project_by_name(["id", "score"]).create_record_batch_log_scanner() -``` - -## Primary Key Tables - -Primary key tables support upsert, delete, and point lookup operations. 
- -### Creating - -Pass `primary_keys` to `fluss.Schema`: - -```python -schema = fluss.Schema( - pa.schema([ - pa.field("id", pa.int32()), - pa.field("name", pa.string()), - pa.field("age", pa.int64()), - ]), - primary_keys=["id"], -) -table_path = fluss.TablePath("fluss", "users") -await admin.create_table(table_path, fluss.TableDescriptor(schema, bucket_count=3), ignore_if_exists=True) -``` - -### Upsert, Delete, Lookup - -```python -table = await conn.get_table(table_path) - -# Upsert (fire-and-forget, flush at the end) -writer = table.new_upsert() -writer.upsert({"id": 1, "name": "Alice", "age": 25}) -writer.upsert({"id": 2, "name": "Bob", "age": 30}) -await writer.flush() - -# Per-record acknowledgment (for read-after-write) -handle = writer.upsert({"id": 3, "name": "Charlie", "age": 35}) -await handle.wait() - -# Delete by primary key -handle = writer.delete({"id": 2}) -await handle.wait() - -# Lookup -lookuper = table.new_lookup() -result = await lookuper.lookup({"id": 1}) -if result: - print(f"Found: name={result['name']}, age={result['age']}") -``` - -### Partial Updates - -Update specific columns while preserving others: - -```python -partial_writer = table.new_upsert(columns=["id", "age"]) -partial_writer.upsert({"id": 1, "age": 27}) # only updates age -await partial_writer.flush() -``` - -## Partitioned Tables - -Partitioned tables distribute data across partitions based on column values. Partitions must be created before writing. - -### Creating and Managing Partitions - -```python -schema = fluss.Schema(pa.schema([ - pa.field("id", pa.int32()), - pa.field("region", pa.string()), - pa.field("value", pa.int64()), -])) - -table_path = fluss.TablePath("fluss", "partitioned_events") -await admin.create_table( - table_path, - fluss.TableDescriptor(schema, partition_keys=["region"], bucket_count=1), - ignore_if_exists=True, -) - -# Create partitions -await admin.create_partition(table_path, {"region": "US"}, ignore_if_exists=True) -await admin.create_partition(table_path, {"region": "EU"}, ignore_if_exists=True) - -# List partitions -partition_infos = await admin.list_partition_infos(table_path) -``` - -### Writing - -Same as non-partitioned tables — include partition column values in each row: - -```python -table = await conn.get_table(table_path) -writer = table.new_append().create_writer() -writer.append({"id": 1, "region": "US", "value": 100}) -writer.append({"id": 2, "region": "EU", "value": 200}) -await writer.flush() -``` - -### Reading - -Use `subscribe_partition()` or `subscribe_partition_buckets()` instead of `subscribe()`: - -```python -scanner = await table.new_scan().create_record_batch_log_scanner() - -# Subscribe to individual partitions -for p in partition_infos: - scanner.subscribe_partition(partition_id=p.partition_id, bucket_id=0, start_offset=fluss.EARLIEST_OFFSET) - -# Or batch-subscribe -scanner.subscribe_partition_buckets({ - (p.partition_id, 0): fluss.EARLIEST_OFFSET for p in partition_infos -}) - -print(scanner.to_pandas()) -``` - -### Partitioned Primary Key Tables - -Partition columns must be part of the primary key. Partitions must be created before upserting. 
- -```python -schema = fluss.Schema( - pa.schema([ - pa.field("user_id", pa.int32()), - pa.field("region", pa.string()), - pa.field("score", pa.int64()), - ]), - primary_keys=["user_id", "region"], -) - -table_path = fluss.TablePath("fluss", "partitioned_users") -await admin.create_table( - table_path, - fluss.TableDescriptor(schema, partition_keys=["region"]), - ignore_if_exists=True, -) - -await admin.create_partition(table_path, {"region": "US"}, ignore_if_exists=True) - -table = await conn.get_table(table_path) -writer = table.new_upsert() -writer.upsert({"user_id": 1, "region": "US", "score": 1234}) -await writer.flush() - -# Lookup includes partition columns -lookuper = table.new_lookup() -result = await lookuper.lookup({"user_id": 1, "region": "US"}) -``` - -## Error Handling - -The client raises `fluss.FlussError` for Fluss-specific errors (connection failures, table not found, invalid operations, etc.): - -```python -try: - await admin.create_table(table_path, table_descriptor) -except fluss.FlussError as e: - print(f"Fluss error: {e.message}") -``` - -Common error scenarios: -- **Connection refused** — Fluss cluster is not running or wrong address in `bootstrap.servers` -- **Table not found** — table doesn't exist or wrong database/table name -- **Partition not found** — writing to a partitioned table before creating partitions -- **Schema mismatch** — row data doesn't match the table schema - -## Data Types - -The Python client uses PyArrow types for schema definitions: - -| PyArrow Type | Fluss Type | Python Type | -|---|---|---| -| `pa.boolean()` | Boolean | `bool` | -| `pa.int8()` / `int16()` / `int32()` / `int64()` | TinyInt / SmallInt / Int / BigInt | `int` | -| `pa.float32()` / `float64()` | Float / Double | `float` | -| `pa.string()` | String | `str` | -| `pa.binary()` | Bytes | `bytes` | -| `pa.date32()` | Date | `datetime.date` | -| `pa.time32("ms")` | Time | `datetime.time` | -| `pa.timestamp("us")` | Timestamp (NTZ) | `datetime.datetime` | -| `pa.timestamp("us", tz="UTC")` | TimestampLTZ | `datetime.datetime` | -| `pa.decimal128(precision, scale)` | Decimal | `decimal.Decimal` | - -All Python native types (`date`, `time`, `datetime`, `Decimal`) work when appending rows via dicts. - -For a complete list of classes, methods, and properties, see the [API Reference](API_REFERENCE.md). +For full documentation, see the [Python user guide](../../website/docs/user-guide/python/). diff --git a/bindings/python/generate_readme.py b/bindings/python/generate_readme.py new file mode 100644 index 00000000..206f9e2a --- /dev/null +++ b/bindings/python/generate_readme.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to you under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Generate bindings/python/GENERATED_README.md from the website docs. 
+
+Usage:
+    python generate_readme.py          # writes GENERATED_README.md
+    python generate_readme.py --check  # exits non-zero if GENERATED_README.md is stale
+"""
+
+from __future__ import annotations
+
+import re
+import sys
+from pathlib import Path
+
+SCRIPT_DIR = Path(__file__).resolve().parent
+DOCS_DIR = SCRIPT_DIR / "../../website/docs/user-guide/python"
+
+# Standard ASF license header, emitted as an HTML comment so it stays invisible
+# in the rendered Markdown (GitHub and the PyPI long description).
+LICENSE_HEADER = """\
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+"""
+
+# Files in the order they should appear in the README.
+SECTIONS: list[str] = [
+    "installation.md",
+    "example/index.md",
+    "example/configuration.md",
+    "example/admin-operations.md",
+    "example/log-tables.md",
+    "example/primary-key-tables.md",
+    "example/partitioned-tables.md",
+    "error-handling.md",
+    "data-types.md",
+    "api-reference.md",
+]
+
+FRONTMATTER_RE = re.compile(r"^---\n.*?^---\n", re.MULTILINE | re.DOTALL)
+
+
+def strip_frontmatter(text: str) -> str:
+    return FRONTMATTER_RE.sub("", text, count=1)
+
+
+def build_readme() -> str:
+    parts = [LICENSE_HEADER, "# Fluss Python Client\n"]
+
+    for section in SECTIONS:
+        path = DOCS_DIR / section
+        if not path.exists():
+            print(f"warning: {path} not found, skipping", file=sys.stderr)
+            continue
+        content = strip_frontmatter(path.read_text()).strip()
+        parts.append(content)
+
+    return "\n\n".join(parts) + "\n"
+
+
+def main() -> None:
+    readme = build_readme()
+    dest = SCRIPT_DIR / "GENERATED_README.md"
+
+    if "--check" in sys.argv:
+        if not dest.exists() or dest.read_text() != readme:
+            print("GENERATED_README.md is out of date. Run: python generate_readme.py")
+            sys.exit(1)
+        print("GENERATED_README.md is up to date.")
+        return
+
+    dest.write_text(readme)
+    print(f"Wrote {dest}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/bindings/python/pyproject.toml b/bindings/python/pyproject.toml
index 0e61b234..0be25a03 100644
--- a/bindings/python/pyproject.toml
+++ b/bindings/python/pyproject.toml
@@ -24,7 +24,7 @@ name = "pyfluss"
 description = "Apache Fluss (incubating) Python Binding"
 authors = [{name = "Apache Fluss", email = "dev@fluss.apache.org"}]
 license = {text = "Apache-2.0"}
-readme = "README.md"
+readme = "GENERATED_README.md"
 requires-python = ">=3.9"
 classifiers = [
     "License :: OSI Approved :: Apache Software License",
diff --git a/crates/fluss/README.md b/crates/fluss/README.md
index bee8ce84..aad8de96 100644
--- a/crates/fluss/README.md
+++ b/crates/fluss/README.md
@@ -1,21 +1,3 @@
-
-
 # Apache Fluss™ Rust Client (Incubating)
 
 Rust client library for [Apache Fluss™](https://fluss.apache.org/). This crate provides the core client used by the fluss-rust workspace and by the Python and C++ bindings.
diff --git a/docs/creating-a-release.md b/docs/creating-a-release.md
index 164b64b7..8b4c868f 100644
--- a/docs/creating-a-release.md
+++ b/docs/creating-a-release.md
@@ -1,21 +1,3 @@
-
-
 # Creating a Fluss Rust Client Release
 
 This document describes in detail how to create a release of the **Fluss clients** (fluss-rust, fluss-python, fluss-cpp) from the [fluss-rust](https://github.com/apache/fluss-rust) repository. It is based on the [Creating a Fluss Release](https://fluss.apache.org/community/how-to-release/creating-a-fluss-release/) guide of the Apache Fluss project and the [release guide of Apache OpenDAL](https://nightlies.apache.org/opendal/opendal-docs-stable/community/release/); releases are source archives plus CI-published crates.io and PyPI.
diff --git a/docs/generate-release-note.md b/docs/generate-release-note.md index 1167f4cd..edbc43bd 100644 --- a/docs/generate-release-note.md +++ b/docs/generate-release-note.md @@ -1,21 +1,3 @@ - - # Generate Release Note Use GitHub's **Generate release notes** to produce a draft from merged PRs between tags. Categories (Added, Fixed, Docs, etc.) are configured in [.github/release.yml](../.github/release.yml). diff --git a/docs/rust-client.md b/docs/rust-client.md index 03968829..e69de29b 100644 --- a/docs/rust-client.md +++ b/docs/rust-client.md @@ -1,755 +0,0 @@ - - -# Fluss Rust Client Guide - -This guide covers how to use the Fluss Rust client for reading and writing data to log tables and primary key tables. - -## Adding to Your Project - -The Fluss Rust client is published to [crates.io](https://crates.io/crates/fluss-rs) as `fluss-rs`. The crate's library name is `fluss`, so you import it with `use fluss::...`. - -```toml -[dependencies] -fluss-rs = "0.1" -tokio = { version = "1", features = ["full"] } -``` - -### Feature Flags - -The Fluss crate supports optional storage backends: - -```toml -[dependencies] -# Default: memory and filesystem storage -fluss-rs = "0.1" - -# With S3 storage support -fluss-rs = { version = "0.1", features = ["storage-s3"] } - -# With OSS storage support -fluss-rs = { version = "0.1", features = ["storage-oss"] } - -# All storage backends -fluss-rs = { version = "0.1", features = ["storage-all"] } -``` - -Available features: -- `storage-memory` (default) - In-memory storage -- `storage-fs` (default) - Local filesystem storage -- `storage-s3` - Amazon S3 storage -- `storage-oss` - Alibaba OSS storage -- `storage-all` - All storage backends - -### Alternative: Git or Path Dependency - -For development against unreleased changes, you can depend on the Git repository or a local checkout: - -```toml -[dependencies] -# From Git -fluss = { git = "https://github.com/apache/fluss-rust.git", package = "fluss-rs" } - -# From local path -fluss = { path = "/path/to/fluss-rust/crates/fluss", package = "fluss-rs" } -``` - -> **Note:** When using `git` or `path` dependencies, the `package = "fluss-rs"` field is required so that Cargo resolves the correct package while still allowing `use fluss::...` imports. - -## Building from Source - -### Prerequisites - -- Rust 1.85+ -- Protobuf compiler (`protoc`) - only required when [building from source](#building-from-source) - - -### 1. Clone the Repository - -```bash -git clone https://github.com/apache/fluss-rust.git -cd fluss-rust -``` - -### 2. Install Dependencies - -The Protobuf compiler (`protoc`) is required to build from source. - -#### macOS - -```bash -brew install protobuf -``` - -#### Ubuntu/Debian - -```bash -sudo apt-get install protobuf-compiler -``` - -### 3. Build the Library - -```bash -cargo build --workspace --all-targets -``` - -## Connection Setup - -```rust -use fluss::client::FlussConnection; -use fluss::config::Config; -use fluss::error::Result; - -#[tokio::main] -async fn main() -> Result<()> { - let mut config = Config::default(); - config.bootstrap_servers = "127.0.0.1:9123".to_string(); - - let conn = FlussConnection::new(config).await?; - - // Use the connection... 
- - Ok(()) -} -``` - -### Configuration Options - -| Option | Description | Default | -|--------|-------------|---------| -| `bootstrap_servers` | Coordinator server address | `127.0.0.1:9123` | -| `writer_request_max_size` | Maximum request size in bytes | 10 MB | -| `writer_acks` | Acknowledgment setting (`all` waits for all replicas) | `all` | -| `writer_retries` | Number of retries on failure | `i32::MAX` | -| `writer_batch_size` | Batch size for writes | 2 MB | - -## Admin Operations - -### Get Admin Interface - -```rust -let admin = conn.get_admin().await?; -``` - -### Database Operations - -```rust -// Create database -admin.create_database("my_database", None, true).await?; - -// List all databases -let databases = admin.list_databases().await?; -println!("Databases: {:?}", databases); - -// Check if database exists -let exists = admin.database_exists("my_database").await?; - -// Get database information -let db_info = admin.get_database_info("my_database").await?; - -// Drop database -admin.drop_database("my_database", true, false).await?; -``` - -### Table Operations - -```rust -use fluss::metadata::{DataTypes, Schema, TableDescriptor, TablePath}; - -// Define table schema -let table_descriptor = TableDescriptor::builder() - .schema( - Schema::builder() - .column("id", DataTypes::int()) - .column("name", DataTypes::string()) - .column("amount", DataTypes::bigint()) - .build()?, - ) - .build()?; - -let table_path = TablePath::new("my_database", "my_table"); - -// Create table -admin.create_table(&table_path, &table_descriptor, true).await?; - -// Get table information -let table_info = admin.get_table_info(&table_path).await?; -println!("Table: {}", table_info); - -// List tables in database -let tables = admin.list_tables("my_database").await?; - -// Check if table exists -let exists = admin.table_exists(&table_path).await?; - -// Drop table -admin.drop_table(&table_path, true).await?; -``` - -### Partition Operations - -```rust -use fluss::metadata::PartitionSpec; -use std::collections::HashMap; - -// List all partitions -let partitions = admin.list_partition_infos(&table_path).await?; - -// List partitions matching a spec -let mut filter = HashMap::new(); -filter.insert("year", "2024"); -let spec = PartitionSpec::new(filter); -let partitions = admin.list_partition_infos_with_spec(&table_path, Some(&spec)).await?; - -// Create partition -admin.create_partition(&table_path, &spec, true).await?; - -// Drop partition -admin.drop_partition(&table_path, &spec, true).await?; -``` - -### Offset Operations - -```rust -use fluss::rpc::message::OffsetSpec; - -let bucket_ids = vec![0, 1, 2]; - -// Get earliest offsets -let earliest = admin.list_offsets(&table_path, &bucket_ids, OffsetSpec::Earliest).await?; - -// Get latest offsets -let latest = admin.list_offsets(&table_path, &bucket_ids, OffsetSpec::Latest).await?; - -// Get offsets for a specific timestamp -let timestamp_ms = 1704067200000; // 2024-01-01 00:00:00 UTC -let offsets = admin.list_offsets(&table_path, &bucket_ids, OffsetSpec::Timestamp(timestamp_ms)).await?; - -// Get offsets for a specific partition -let partition_offsets = admin.list_partition_offsets( - &table_path, - "partition_name", - &bucket_ids, - OffsetSpec::Latest, -).await?; -``` - -### Lake Snapshot - -```rust -// Get latest lake snapshot for lakehouse integration -let snapshot = admin.get_latest_lake_snapshot(&table_path).await?; -println!("Snapshot ID: {}", snapshot.snapshot_id); -``` - -## Log Table Operations - -Log tables are append-only tables without 
primary keys, suitable for event streaming. - -### Creating a Log Table - -```rust -let table_descriptor = TableDescriptor::builder() - .schema( - Schema::builder() - .column("event_id", DataTypes::int()) - .column("event_type", DataTypes::string()) - .column("timestamp", DataTypes::bigint()) - .build()?, - ) - .build()?; - -let table_path = TablePath::new("fluss", "events"); -admin.create_table(&table_path, &table_descriptor, true).await?; -``` - -### Writing to Log Tables - -```rust -use fluss::row::{GenericRow, InternalRow}; - -let table = conn.get_table(&table_path).await?; -let append_writer = table.new_append()?.create_writer()?; - -// Write a single row -let mut row = GenericRow::new(3); -row.set_field(0, 1); // event_id (int) -row.set_field(1, "user_login"); // event_type (string) -row.set_field(2, 1704067200000i64); // timestamp (bigint) - -append_writer.append(&row)?; - -// Write multiple rows -let mut row2 = GenericRow::new(3); -row2.set_field(0, 2); -row2.set_field(1, "page_view"); -row2.set_field(2, 1704067201000i64); - -append_writer.append(&row2)?; - -// Flush to ensure data is persisted -append_writer.flush().await?; -``` - -Write operations (`append`, `upsert`, `delete`) use a **fire-and-forget** pattern for efficient batching. Each call queues the write and returns a `WriteResultFuture` immediately. Call `flush()` to ensure all queued writes are sent to the server. - -If you need per-record acknowledgment, you can await the returned future: - -```rust -// Per-record acknowledgment (blocks until server confirms) -append_writer.append(&row)?.await?; -``` - -### Reading from Log Tables - -```rust -use std::time::Duration; - -let table = conn.get_table(&table_path).await?; -let log_scanner = table.new_scan().create_log_scanner()?; - -// Subscribe to bucket 0 starting from offset 0 -log_scanner.subscribe(0, 0).await?; - -// Poll for records -let records = log_scanner.poll(Duration::from_secs(10)).await?; - -for record in records { - let row = record.row(); - println!( - "event_id={}, event_type={}, timestamp={} @ offset={}", - row.get_int(0), - row.get_string(1), - row.get_long(2), - record.offset() - ); -} -``` - -### Column Projection - -```rust -// Project specific columns by index -let scanner = table.new_scan().project(&[0, 2])?.create_log_scanner()?; - -// Or project by column names -let scanner = table.new_scan().project_by_name(&["event_id", "timestamp"])?.create_log_scanner()?; -``` - -### Subscribe from Specific Offsets - -```rust -use fluss::client::{EARLIEST_OFFSET, LATEST_OFFSET}; - -// Subscribe from earliest available offset -log_scanner.subscribe(0, EARLIEST_OFFSET).await?; - -// Subscribe from latest offset (only new records) -log_scanner.subscribe(0, LATEST_OFFSET).await?; - -// Subscribe from a specific offset -log_scanner.subscribe(0, 42).await?; - -// Subscribe to all buckets -let num_buckets = table.get_table_info().get_num_buckets(); -for bucket_id in 0..num_buckets { - log_scanner.subscribe(bucket_id, 0).await?; -} -``` - -### Subscribe to Multiple Buckets - -```rust -use std::collections::HashMap; - -// Subscribe to multiple buckets at once with specific offsets -let mut bucket_offsets = HashMap::new(); -bucket_offsets.insert(0, 0i64); // bucket 0 from offset 0 -bucket_offsets.insert(1, 100i64); // bucket 1 from offset 100 -log_scanner.subscribe_buckets(&bucket_offsets).await?; -``` - -### Unsubscribe from a Bucket - -```rust -// Unsubscribe from a specific bucket (non-partitioned tables) -log_scanner.unsubscribe(bucket_id).await?; -``` - -### 
Unsubscribe from a Partition - -```rust -// Unsubscribe from a specific partition bucket -log_scanner.unsubscribe_partition(partition_id, bucket_id).await?; -``` - -## Partitioned Log Tables - -Partitioned tables distribute data across partitions based on partition column values, enabling efficient data organization and querying. - -### Creating a Partitioned Log Table - -```rust -use fluss::metadata::{DataTypes, LogFormat, Schema, TableDescriptor, TablePath}; - -let table_descriptor = TableDescriptor::builder() - .schema( - Schema::builder() - .column("event_id", DataTypes::int()) - .column("event_type", DataTypes::string()) - .column("dt", DataTypes::string()) // partition column - .column("region", DataTypes::string()) // partition column - .build()?, - ) - .partitioned_by(vec!["dt", "region"]) // Define partition columns - .log_format(LogFormat::ARROW) - .build()?; - -let table_path = TablePath::new("fluss", "partitioned_events"); -admin.create_table(&table_path, &table_descriptor, true).await?; -``` - -### Writing to Partitioned Log Tables - -Writing works the same as non-partitioned tables. Include partition column values in each row: - -```rust -let table = conn.get_table(&table_path).await?; -let append_writer = table.new_append()?.create_writer()?; - -// Partition column values determine which partition the record goes to -let mut row = GenericRow::new(4); -row.set_field(0, 1); // event_id -row.set_field(1, "user_login"); // event_type -row.set_field(2, "2024-01-15"); // dt (partition column) -row.set_field(3, "US"); // region (partition column) - -append_writer.append(&row)?; -append_writer.flush().await?; -``` - -### Reading from Partitioned Log Tables - -For partitioned tables, use `subscribe_partition()` instead of `subscribe()`: - -```rust -use std::time::Duration; - -let table = conn.get_table(&table_path).await?; -let admin = conn.get_admin().await?; - -// Get partition information -let partitions = admin.list_partition_infos(&table_path).await?; - -let log_scanner = table.new_scan().create_log_scanner()?; - -// Subscribe to each partition's buckets -for partition_info in &partitions { - let partition_id = partition_info.get_partition_id(); - let num_buckets = table.get_table_info().get_num_buckets(); - - for bucket_id in 0..num_buckets { - log_scanner.subscribe_partition(partition_id, bucket_id, 0).await?; - } -} - -// Poll for records -let records = log_scanner.poll(Duration::from_secs(10)).await?; -for record in records { - println!("Record from partition: {:?}", record.row()); -} -``` - -You can also subscribe to multiple partition-buckets at once: - -```rust -use std::collections::HashMap; - -let mut partition_bucket_offsets = HashMap::new(); -partition_bucket_offsets.insert((partition_id, 0), 0i64); // partition, bucket 0, offset 0 -partition_bucket_offsets.insert((partition_id, 1), 0i64); // partition, bucket 1, offset 0 -log_scanner.subscribe_partition_buckets(&partition_bucket_offsets).await?; -``` - -### Managing Partitions - -```rust -use fluss::metadata::PartitionSpec; -use std::collections::HashMap; - -// Create a partition -let mut partition_values = HashMap::new(); -partition_values.insert("dt", "2024-01-15"); -partition_values.insert("region", "EMEA"); -let spec = PartitionSpec::new(partition_values); -admin.create_partition(&table_path, &spec, true).await?; - -// List all partitions -let partitions = admin.list_partition_infos(&table_path).await?; -for partition in &partitions { - println!( - "Partition: id={}, name={}", - partition.get_partition_id(), - 
partition.get_partition_name() // Format: "value1$value2" - ); -} - -// List partitions with filter (partial spec) -let mut partial_values = HashMap::new(); -partial_values.insert("dt", "2024-01-15"); -let partial_spec = PartitionSpec::new(partial_values); -let filtered = admin.list_partition_infos_with_spec(&table_path, Some(&partial_spec)).await?; - -// Drop a partition -admin.drop_partition(&table_path, &spec, true).await?; -``` - -## Primary Key Table Operations - -Primary key tables (KV tables) support upsert, delete, and lookup operations. - -### Creating a Primary Key Table - -```rust -let table_descriptor = TableDescriptor::builder() - .schema( - Schema::builder() - .column("id", DataTypes::int()) - .column("name", DataTypes::string()) - .column("age", DataTypes::bigint()) - .primary_key(vec!["id"]) // Define primary key - .build()?, - ) - .build()?; - -let table_path = TablePath::new("fluss", "users"); -admin.create_table(&table_path, &table_descriptor, true).await?; -``` - -### Upserting Records - -```rust -let table = conn.get_table(&table_path).await?; -let table_upsert = table.new_upsert()?; -let upsert_writer = table_upsert.create_writer()?; - -// Insert or update records -for (id, name, age) in [(1, "Alice", 25i64), (2, "Bob", 30), (3, "Charlie", 35)] { - let mut row = GenericRow::new(3); - row.set_field(0, id); - row.set_field(1, name); - row.set_field(2, age); - upsert_writer.upsert(&row)?; -} -upsert_writer.flush().await?; -``` - -### Updating Records - -```rust -// Update existing record (same primary key) -let mut row = GenericRow::new(3); -row.set_field(0, 1); // id (primary key) -row.set_field(1, "Alice"); // name -row.set_field(2, 26i64); // Updated age - -upsert_writer.upsert(&row)?; -upsert_writer.flush().await?; -``` - -### Deleting Records - -```rust -// Delete by primary key (only primary key field needs to be set) -let mut row = GenericRow::new(3); -row.set_field(0, 2); // id of record to delete - -upsert_writer.delete(&row)?; -upsert_writer.flush().await?; -``` - -### Partial Updates - -Update only specific columns while preserving others: - -```rust -// By column indices -let partial_upsert = table_upsert.partial_update(Some(vec![0, 2]))?; -let partial_writer = partial_upsert.create_writer()?; - -let mut row = GenericRow::new(3); -row.set_field(0, 1); // id (primary key, required) -row.set_field(2, 27i64); // age (will be updated) -// name will remain unchanged - -partial_writer.upsert(&row)?; -partial_writer.flush().await?; - -// By column names -let partial_upsert = table_upsert.partial_update_with_column_names(&["id", "age"])?; -let partial_writer = partial_upsert.create_writer()?; -``` - -### Looking Up Records - -```rust -let mut lookuper = table.new_lookup()?.create_lookuper()?; - -// Create a key row (only primary key fields) -let mut key = GenericRow::new(1); -key.set_field(0, 1); // id to lookup - -let result = lookuper.lookup(&key).await?; - -if let Some(row) = result.get_single_row()? { - println!( - "Found: id={}, name={}, age={}", - row.get_int(0), - row.get_string(1), - row.get_long(2) - ); -} else { - println!("Record not found"); -} -``` - -## Partitioned Primary Key Tables - -Partitioned KV tables combine partitioning with primary key operations. Partition columns must be part of the primary key. 
- -### Creating a Partitioned Primary Key Table - -```rust -use fluss::metadata::{DataTypes, KvFormat, Schema, TableDescriptor, TablePath}; - -let table_descriptor = TableDescriptor::builder() - .schema( - Schema::builder() - .column("user_id", DataTypes::int()) - .column("region", DataTypes::string()) // partition column - .column("zone", DataTypes::bigint()) // partition column - .column("score", DataTypes::bigint()) - // Primary key must include partition columns - .primary_key(vec!["user_id", "region", "zone"]) - .build()?, - ) - .partitioned_by(vec!["region", "zone"]) // Define partition columns - .kv_format(KvFormat::COMPACTED) - .build()?; - -let table_path = TablePath::new("fluss", "partitioned_users"); -admin.create_table(&table_path, &table_descriptor, true).await?; -``` - -### Writing to Partitioned Primary Key Tables - -Upsert and delete operations work the same as non-partitioned KV tables. **Partitions must be created before upserting data.** - -```rust -use fluss::metadata::PartitionSpec; -use std::collections::HashMap; - -let table = conn.get_table(&table_path).await?; - -// Ensure partitions exist before upserting -for (region, zone) in [("APAC", "1"), ("EMEA", "2"), ("US", "3")] { - let mut partition_values = HashMap::new(); - partition_values.insert("region", region); - partition_values.insert("zone", zone); - let spec = PartitionSpec::new(partition_values); - admin.create_partition(&table_path, &spec, true).await?; -} - -let table_upsert = table.new_upsert()?; -let upsert_writer = table_upsert.create_writer()?; - -// Upsert records - partition is determined by partition column values -for (user_id, region, zone, score) in [ - (1001, "APAC", 1i64, 1234i64), - (1002, "EMEA", 2, 2234), - (1003, "US", 3, 3234), -] { - let mut row = GenericRow::new(4); - row.set_field(0, user_id); - row.set_field(1, region); - row.set_field(2, zone); - row.set_field(3, score); - upsert_writer.upsert(&row)?; -} -upsert_writer.flush().await?; - -// Update a record -let mut row = GenericRow::new(4); -row.set_field(0, 1001); -row.set_field(1, "APAC"); -row.set_field(2, 1i64); -row.set_field(3, 5000i64); // Updated score -upsert_writer.upsert(&row)?; -upsert_writer.flush().await?; - -// Delete a record (primary key includes partition columns) -let mut row = GenericRow::new(4); -row.set_field(0, 1002); -row.set_field(1, "EMEA"); -row.set_field(2, 2i64); -upsert_writer.delete(&row)?; -upsert_writer.flush().await?; -``` - -### Looking Up Records in Partitioned Tables - -Lookup requires all primary key columns including partition columns: - -```rust -let mut lookuper = table.new_lookup()?.create_lookuper()?; - -// Key must include all primary key columns (including partition columns) -let mut key = GenericRow::new(3); -key.set_field(0, 1001); // user_id -key.set_field(1, "APAC"); // region (partition column) -key.set_field(2, 1i64); // zone (partition column) - -let result = lookuper.lookup(&key).await?; -if let Some(row) = result.get_single_row()? { - println!("Found: score={}", row.get_long(3)); -} -``` - -> **Note:** Scanning partitioned primary key tables is not supported. Use lookup operations instead. 
- -## Data Types - -| Fluss Type | Rust Type | Method | -|-----------------|----------------|---------------------------------------------------------------------| -| `BOOLEAN` | `bool` | `get_boolean()`, `set_field(idx, bool)` | -| `TINYINT` | `i8` | `get_byte()`, `set_field(idx, i8)` | -| `SMALLINT` | `i16` | `get_short()`, `set_field(idx, i16)` | -| `INT` | `i32` | `get_int()`, `set_field(idx, i32)` | -| `BIGINT` | `i64` | `get_long()`, `set_field(idx, i64)` | -| `FLOAT` | `f32` | `get_float()`, `set_field(idx, f32)` | -| `DOUBLE` | `f64` | `get_double()`, `set_field(idx, f64)` | -| `CHAR` | `&str` | `get_char(idx, length)`, `set_field(idx, &str)` | -| `STRING` | `&str` | `get_string()`, `set_field(idx, &str)` | -| `DECIMAL` | `Decimal` | `get_decimal(idx, precision, scale)`, `set_field(idx, Decimal)` | -| `DATE` | `Date` | `get_date()`, `set_field(idx, Date)` | -| `TIME` | `Time` | `get_time()`, `set_field(idx, Time)` | -| `TIMESTAMP` | `TimestampNtz` | `get_timestamp_ntz(idx, precision)`, `set_field(idx, TimestampNtz)` | -| `TIMESTAMP_LTZ` | `TimestampLtz` | `get_timestamp_ltz(idx, precision)`, `set_field(idx, TimestampLtz)` | -| `BYTES` | `&[u8]` | `get_bytes()`, `set_field(idx, &[u8])` | -| `BINARY(n)` | `&[u8]` | `get_binary(idx, length)`, `set_field(idx, &[u8])` | - diff --git a/docs/verifying-a-release-candidate.md b/docs/verifying-a-release-candidate.md index e67d4efc..dc70f723 100644 --- a/docs/verifying-a-release-candidate.md +++ b/docs/verifying-a-release-candidate.md @@ -1,21 +1,3 @@ - - # How to Verify a Release Candidate This document describes how to verify a release candidate (RC) of the **Fluss clients** (fluss-rust, fluss-python, fluss-cpp) from the [fluss-rust](https://github.com/apache/fluss-rust) repository. It is intended for anyone participating in the release vote (binding or non-binding) and is based on [Verifying a Fluss Release](https://fluss.apache.org/community/how-to-release/verifying-a-fluss-release/) of the Apache Fluss project, adapted for the fluss-rust source distribution and tooling (Rust, Python, C++). diff --git a/justfile b/justfile index c4e1a763..c2a61168 100644 --- a/justfile +++ b/justfile @@ -26,3 +26,7 @@ release [version]: # Usage: just bump-version e.g. just bump-version 0.1.0 0.1.1 bump-version from to: ./scripts/bump-version.sh {{from}} {{to}} + +# Regenerate bindings/python/GENERATED_README.md from website docs. +generate-python-readme: + python bindings/python/generate_readme.py diff --git a/website/babel.config.js b/website/babel.config.js new file mode 100644 index 00000000..e00595da --- /dev/null +++ b/website/babel.config.js @@ -0,0 +1,3 @@ +module.exports = { + presets: [require.resolve('@docusaurus/core/lib/babel/preset')], +}; diff --git a/website/docs/developer-guide/_category_.json b/website/docs/developer-guide/_category_.json new file mode 100644 index 00000000..cc7b01ab --- /dev/null +++ b/website/docs/developer-guide/_category_.json @@ -0,0 +1,4 @@ +{ + "label": "Developer Guide", + "position": 3 +} diff --git a/website/docs/developer-guide/contributing.md b/website/docs/developer-guide/contributing.md new file mode 100644 index 00000000..b0111c35 --- /dev/null +++ b/website/docs/developer-guide/contributing.md @@ -0,0 +1,126 @@ +# Contributing + +Welcome to the development guide for `fluss-rust`! This project builds the Fluss Rust client and language-specific bindings (Python, C++). 
+ +## Prerequisites + +- Rust 1.85+ (see [rust-toolchain.toml](https://github.com/apache/fluss-rust/blob/main/rust-toolchain.toml)) +- Protobuf compiler (`protoc`) + +Install using your preferred package/version manager: + +```bash +# Using mise +mise install protobuf +mise install rust + +# Using Homebrew (macOS) +brew install protobuf + +# Using apt (Ubuntu/Debian) +sudo apt-get install protobuf-compiler +``` + +## IDE Setup + +We recommend [RustRover](https://www.jetbrains.com/rust/) IDE. + +### Importing the Project + +1. Clone the repository: + ```bash + git clone https://github.com/apache/fluss-rust.git + ``` +2. Open RustRover, go to the `Projects` tab, click `Open`, and navigate to the root directory. +3. Click `Open`. + +### Copyright Profile + +Fluss is an Apache project — every file needs an Apache licence header. To automate this in RustRover: + +1. Go to `Settings` > `Editor` > `Copyright` > `Copyright Profiles`. +2. Add a new profile named `Apache` with this text: + ``` + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + ``` +3. Go to `Editor` > `Copyright` and set `Apache` as the default profile. +4. Go to `Editor` > `Copyright` > `Formatting` > `Rust`, choose `Use custom formatting`, then `Use line comment`. +5. Click `Apply`. + +## Project Structure + +``` +crates/fluss — Fluss Rust client crate +crates/examples — Rust client examples +bindings/cpp — C++ bindings +bindings/python — Python bindings (PyO3) +``` + +## Building and Testing + +### Rust Client + +```bash +# Build everything +cargo build --workspace --all-targets + +# Run unit tests +cargo test --workspace + +# Run integration tests (requires a running Fluss cluster) +RUST_TEST_THREADS=1 cargo test --features integration_tests --workspace + +# Run a single test +cargo test test_name +``` + +### Python Bindings + +```bash +cd bindings/python +pip install maturin +pip install -e ".[dev]" +maturin develop +``` + +### C++ Bindings + +```bash +cd bindings/cpp +mkdir -p build && cd build +cmake .. +cmake --build . +``` + +## License Check (cargo-deny) + +We use [cargo-deny](https://embarkstudios.github.io/cargo-deny/) to ensure all dependency licenses are Apache-compatible: + +```bash +cargo install cargo-deny --locked +cargo deny check licenses +``` + +## Formatting and Clippy + +CI runs formatting and clippy checks. 
Run these before submitting a PR: + +```bash +cargo fmt --all +cargo clippy --all-targets --fix --allow-dirty --allow-staged +``` diff --git a/website/docs/developer-guide/release.md b/website/docs/developer-guide/release.md new file mode 100644 index 00000000..70448698 --- /dev/null +++ b/website/docs/developer-guide/release.md @@ -0,0 +1,181 @@ +# Release + +This document describes how to create a release of the Fluss clients (fluss-rust, fluss-python, fluss-cpp) from the [fluss-rust](https://github.com/apache/fluss-rust) repository. It follows the [Apache Fluss release guide](https://fluss.apache.org/community/how-to-release/creating-a-fluss-release/) and the [Apache OpenDAL release guide](https://nightlies.apache.org/opendal/opendal-docs-stable/community/release/). + +Publishing software has legal consequences. This guide complements the [Product Release Policy](https://www.apache.org/legal/release-policy.html) and [Release Distribution Policy](https://infra.apache.org/release-distribution.html). + +## Overview + +1. [Decide to release](#decide-to-release) +2. [Prepare for the release](#prepare-for-the-release) +3. [Build a release candidate](#build-a-release-candidate) +4. [Vote on the release candidate](#vote-on-the-release-candidate) +5. [Fix any issues](#fix-any-issues) (if needed, go back to step 3) +6. [Finalize the release](#finalize-the-release) +7. [Promote the release](#promote-the-release) + +## Decide to Release + +Deciding to release and selecting a Release Manager is a consensus-based decision of the community. Anybody can propose a release on the dev mailing list. + +## Prepare for the Release + +### One-Time Setup + +See [Release Manager Preparation](https://fluss.apache.org/community/how-to-release/release-manager-preparation/) for GPG key setup. For fluss-rust you do **not** need Nexus/Maven. + +### Install Rust + +The release script uses `git archive` and `gpg`. Building or verifying the project requires Rust (match [rust-toolchain.toml](https://github.com/apache/fluss-rust/blob/main/rust-toolchain.toml)). The dependency list script requires Python 3.11+. + +```bash +rustc --version +cargo --version +``` + +To use `just release`, install [just](https://github.com/casey/just). Otherwise run `./scripts/release.sh $RELEASE_VERSION`. + +### Set Environment Variables + +```bash +export RELEASE_VERSION="0.1.0" +export RELEASE_TAG="v${RELEASE_VERSION}" +export SVN_RELEASE_DIR="fluss-rust-${RELEASE_VERSION}" +export LAST_VERSION="0.0.9" # omit for the first release +export NEXT_VERSION="0.2.0" +``` + +### Generate Dependencies List + +Required by [ASF release policy](https://www.apache.org/legal/release-policy.html). Do this on `main` before creating the release branch. 
+ +```bash +git checkout main && git pull +python3 scripts/dependencies.py generate +git add **/DEPENDENCIES*.tsv +git commit -m "chore: update dependency list for release ${RELEASE_VERSION}" +git push origin main +``` + +### Create a Release Branch + +```bash +git checkout main && git pull +git checkout -b release-${RELEASE_VERSION} +git push origin release-${RELEASE_VERSION} +``` + +### Bump Version on Main + +```bash +git checkout main && git pull +./scripts/bump-version.sh $RELEASE_VERSION $NEXT_VERSION +git add Cargo.toml +git commit -m "Bump version to ${NEXT_VERSION}" +git push origin main +``` + +## Build a Release Candidate + +### Set RC Variables + +```bash +export RC_NUM="1" +export RC_TAG="v${RELEASE_VERSION}-rc${RC_NUM}" +export SVN_RC_DIR="fluss-rust-${RELEASE_VERSION}-rc${RC_NUM}" +``` + +### Tag and Push + +```bash +git checkout release-${RELEASE_VERSION} && git pull +git tag -s $RC_TAG -m "${RC_TAG}" +git push origin $RC_TAG +``` + +Pushing the tag triggers CI (GitHub Actions: Release Rust, Release Python). + +### Create Source Artifacts + +```bash +just release $RELEASE_VERSION +# Or: ./scripts/release.sh $RELEASE_VERSION +``` + +This creates under `dist/`: +- `fluss-rust-${RELEASE_VERSION}-incubating.tgz` +- `fluss-rust-${RELEASE_VERSION}-incubating.tgz.sha512` +- `fluss-rust-${RELEASE_VERSION}-incubating.tgz.asc` + +Verify: `gpg --verify dist/fluss-rust-${RELEASE_VERSION}-incubating.tgz.asc dist/fluss-rust-${RELEASE_VERSION}-incubating.tgz` + +### Stage to SVN + +```bash +svn checkout https://dist.apache.org/repos/dist/dev/incubator/fluss fluss-dist-dev --depth=immediates +cd fluss-dist-dev +mkdir $SVN_RC_DIR +cp ../dist/fluss-rust-${RELEASE_VERSION}-incubating.* $SVN_RC_DIR/ +svn add $SVN_RC_DIR +svn commit -m "Add fluss-rust ${RELEASE_VERSION} RC${RC_NUM}" +``` + +## Vote on the Release Candidate + +Start a vote on the dev@ mailing list with subject: `[VOTE] Release Apache Fluss clients ${RELEASE_VERSION} (RC${RC_NUM})` + +The vote is open for at least 72 hours. It requires at least 3 PPMC affirmative votes. If the project is in incubation, a second vote on general@incubator.apache.org is required. + +## Fix Any Issues + +If the vote fails: + +1. Fix issues on `main` or the release branch via PRs. +2. Optionally remove the old RC from dist.apache.org dev. +3. Increment `RC_NUM`, recreate tag and artifacts, and repeat. + +## Finalize the Release + +### Push the Release Tag + +```bash +git checkout $RC_TAG +git tag -s $RELEASE_TAG -m "Release fluss-rust, fluss-python, fluss-cpp ${RELEASE_VERSION}" +git push origin $RELEASE_TAG +``` + +### Deploy Source Artifacts + +```bash +svn mv -m "Release fluss-rust ${RELEASE_VERSION}" \ + https://dist.apache.org/repos/dist/dev/incubator/fluss/$SVN_RC_DIR \ + https://dist.apache.org/repos/dist/release/incubator/fluss/$SVN_RELEASE_DIR +``` + +### Verify Published Packages + +- **Rust:** [crates.io/crates/fluss-rs](https://crates.io/crates/fluss-rs) +- **Python:** [PyPI — pyfluss](https://pypi.org/project/pyfluss/) +- **C++:** Distributed via the source archive + +### Create GitHub Release + +1. Go to [Releases > New release](https://github.com/apache/fluss-rust/releases/new). +2. Choose tag `$RELEASE_TAG`, target `release-${RELEASE_VERSION}`. +3. Generate release notes, add notable/breaking changes and download links. +4. Publish. + +### Update CHANGELOG.md + +Add an entry for `$RELEASE_VERSION` on `main`. + +## Promote the Release + +- Merge website PRs (release blog, download page). 
+- Wait 24 hours, then announce on dev@ and announce@apache.org. + +## See Also + +- [Release Manager Preparation](https://fluss.apache.org/community/how-to-release/release-manager-preparation/) +- [How to Verify a Release Candidate](https://github.com/apache/fluss-rust/blob/main/docs/verifying-a-release-candidate.md) +- [ASF Release Policy](https://www.apache.org/legal/release-policy.html) diff --git a/website/docs/index.md b/website/docs/index.md new file mode 100644 index 00000000..3f8dd5aa --- /dev/null +++ b/website/docs/index.md @@ -0,0 +1,33 @@ +--- +slug: / +sidebar_position: 1 +title: Introduction +--- + +# Introduction + +[Apache Fluss](https://fluss.apache.org/) (incubating) is a streaming storage system built for real-time analytics, serving as the real-time data layer for Lakehouse architectures. + +This documentation covers the **Fluss client libraries** for Rust, Python, and C++, which are developed in the [fluss-rust](https://github.com/apache/fluss-rust) repository. These clients allow you to: + +- **Create and manage** databases, tables, and partitions +- **Write** data to log tables (append-only) and primary key tables (upsert/delete) +- **Read** data via log scanning and key lookups +- **Integrate** with the broader Fluss ecosystem including lakehouse snapshots + +## Client Overview + +| | Rust | Python | C++ | +|---|---|---|---| +| **Package** | [fluss-rs](https://crates.io/crates/fluss-rs) on crates.io | Build from source (PyO3) | Build from source (CMake) | +| **Async runtime** | Tokio | asyncio | Synchronous (Tokio runtime managed internally) | +| **Data format** | Arrow RecordBatch / GenericRow | PyArrow / Pandas / dict | Arrow RecordBatch / GenericRow | +| **Log tables** | Read + Write | Read + Write | Read + Write | +| **Primary key tables** | Upsert + Delete + Lookup | Upsert + Delete + Lookup | Upsert + Delete + Lookup | +| **Partitioned tables** | Full support | Write support | Full support | + +## How This Guide Is Organised + +The **User Guide** walks through installation, configuration, and working with each table type across all three languages. Code examples are shown side by side under **Rust**, **Python**, and **C++** headings. + +The **Developer Guide** covers building from source, running tests, and the release process for contributors. diff --git a/website/docs/user-guide/_category_.json b/website/docs/user-guide/_category_.json new file mode 100644 index 00000000..68ea78e7 --- /dev/null +++ b/website/docs/user-guide/_category_.json @@ -0,0 +1,4 @@ +{ + "label": "User Guide", + "position": 2 +} diff --git a/website/docs/user-guide/cpp/_category_.json b/website/docs/user-guide/cpp/_category_.json new file mode 100644 index 00000000..fbdf7a26 --- /dev/null +++ b/website/docs/user-guide/cpp/_category_.json @@ -0,0 +1,4 @@ +{ + "label": "C++", + "position": 3 +} diff --git a/website/docs/user-guide/cpp/api-reference.md b/website/docs/user-guide/cpp/api-reference.md new file mode 100644 index 00000000..4bbabfb5 --- /dev/null +++ b/website/docs/user-guide/cpp/api-reference.md @@ -0,0 +1,486 @@ +--- +sidebar_position: 2 +--- +# API Reference + +Complete API reference for the Fluss C++ client. 
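+
+Most operations follow the same calling convention: the caller passes an out-parameter that receives the result, and the method returns a `fluss::Result` describing success or failure. A minimal sketch of that pattern (the database and table names are placeholders):
+
+```cpp
+#include <iostream>
+
+#include "fluss.hpp"
+
+int main() {
+    // Configure and connect; every call reports errors via fluss::Result.
+    fluss::Configuration config;
+    config.bootstrap_server = "127.0.0.1:9123";
+
+    fluss::Connection conn;
+    fluss::Result r = fluss::Connection::Create(config, conn);
+    if (!r.Ok()) {
+        std::cerr << "connect failed: " << r.error_message << std::endl;
+        return 1;
+    }
+
+    fluss::Admin admin;
+    r = conn.GetAdmin(admin);
+    if (!r.Ok()) {
+        return 1;
+    }
+
+    // Out-parameter style: the boolean answer is written into `exists`.
+    bool exists = false;
+    r = admin.TableExists(fluss::TablePath("fluss", "my_table"), exists);
+    if (r.Ok()) {
+        std::cout << "table exists: " << std::boolalpha << exists << std::endl;
+    }
+    return 0;
+}
+```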
+ +## `Result` + +| Field / Method | Type | Description | +|---|---|---| +| `error_code` | `int32_t` | 0 for success, non-zero for errors | +| `error_message` | `std::string` | Human-readable error description | +| `Ok()` | `bool` | Returns `true` if operation succeeded (`error_code == 0`) | + +## `Configuration` + +| Field | Type | Default | Description | +|---|---|---|---| +| `bootstrap_server` | `std::string` | `"127.0.0.1:9123"` | Coordinator server address | +| `request_max_size` | `int32_t` | `10485760` (10 MB) | Maximum request size in bytes | +| `writer_acks` | `std::string` | `"all"` | Acknowledgment setting (`"all"`, `"0"`, `"1"`, or `"-1"`) | +| `writer_retries` | `int32_t` | `INT32_MAX` | Number of retries on failure | +| `writer_batch_size` | `int32_t` | `2097152` (2 MB) | Batch size for writes in bytes | +| `scanner_remote_log_prefetch_num` | `size_t` | `4` | Number of remote log segments to prefetch | +| `scanner_remote_log_download_threads` | `size_t` | `3` | Number of threads for remote log downloads | + +## `Connection` + +| Method | Description | +|---|---| +| `static Create(const Configuration& config, Connection& out) -> Result` | Create a connection to a Fluss cluster | +| `GetAdmin(Admin& out) -> Result` | Get the admin interface | +| `GetTable(const TablePath& table_path, Table& out) -> Result` | Get a table for read/write operations | +| `Available() -> bool` | Check if the connection is valid and initialized | + +## `Admin` + +### Database Operations + +| Method | Description | +|---|---| +| `CreateDatabase(const std::string& database_name, const DatabaseDescriptor& descriptor, bool ignore_if_exists) -> Result` | Create a database | +| `DropDatabase(const std::string& name, bool ignore_if_not_exists, bool cascade) -> Result` | Drop a database | +| `ListDatabases(std::vector& out) -> Result` | List all databases | +| `DatabaseExists(const std::string& name, bool& out) -> Result` | Check if a database exists | +| `GetDatabaseInfo(const std::string& name, DatabaseInfo& out) -> Result` | Get database metadata | + +### Table Operations + +| Method | Description | +|---|---| +| `CreateTable(const TablePath& path, const TableDescriptor& descriptor, bool ignore_if_exists) -> Result` | Create a table | +| `DropTable(const TablePath& path, bool ignore_if_not_exists) -> Result` | Drop a table | +| `GetTable(const TablePath& path, TableInfo& out) -> Result` | Get table metadata | +| `ListTables(const std::string& database_name, std::vector& out) -> Result` | List tables in a database | +| `TableExists(const TablePath& path, bool& out) -> Result` | Check if a table exists | + +### Partition Operations + +| Method | Description | +|---|---| +| `CreatePartition(const TablePath& path, const std::unordered_map& partition_spec, bool ignore_if_exists) -> Result` | Create a partition | +| `DropPartition(const TablePath& path, const std::unordered_map& partition_spec, bool ignore_if_not_exists) -> Result` | Drop a partition | +| `ListPartitionInfos(const TablePath& path, std::vector& out) -> Result` | List partition metadata | + +### Offset Operations + +| Method | Description | +|---|---| +| `ListOffsets(const TablePath& path, const std::vector& bucket_ids, const OffsetQuery& query, std::unordered_map& out) -> Result` | Get offsets for buckets | +| `ListPartitionOffsets(const TablePath& path, const std::string& partition_name, const std::vector& bucket_ids, const OffsetQuery& query, std::unordered_map& out) -> Result` | Get offsets for a partition's buckets | + +### Lake Operations + 
+| Method | Description | +|---|---| +| `GetLatestLakeSnapshot(const TablePath& path, LakeSnapshot& out) -> Result` | Get the latest lake snapshot | + +## `Table` + +| Method | Description | +|---|---| +| `NewRow() -> GenericRow` | Create a schema-aware row for this table | +| `NewAppend() -> TableAppend` | Create an append builder for log tables | +| `NewUpsert() -> TableUpsert` | Create an upsert builder for PK tables | +| `NewLookup() -> TableLookup` | Create a lookup builder for PK tables | +| `NewScan() -> TableScan` | Create a scan builder | +| `GetTableInfo() -> TableInfo` | Get table metadata | +| `GetTablePath() -> TablePath` | Get the table path | +| `HasPrimaryKey() -> bool` | Check if the table has a primary key | + +## `TableAppend` + +| Method | Description | +|---|---| +| `CreateWriter(AppendWriter& out) -> Result` | Create an append writer | + +## `TableUpsert` + +| Method | Description | +|---|---| +| `PartialUpdateByIndex(std::vector column_indices) -> TableUpsert&` | Configure partial update by column indices | +| `PartialUpdateByName(std::vector column_names) -> TableUpsert&` | Configure partial update by column names | +| `CreateWriter(UpsertWriter& out) -> Result` | Create an upsert writer | + +## `TableLookup` + +| Method | Description | +|---|---| +| `CreateLookuper(Lookuper& out) -> Result` | Create a lookuper for point lookups | + +## `TableScan` + +| Method | Description | +|---|---| +| `ProjectByIndex(std::vector column_indices) -> TableScan&` | Project columns by index | +| `ProjectByName(std::vector column_names) -> TableScan&` | Project columns by name | +| `CreateLogScanner(LogScanner& out) -> Result` | Create a record-based log scanner | +| `CreateRecordBatchScanner(LogScanner& out) -> Result` | Create an Arrow RecordBatch-based log scanner | + +## `AppendWriter` + +| Method | Description | +|---|---| +| `Append(const GenericRow& row) -> Result` | Append a row (fire-and-forget) | +| `Append(const GenericRow& row, WriteResult& out) -> Result` | Append a row with write acknowledgment | +| `Flush() -> Result` | Flush all pending writes | + +## `UpsertWriter` + +| Method | Description | +|---|---| +| `Upsert(const GenericRow& row) -> Result` | Upsert a row (fire-and-forget) | +| `Upsert(const GenericRow& row, WriteResult& out) -> Result` | Upsert a row with write acknowledgment | +| `Delete(const GenericRow& row) -> Result` | Delete a row by primary key (fire-and-forget) | +| `Delete(const GenericRow& row, WriteResult& out) -> Result` | Delete a row with write acknowledgment | +| `Flush() -> Result` | Flush all pending operations | + +## `WriteResult` + +| Method | Description | +|---|---| +| `Wait() -> Result` | Wait for server acknowledgment of the write | + +## `Lookuper` + +| Method | Description | +|---|---| +| `Lookup(const GenericRow& pk_row, bool& found, GenericRow& out) -> Result` | Lookup a row by primary key | + +## `LogScanner` + +| Method | Description | +|---|---| +| `Subscribe(int32_t bucket_id, int64_t offset) -> Result` | Subscribe to a single bucket at an offset | +| `Subscribe(const std::vector& bucket_offsets) -> Result` | Subscribe to multiple buckets | +| `SubscribePartitionBuckets(int64_t partition_id, int32_t bucket_id, int64_t start_offset) -> Result` | Subscribe to a single partition bucket | +| `SubscribePartitionBuckets(const std::vector& subscriptions) -> Result` | Subscribe to multiple partition buckets | +| `UnsubscribePartition(int64_t partition_id, int32_t bucket_id) -> Result` | Unsubscribe from a partition bucket | +| 
`Poll(int64_t timeout_ms, ScanRecords& out) -> Result` | Poll individual records | +| `PollRecordBatch(int64_t timeout_ms, ArrowRecordBatches& out) -> Result` | Poll Arrow RecordBatches | + +## `GenericRow` + +### Index-Based Getters + +| Method | Description | +|---|---| +| `GetBool(size_t idx) -> bool` | Get boolean value at index | +| `GetInt32(size_t idx) -> int32_t` | Get 32-bit integer at index | +| `GetInt64(size_t idx) -> int64_t` | Get 64-bit integer at index | +| `GetFloat32(size_t idx) -> float` | Get 32-bit float at index | +| `GetFloat64(size_t idx) -> double` | Get 64-bit float at index | +| `GetString(size_t idx) -> std::string` | Get string at index | +| `GetBytes(size_t idx) -> std::vector` | Get binary data at index | +| `GetDate(size_t idx) -> Date` | Get date at index | +| `GetTime(size_t idx) -> Time` | Get time at index | +| `GetTimestamp(size_t idx) -> Timestamp` | Get timestamp at index | +| `DecimalToString(size_t idx) -> std::string` | Get decimal as string at index | + +### Index-Based Setters + +| Method | Description | +|---|---| +| `SetNull(size_t idx)` | Set field to null | +| `SetBool(size_t idx, bool value)` | Set boolean value | +| `SetInt32(size_t idx, int32_t value)` | Set 32-bit integer | +| `SetInt64(size_t idx, int64_t value)` | Set 64-bit integer | +| `SetFloat32(size_t idx, float value)` | Set 32-bit float | +| `SetFloat64(size_t idx, double value)` | Set 64-bit float | +| `SetString(size_t idx, const std::string& value)` | Set string value | +| `SetBytes(size_t idx, const std::vector& value)` | Set binary data | +| `SetDate(size_t idx, const Date& value)` | Set date value | +| `SetTime(size_t idx, const Time& value)` | Set time value | +| `SetTimestampNtz(size_t idx, const Timestamp& value)` | Set timestamp without timezone | +| `SetTimestampLtz(size_t idx, const Timestamp& value)` | Set timestamp with timezone | +| `SetDecimal(size_t idx, const std::string& value)` | Set decimal from string | + +### Name-Based Setters + +When using `table.NewRow()`, the `Set()` method auto-routes to the correct type based on the schema: + +| Method | Description | +|---|---| +| `Set(const std::string& name, bool value)` | Set boolean by column name | +| `Set(const std::string& name, int32_t value)` | Set integer by column name | +| `Set(const std::string& name, int64_t value)` | Set big integer by column name | +| `Set(const std::string& name, float value)` | Set float by column name | +| `Set(const std::string& name, double value)` | Set double by column name | +| `Set(const std::string& name, const std::string& value)` | Set string/decimal by column name | +| `Set(const std::string& name, const Date& value)` | Set date by column name | +| `Set(const std::string& name, const Time& value)` | Set time by column name | +| `Set(const std::string& name, const Timestamp& value)` | Set timestamp by column name | + +### Row Inspection + +| Method | Description | +|---|---| +| `FieldCount() -> size_t` | Get the number of fields | +| `GetType(size_t idx) -> DatumType` | Get the datum type at index | +| `IsNull(size_t idx) -> bool` | Check if field is null | +| `IsDecimal(size_t idx) -> bool` | Check if field is a decimal type | + +## `ScanRecord` + +| Field | Type | Description | +|---|---|---| +| `bucket_id` | `int32_t` | Bucket this record belongs to | +| `offset` | `int64_t` | Record offset in the log | +| `timestamp` | `int64_t` | Record timestamp | +| `row` | `GenericRow` | Row data | + +## `ScanRecords` + +| Method | Description | +|---|---| +| `Size() -> size_t` | 
Number of records | +| `Empty() -> bool` | Check if empty | +| `operator[](size_t idx) -> const ScanRecord&` | Access record by index | +| `begin() / end()` | Iterator support for range-based for loops | + +## `ArrowRecordBatch` + +| Method | Description | +|---|---| +| `GetArrowRecordBatch() -> std::shared_ptr` | Get the underlying Arrow RecordBatch | +| `NumRows() -> int64_t` | Number of rows in the batch | +| `GetTableId() -> int64_t` | Table ID | +| `GetPartitionId() -> int64_t` | Partition ID | +| `GetBucketId() -> int32_t` | Bucket ID | +| `GetBaseOffset() -> int64_t` | First record offset | +| `GetLastOffset() -> int64_t` | Last record offset | + +## `ArrowRecordBatches` + +| Method | Description | +|---|---| +| `Size() -> size_t` | Number of batches | +| `Empty() -> bool` | Check if empty | +| `operator[](size_t idx)` | Access batch by index | +| `begin() / end()` | Iterator support for range-based for loops | + +## `Schema` + +| Method | Description | +|---|---| +| `NewBuilder() -> Schema::Builder` | Create a new schema builder | + +## `Schema::Builder` + +| Method | Description | +|---|---| +| `AddColumn(const std::string& name, const DataType& type) -> Builder&` | Add a column | +| `SetPrimaryKeys(const std::vector& keys) -> Builder&` | Set primary key columns | +| `Build() -> Schema` | Build the schema | + +## `TableDescriptor` + +| Method | Description | +|---|---| +| `NewBuilder() -> TableDescriptor::Builder` | Create a new table descriptor builder | + +## `TableDescriptor::Builder` + +| Method | Description | +|---|---| +| `SetSchema(const Schema& schema) -> Builder&` | Set the table schema | +| `SetPartitionKeys(const std::vector& keys) -> Builder&` | Set partition key columns | +| `SetBucketCount(int32_t count) -> Builder&` | Set the number of buckets | +| `SetBucketKeys(const std::vector& keys) -> Builder&` | Set bucket key columns | +| `SetProperty(const std::string& key, const std::string& value) -> Builder&` | Set a table property | +| `SetComment(const std::string& comment) -> Builder&` | Set a table comment | +| `Build() -> TableDescriptor` | Build the table descriptor | + +## `DataType` + +### Factory Methods + +| Method | Description | +|---|---| +| `DataType::Boolean()` | Boolean type | +| `DataType::TinyInt()` | 8-bit signed integer | +| `DataType::SmallInt()` | 16-bit signed integer | +| `DataType::Int()` | 32-bit signed integer | +| `DataType::BigInt()` | 64-bit signed integer | +| `DataType::Float()` | 32-bit floating point | +| `DataType::Double()` | 64-bit floating point | +| `DataType::String()` | UTF-8 string | +| `DataType::Bytes()` | Binary data | +| `DataType::Date()` | Date (days since epoch) | +| `DataType::Time()` | Time (milliseconds since midnight) | +| `DataType::Timestamp(int precision)` | Timestamp without timezone | +| `DataType::TimestampLtz(int precision)` | Timestamp with timezone | +| `DataType::Decimal(int precision, int scale)` | Decimal with precision and scale | + +### Accessors + +| Method | Description | +|---|---| +| `id() -> TypeId` | Get the type ID | +| `precision() -> int` | Get precision (for Decimal/Timestamp types) | +| `scale() -> int` | Get scale (for Decimal type) | + +## `TablePath` + +| Method / Field | Description | +|---|---| +| `TablePath(const std::string& database, const std::string& table)` | Create a table path | +| `database_name -> std::string` | Database name | +| `table_name -> std::string` | Table name | +| `ToString() -> std::string` | String representation | + +## `TableInfo` + +| Field | Type | Description 
| +|---|---|---| +| `table_id` | `int64_t` | Table ID | +| `schema_id` | `int32_t` | Schema ID | +| `table_path` | `TablePath` | Table path | +| `created_time` | `int64_t` | Creation timestamp | +| `modified_time` | `int64_t` | Last modification timestamp | +| `primary_keys` | `std::vector` | Primary key columns | +| `bucket_keys` | `std::vector` | Bucket key columns | +| `partition_keys` | `std::vector` | Partition key columns | +| `num_buckets` | `int32_t` | Number of buckets | +| `has_primary_key` | `bool` | Whether the table has a primary key | +| `is_partitioned` | `bool` | Whether the table is partitioned | +| `properties` | `std::unordered_map` | Table properties | +| `comment` | `std::string` | Table comment | +| `schema` | `Schema` | Table schema | + +## Temporal Types + +### `Date` + +| Method | Description | +|---|---| +| `Date::FromDays(int32_t days)` | Create from days since epoch | +| `Date::FromYMD(int year, int month, int day)` | Create from year, month, day | +| `Year() -> int` | Get year | +| `Month() -> int` | Get month | +| `Day() -> int` | Get day | + +### `Time` + +| Method | Description | +|---|---| +| `Time::FromMillis(int64_t millis)` | Create from milliseconds since midnight | +| `Time::FromHMS(int hour, int minute, int second)` | Create from hour, minute, second | +| `Hour() -> int` | Get hour | +| `Minute() -> int` | Get minute | +| `Second() -> int` | Get second | +| `Millis() -> int64_t` | Get milliseconds | + +### `Timestamp` + +| Method | Description | +|---|---| +| `Timestamp::FromMillis(int64_t millis)` | Create from milliseconds since epoch | +| `Timestamp::FromMillisNanos(int64_t millis, int32_t nanos)` | Create from milliseconds and nanoseconds | +| `Timestamp::FromTimePoint(std::chrono::system_clock::time_point tp)` | Create from a time point | + +## `PartitionInfo` + +| Field | Type | Description | +|---|---|---| +| `partition_id` | `int64_t` | Partition ID | +| `partition_name` | `std::string` | Partition name | + +## `DatabaseDescriptor` + +| Field | Type | Description | +|---|---|---| +| `comment` | `std::string` | Database comment | +| `properties` | `std::unordered_map` | Custom properties | + +## `DatabaseInfo` + +| Field | Type | Description | +|---|---|---| +| `database_name` | `std::string` | Database name | +| `comment` | `std::string` | Database comment | +| `properties` | `std::unordered_map` | Custom properties | +| `created_time` | `int64_t` | Creation timestamp | +| `modified_time` | `int64_t` | Last modification timestamp | + +## `LakeSnapshot` + +| Field | Type | Description | +|---|---|---| +| `snapshot_id` | `int64_t` | Snapshot ID | +| `bucket_offsets` | `std::vector` | All bucket offsets | + +## `BucketOffset` + +| Field | Type | Description | +|---|---|---| +| `table_id` | `int64_t` | Table ID | +| `partition_id` | `int64_t` | Partition ID | +| `bucket_id` | `int32_t` | Bucket ID | +| `offset` | `int64_t` | Offset value | + +## `OffsetQuery` + +| Method | Description | +|---|---| +| `OffsetQuery::Earliest()` | Query for the earliest available offset | +| `OffsetQuery::Latest()` | Query for the latest offset | +| `OffsetQuery::FromTimestamp(int64_t timestamp_ms)` | Query offset at a specific timestamp | + +## Constants + +| Constant | Value | Description | +|---|---|---| +| `fluss::EARLIEST_OFFSET` | `-2` | Start reading from the earliest available offset | +| `fluss::LATEST_OFFSET` | `-1` | Start reading from the latest offset (only new records) | + +## Enums + +### `TypeId` + +| Value | Description | +|---|---| +| `Boolean` | 
Boolean type | +| `TinyInt` | 8-bit signed integer | +| `SmallInt` | 16-bit signed integer | +| `Int` | 32-bit signed integer | +| `BigInt` | 64-bit signed integer | +| `Float` | 32-bit floating point | +| `Double` | 64-bit floating point | +| `String` | UTF-8 string | +| `Bytes` | Binary data | +| `Date` | Date | +| `Time` | Time | +| `Timestamp` | Timestamp without timezone | +| `TimestampLtz` | Timestamp with timezone | +| `Decimal` | Decimal | + +### `DatumType` + +| Value | C++ Type | Description | +|---|---|---| +| `Null` | -- | Null value | +| `Bool` | `bool` | Boolean | +| `Int32` | `int32_t` | 32-bit integer | +| `Int64` | `int64_t` | 64-bit integer | +| `Float32` | `float` | 32-bit float | +| `Float64` | `double` | 64-bit float | +| `String` | `std::string` | String | +| `Bytes` | `std::vector` | Binary data | +| `DecimalI64` | `int64_t` | Decimal (64-bit internal) | +| `DecimalI128` | `__int128` | Decimal (128-bit internal) | +| `DecimalString` | `std::string` | Decimal (string representation) | +| `Date` | `Date` | Date | +| `Time` | `Time` | Time | +| `TimestampNtz` | `Timestamp` | Timestamp without timezone | +| `TimestampLtz` | `Timestamp` | Timestamp with timezone | + +### `OffsetSpec` + +| Value | Description | +|---|---| +| `Earliest` | Earliest available offset | +| `Latest` | Latest offset | +| `Timestamp` | Offset at a specific timestamp | diff --git a/website/docs/user-guide/cpp/data-types.md b/website/docs/user-guide/cpp/data-types.md new file mode 100644 index 00000000..765b2f12 --- /dev/null +++ b/website/docs/user-guide/cpp/data-types.md @@ -0,0 +1,102 @@ +--- +sidebar_position: 3 +--- +# Data Types + +## Schema DataTypes + +| DataType | Description | +|---|---| +| `DataType::Boolean()` | Boolean value | +| `DataType::TinyInt()` | 8-bit signed integer | +| `DataType::SmallInt()` | 16-bit signed integer | +| `DataType::Int()` | 32-bit signed integer | +| `DataType::BigInt()` | 64-bit signed integer | +| `DataType::Float()` | 32-bit floating point | +| `DataType::Double()` | 64-bit floating point | +| `DataType::String()` | UTF-8 string | +| `DataType::Bytes()` | Binary data | +| `DataType::Date()` | Date (days since epoch) | +| `DataType::Time()` | Time (milliseconds since midnight) | +| `DataType::Timestamp()` | Timestamp without timezone | +| `DataType::TimestampLtz()` | Timestamp with timezone | +| `DataType::Decimal(p, s)` | Decimal with precision and scale | + +## GenericRow Setters + +```cpp +fluss::GenericRow row; +row.SetNull(0); +row.SetBool(1, true); +row.SetInt32(2, 42); +row.SetInt64(3, 1234567890L); +row.SetFloat32(4, 3.14f); +row.SetFloat64(5, 2.71828); +row.SetString(6, "hello"); +row.SetBytes(7, {0x01, 0x02, 0x03}); +``` + +## Name-Based Setters + +When using `table.NewRow()`, you can set fields by column name. 
The setter automatically routes to the correct type based on the schema: + +```cpp +auto row = table.NewRow(); +row.Set("user_id", 1); +row.Set("name", "Alice"); +row.Set("score", 95.5f); +row.Set("balance", "1234.56"); // decimal as string +row.Set("birth_date", fluss::Date::FromYMD(1990, 3, 15)); +row.Set("login_time", fluss::Time::FromHMS(9, 30, 0)); +row.Set("created_at", fluss::Timestamp::FromMillis(1700000000000)); +``` + +## GenericRow Getters + +```cpp +std::string name = result_row.GetString(1); +float score = result_row.GetFloat32(3); +std::string balance = result_row.DecimalToString(4); +fluss::Date date = result_row.GetDate(5); +fluss::Time time = result_row.GetTime(6); +fluss::Timestamp ts = result_row.GetTimestamp(7); +``` + +## DatumType Enum + +| DatumType | C++ Type | Getter | +|---|---|---| +| `Null` | — | `IsNull(idx)` | +| `Bool` | `bool` | `GetBool(idx)` | +| `Int32` | `int32_t` | `GetInt32(idx)` | +| `Int64` | `int64_t` | `GetInt64(idx)` | +| `Float32` | `float` | `GetFloat32(idx)` | +| `Float64` | `double` | `GetFloat64(idx)` | +| `String` | `std::string` | `GetString(idx)` | +| `Bytes` | `std::vector` | `GetBytes(idx)` | +| `Date` | `Date` | `GetDate(idx)` | +| `Time` | `Time` | `GetTime(idx)` | +| `TimestampNtz` | `Timestamp` | `GetTimestamp(idx)` | +| `TimestampLtz` | `Timestamp` | `GetTimestamp(idx)` | +| `DecimalString` | `std::string` | `DecimalToString(idx)` | + +## Type Checking + +```cpp +if (rec.row.GetType(0) == fluss::DatumType::Int32) { + int32_t value = rec.row.GetInt32(0); +} +if (rec.row.IsNull(1)) { + // field is null +} +if (rec.row.IsDecimal(2)) { + std::string decimal_str = rec.row.DecimalToString(2); +} +``` + +## Constants + +```cpp +constexpr int64_t fluss::EARLIEST_OFFSET = -2; // Start from earliest +constexpr int64_t fluss::LATEST_OFFSET = -1; // Start from latest +``` diff --git a/website/docs/user-guide/cpp/error-handling.md b/website/docs/user-guide/cpp/error-handling.md new file mode 100644 index 00000000..bad82291 --- /dev/null +++ b/website/docs/user-guide/cpp/error-handling.md @@ -0,0 +1,134 @@ +--- +sidebar_position: 4 +--- +# Error Handling + +All C++ client operations return a `fluss::Result` struct instead of throwing exceptions. This gives you explicit control over error handling. 
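+
+Because failures are reported through return values rather than exceptions, application code can also propagate a `Result` up the call stack instead of exiting. A minimal sketch, assuming an illustrative table whose schema has `id` and `name` columns:
+
+```cpp
+#include <string>
+
+#include "fluss.hpp"
+
+// Returns the first failing Result, or a successful Result if every step worked.
+static fluss::Result AppendEvent(fluss::Table& table, int32_t id, const std::string& name) {
+    fluss::AppendWriter writer;
+    fluss::Result r = table.NewAppend().CreateWriter(writer);
+    if (!r.Ok()) return r;  // propagate instead of exiting
+
+    auto row = table.NewRow();
+    row.Set("id", id);
+    row.Set("name", name);
+
+    r = writer.Append(row);
+    if (!r.Ok()) return r;
+
+    return writer.Flush();
+}
+```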
+ +## The `Result` Struct + +```cpp +#include "fluss.hpp" + +// All operations return fluss::Result +fluss::Result result = admin.CreateTable(path, descriptor); +if (!result.Ok()) { + std::cerr << "Error code: " << result.error_code << std::endl; + std::cerr << "Error message: " << result.error_message << std::endl; +} +``` + +| Field / Method | Type | Description | +|---|---|---| +| `error_code` | `int32_t` | 0 for success, non-zero for errors | +| `error_message` | `std::string` | Human-readable error description | +| `Ok()` | `bool` | Returns `true` if the operation succeeded | + +## Common Pattern: Helper Function + +A common pattern is to define a `check` helper that exits on failure: + +```cpp +static void check(const char* step, const fluss::Result& r) { + if (!r.Ok()) { + std::cerr << step << " failed: " << r.error_message << std::endl; + std::exit(1); + } +} + +// Usage +fluss::Configuration config; +config.bootstrap_server = "127.0.0.1:9123"; +check("create", fluss::Connection::Create(config, conn)); +check("create_table", admin.CreateTable(table_path, descriptor, true)); +check("flush", writer.Flush()); +``` + +## Connection State Checking + +Use `Available()` to verify that a connection or object is valid before using it: + +```cpp +fluss::Connection conn; +if (!conn.Available()) { + // Connection not initialized or already moved +} + +fluss::Configuration config; +config.bootstrap_server = "127.0.0.1:9123"; +fluss::Result result = fluss::Connection::Create(config, conn); +if (result.Ok() && conn.Available()) { + // Connection is ready to use +} +``` + +## Common Error Scenarios + +### Connection Refused + +The cluster is not running or the address is incorrect: + +```cpp +fluss::Configuration config; +config.bootstrap_server = "127.0.0.1:9123"; +fluss::Connection conn; +fluss::Result result = fluss::Connection::Create(config, conn); +if (!result.Ok()) { + // "Connection refused" or timeout error + std::cerr << "Cannot connect to cluster: " << result.error_message << std::endl; +} +``` + +### Table Not Found + +Attempting to access a table that does not exist: + +```cpp +fluss::Table table; +fluss::Result result = conn.GetTable(fluss::TablePath("fluss", "nonexistent"), table); +if (!result.Ok()) { + // Table not found error + std::cerr << "Table error: " << result.error_message << std::endl; +} +``` + +### Partition Not Found + +Writing to a partitioned primary key table before creating partitions: + +```cpp +// This will fail if partitions are not created first +auto row = table.NewRow(); +row.Set("user_id", 1); +row.Set("region", "US"); +row.Set("score", static_cast(100)); +fluss::WriteResult wr; +fluss::Result result = writer.Upsert(row, wr); +if (!result.Ok()) { + // Partition not found — create partitions before writing + std::cerr << "Write error: " << result.error_message << std::endl; +} +``` + +### Schema Mismatch + +Using incorrect types or column indices when writing: + +```cpp +fluss::GenericRow row; +// Setting wrong type for a column will result in an error +// when the row is sent to the server +row.SetString(0, "not_an_integer"); // Column 0 expects Int +fluss::Result result = writer.Append(row); +if (!result.Ok()) { + std::cerr << "Schema mismatch: " << result.error_message << std::endl; +} +``` + +## Best Practices + +1. **Always check `Result`** -- Never ignore the return value of operations that return `Result`. +2. **Use a helper function** -- Define a `check()` helper to reduce boilerplate for fatal errors. +3. 
**Handle errors gracefully** -- For production code, log errors and retry or fail gracefully instead of calling `std::exit()`. +4. **Verify connection state** -- Use `Available()` to check connection validity before operations. +5. **Create partitions before writing** -- For partitioned primary key tables, always create partitions before attempting upserts. diff --git a/website/docs/user-guide/cpp/example/_category_.json b/website/docs/user-guide/cpp/example/_category_.json new file mode 100644 index 00000000..dd222949 --- /dev/null +++ b/website/docs/user-guide/cpp/example/_category_.json @@ -0,0 +1,4 @@ +{ + "label": "Example", + "position": 5 +} diff --git a/website/docs/user-guide/cpp/example/admin-operations.md b/website/docs/user-guide/cpp/example/admin-operations.md new file mode 100644 index 00000000..2016cb0a --- /dev/null +++ b/website/docs/user-guide/cpp/example/admin-operations.md @@ -0,0 +1,111 @@ +--- +sidebar_position: 3 +--- +# Admin Operations + +## Get Admin Interface + +```cpp +fluss::Admin admin; +check("get_admin", conn.GetAdmin(admin)); +``` + +## Table Operations + +```cpp +fluss::TablePath table_path("fluss", "my_table"); + +auto schema = fluss::Schema::NewBuilder() + .AddColumn("id", fluss::DataType::Int()) + .AddColumn("name", fluss::DataType::String()) + .AddColumn("score", fluss::DataType::Float()) + .AddColumn("age", fluss::DataType::Int()) + .Build(); + +auto descriptor = fluss::TableDescriptor::NewBuilder() + .SetSchema(schema) + .SetBucketCount(3) + .SetComment("Example table") + .Build(); + +// Create table +check("create_table", admin.CreateTable(table_path, descriptor, true)); + +// Get table information +fluss::TableInfo table_info; +check("get_table", admin.GetTable(table_path, table_info)); +std::cout << "Table ID: " << table_info.table_id << std::endl; +std::cout << "Number of buckets: " << table_info.num_buckets << std::endl; +std::cout << "Has primary key: " << table_info.has_primary_key << std::endl; +std::cout << "Is partitioned: " << table_info.is_partitioned << std::endl; + +// Drop table +check("drop_table", admin.DropTable(table_path, true)); +``` + +## Schema Builder Options + +```cpp +// Schema with primary key +auto pk_schema = fluss::Schema::NewBuilder() + .AddColumn("id", fluss::DataType::Int()) + .AddColumn("name", fluss::DataType::String()) + .AddColumn("value", fluss::DataType::Double()) + .SetPrimaryKeys({"id"}) + .Build(); + +// Table descriptor with partitioning +auto descriptor = fluss::TableDescriptor::NewBuilder() + .SetSchema(schema) + .SetPartitionKeys({"date"}) + .SetBucketCount(3) + .SetBucketKeys({"user_id"}) + .SetProperty("retention_days", "7") + .SetComment("Sample table") + .Build(); +``` + +## Offset Operations + +```cpp +std::vector bucket_ids = {0, 1, 2}; + +// Query earliest offsets +std::unordered_map earliest_offsets; +check("list_offsets", + admin.ListOffsets(table_path, bucket_ids, + fluss::OffsetQuery::Earliest(), earliest_offsets)); + +// Query latest offsets +std::unordered_map latest_offsets; +check("list_offsets", + admin.ListOffsets(table_path, bucket_ids, + fluss::OffsetQuery::Latest(), latest_offsets)); + +// Query offsets for a specific timestamp +std::unordered_map timestamp_offsets; +check("list_offsets", + admin.ListOffsets(table_path, bucket_ids, + fluss::OffsetQuery::FromTimestamp(timestamp_ms), + timestamp_offsets)); + +// Query partition offsets +std::unordered_map partition_offsets; +check("list_partition_offsets", + admin.ListPartitionOffsets(table_path, "partition_name", + bucket_ids, 
fluss::OffsetQuery::Latest(), + partition_offsets)); +``` + +## Lake Snapshot + +```cpp +fluss::LakeSnapshot snapshot; +check("get_snapshot", admin.GetLatestLakeSnapshot(table_path, snapshot)); +std::cout << "Snapshot ID: " << snapshot.snapshot_id << std::endl; +for (const auto& bucket_offset : snapshot.bucket_offsets) { + std::cout << " Table " << bucket_offset.table_id + << ", Bucket " << bucket_offset.bucket_id + << ": offset=" << bucket_offset.offset << std::endl; +} +``` diff --git a/website/docs/user-guide/cpp/example/configuration.md b/website/docs/user-guide/cpp/example/configuration.md new file mode 100644 index 00000000..f576eb3c --- /dev/null +++ b/website/docs/user-guide/cpp/example/configuration.md @@ -0,0 +1,49 @@ +--- +sidebar_position: 2 +--- +# Configuration + +## Connection Setup + +```cpp +#include "fluss.hpp" + +fluss::Configuration config; +config.bootstrap_server = "127.0.0.1:9123"; + +fluss::Connection conn; +fluss::Result result = fluss::Connection::Create(config, conn); + +if (!result.Ok()) { + std::cerr << "Connection failed: " << result.error_message << std::endl; +} +``` + +## Configuration Options + +All fields have sensible defaults. Only `bootstrap_server` typically needs to be set. + +```cpp +fluss::Configuration config; +config.bootstrap_server = "127.0.0.1:9123"; // Coordinator address +config.request_max_size = 10 * 1024 * 1024; // Max request size (10 MB) +config.writer_acks = "all"; // Wait for all replicas +config.writer_retries = std::numeric_limits::max(); // Retry on failure +config.writer_batch_size = 2 * 1024 * 1024; // Batch size (2 MB) +config.scanner_remote_log_prefetch_num = 4; // Remote log prefetch count +config.scanner_remote_log_download_threads = 3; // Download threads +``` + +## Error Handling + +All C++ operations return a `fluss::Result`. Check with `Ok()` before continuing: + +```cpp +static void check(const char* step, const fluss::Result& r) { + if (!r.Ok()) { + std::cerr << step << " failed: code=" << r.error_code + << " msg=" << r.error_message << std::endl; + std::exit(1); + } +} +``` diff --git a/website/docs/user-guide/cpp/example/index.md b/website/docs/user-guide/cpp/example/index.md new file mode 100644 index 00000000..e6b59f72 --- /dev/null +++ b/website/docs/user-guide/cpp/example/index.md @@ -0,0 +1,70 @@ +--- +sidebar_position: 1 +--- +# Example + +Minimal working examples: connect to Fluss, create a table, write data, and read it back. 
+ +```cpp +#include +#include "fluss.hpp" + +static void check(const char* step, const fluss::Result& r) { + if (!r.Ok()) { + std::cerr << step << " failed: " << r.error_message << std::endl; + std::exit(1); + } +} + +int main() { + // Connect + fluss::Configuration config; + config.bootstrap_server = "127.0.0.1:9123"; + + fluss::Connection conn; + check("create", fluss::Connection::Create(config, conn)); + + fluss::Admin admin; + check("get_admin", conn.GetAdmin(admin)); + + // Create a log table + fluss::TablePath table_path("fluss", "quickstart_cpp"); + auto schema = fluss::Schema::NewBuilder() + .AddColumn("id", fluss::DataType::Int()) + .AddColumn("name", fluss::DataType::String()) + .Build(); + auto descriptor = fluss::TableDescriptor::NewBuilder() + .SetSchema(schema) + .Build(); + check("create_table", admin.CreateTable(table_path, descriptor, true)); + + // Write + fluss::Table table; + check("get_table", conn.GetTable(table_path, table)); + + fluss::AppendWriter writer; + check("new_writer", table.NewAppend().CreateWriter(writer)); + + fluss::GenericRow row; + row.SetInt32(0, 1); + row.SetString(1, "hello"); + check("append", writer.Append(row)); + check("flush", writer.Flush()); + + // Read + fluss::LogScanner scanner; + check("new_scanner", table.NewScan().CreateLogScanner(scanner)); + auto info = table.GetTableInfo(); + for (int b = 0; b < info.num_buckets; ++b) { + check("subscribe", scanner.Subscribe(b, 0)); + } + fluss::ScanRecords records; + check("poll", scanner.Poll(5000, records)); + for (const auto& rec : records) { + std::cout << "id=" << rec.row.GetInt32(0) + << ", name=" << rec.row.GetString(1) << std::endl; + } + + return 0; +} +``` diff --git a/website/docs/user-guide/cpp/example/log-tables.md b/website/docs/user-guide/cpp/example/log-tables.md new file mode 100644 index 00000000..8b4a840e --- /dev/null +++ b/website/docs/user-guide/cpp/example/log-tables.md @@ -0,0 +1,117 @@ +--- +sidebar_position: 4 +--- +# Log Tables + +Log tables are append-only tables without primary keys, suitable for event streaming. 
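+
+Long-running consumers usually poll in a loop rather than once. The sketch below is illustrative only: it assumes a `scanner` that has already been created and subscribed as shown in the reading section further down, and it stops after a few consecutive empty polls purely to keep the example finite.
+
+```cpp
+// Poll repeatedly; stop after several consecutive empty polls (illustrative only).
+int empty_polls = 0;
+while (empty_polls < 3) {
+    fluss::ScanRecords records;
+    check("poll", scanner.Poll(1000, records));  // 1 second timeout
+
+    if (records.Empty()) {
+        ++empty_polls;
+        continue;
+    }
+    empty_polls = 0;
+
+    for (const auto& rec : records) {
+        std::cout << "bucket=" << rec.bucket_id
+                  << " offset=" << rec.offset
+                  << " event_id=" << rec.row.GetInt32(0) << std::endl;
+    }
+}
+```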
+ +## Creating a Log Table + +```cpp +auto schema = fluss::Schema::NewBuilder() + .AddColumn("event_id", fluss::DataType::Int()) + .AddColumn("event_type", fluss::DataType::String()) + .AddColumn("timestamp", fluss::DataType::BigInt()) + .Build(); + +auto descriptor = fluss::TableDescriptor::NewBuilder() + .SetSchema(schema) + .Build(); + +fluss::TablePath table_path("fluss", "events"); +check("create_table", admin.CreateTable(table_path, descriptor, true)); +``` + +## Writing to Log Tables + +```cpp +fluss::Table table; +check("get_table", conn.GetTable(table_path, table)); + +fluss::AppendWriter writer; +check("new_writer", table.NewAppend().CreateWriter(writer)); + +fluss::GenericRow row; +row.SetInt32(0, 1); // event_id +row.SetString(1, "user_login"); // event_type +row.SetInt64(2, 1704067200000L); // timestamp +check("append", writer.Append(row)); + +check("flush", writer.Flush()); +``` + +## Reading from Log Tables + +```cpp +fluss::LogScanner scanner; +check("new_scanner", table.NewScan().CreateLogScanner(scanner)); + +auto info = table.GetTableInfo(); +for (int b = 0; b < info.num_buckets; ++b) { + check("subscribe", scanner.Subscribe(b, 0)); +} + +fluss::ScanRecords records; +check("poll", scanner.Poll(5000, records)); // timeout in ms + +for (const auto& rec : records) { + std::cout << "event_id=" << rec.row.GetInt32(0) + << " event_type=" << rec.row.GetString(1) + << " timestamp=" << rec.row.GetInt64(2) + << " @ offset=" << rec.offset << std::endl; +} +``` + +**Batch subscribe:** + +```cpp +std::vector subscriptions; +subscriptions.push_back({0, 0}); // bucket 0, offset 0 +subscriptions.push_back({1, 100}); // bucket 1, offset 100 +check("subscribe_batch", scanner.Subscribe(subscriptions)); +``` + +**Arrow RecordBatch polling (high performance):** + +```cpp +#include + +fluss::LogScanner arrow_scanner; +check("new_scanner", table.NewScan().CreateRecordBatchScanner(arrow_scanner)); + +for (int b = 0; b < info.num_buckets; ++b) { + check("subscribe", arrow_scanner.Subscribe(b, 0)); +} + +fluss::ArrowRecordBatches batches; +check("poll", arrow_scanner.PollRecordBatch(5000, batches)); + +for (size_t i = 0; i < batches.Size(); ++i) { + const auto& batch = batches[i]; + if (batch->Available()) { + auto arrow_batch = batch->GetArrowRecordBatch(); + std::cout << "Batch " << i << ": " << arrow_batch->num_rows() << " rows" + << ", partition_id=" << batch->GetPartitionId() + << ", bucket_id=" << batch->GetBucketId() << std::endl; + } +} +``` + +## Column Projection + +```cpp +// Project by column index +fluss::LogScanner projected_scanner; +check("new_scanner", + table.NewScan().ProjectByIndex({0, 2}).CreateLogScanner(projected_scanner)); + +// Project by column name +fluss::LogScanner name_projected_scanner; +check("new_scanner", + table.NewScan().ProjectByName({"event_id", "timestamp"}).CreateLogScanner(name_projected_scanner)); + +// Arrow RecordBatch with projection +fluss::LogScanner projected_arrow_scanner; +check("new_scanner", + table.NewScan().ProjectByIndex({0, 2}).CreateRecordBatchScanner(projected_arrow_scanner)); +``` diff --git a/website/docs/user-guide/cpp/example/partitioned-tables.md b/website/docs/user-guide/cpp/example/partitioned-tables.md new file mode 100644 index 00000000..61a6dacd --- /dev/null +++ b/website/docs/user-guide/cpp/example/partitioned-tables.md @@ -0,0 +1,175 @@ +--- +sidebar_position: 6 +--- +# Partitioned Tables + +Partitioned tables distribute data across partitions based on partition column values, enabling efficient data organization and querying. 
Both log tables and primary key tables support partitioning. + +## Partitioned Log Tables + +### Creating a Partitioned Log Table + +```cpp +auto schema = fluss::Schema::NewBuilder() + .AddColumn("event_id", fluss::DataType::Int()) + .AddColumn("event_type", fluss::DataType::String()) + .AddColumn("dt", fluss::DataType::String()) + .AddColumn("region", fluss::DataType::String()) + .Build(); + +auto descriptor = fluss::TableDescriptor::NewBuilder() + .SetSchema(schema) + .SetPartitionKeys({"dt", "region"}) + .SetBucketCount(3) + .Build(); + +fluss::TablePath table_path("fluss", "partitioned_events"); +check("create_table", admin.CreateTable(table_path, descriptor, true)); +``` + +### Writing to Partitioned Log Tables + +**Partitions must exist before writing data, otherwise the client will by default retry indefinitely.** Include partition column values in each row — the client routes records to the correct partition automatically. + +```cpp +fluss::Table table; +check("get_table", conn.GetTable(table_path, table)); + +fluss::AppendWriter writer; +check("new_writer", table.NewAppend().CreateWriter(writer)); + +fluss::GenericRow row; +row.SetInt32(0, 1); +row.SetString(1, "user_login"); +row.SetString(2, "2024-01-15"); +row.SetString(3, "US"); +check("append", writer.Append(row)); +check("flush", writer.Flush()); +``` + +### Reading from Partitioned Log Tables + +For partitioned tables, use partition-aware subscribe methods. + +```cpp +fluss::Table table; +check("get_table", conn.GetTable(table_path, table)); + +fluss::LogScanner scanner; +check("new_scanner", table.NewScan().CreateLogScanner(scanner)); + +// Subscribe to individual partitions +for (const auto& pi : partition_infos) { + check("subscribe", scanner.SubscribePartitionBuckets(pi.partition_id, 0, 0)); +} + +fluss::ScanRecords records; +check("poll", scanner.Poll(5000, records)); + +for (const auto& rec : records) { + std::cout << "bucket_id=" << rec.bucket_id + << " offset=" << rec.offset << std::endl; +} + +// Or batch-subscribe to all partitions at once +fluss::LogScanner batch_scanner; +check("new_scanner", table.NewScan().CreateLogScanner(batch_scanner)); + +std::vector subs; +for (const auto& pi : partition_infos) { + subs.push_back({pi.partition_id, 0, 0}); +} +check("subscribe", batch_scanner.SubscribePartitionBuckets(subs)); +``` + +### Managing Partitions + +```cpp +// Create a partition +check("create_partition", + admin.CreatePartition(table_path, {{"dt", "2024-01-15"}, {"region", "EMEA"}}, true)); + +// List partitions +std::vector partition_infos; +check("list_partitions", admin.ListPartitionInfos(table_path, partition_infos)); + +// Query partition offsets +std::vector bucket_ids = {0, 1, 2}; +std::unordered_map offsets; +check("list_partition_offsets", + admin.ListPartitionOffsets(table_path, "2024-01-15$US", + bucket_ids, fluss::OffsetQuery::Latest(), offsets)); +``` + +## Partitioned Primary Key Tables + +Partitioned KV tables combine partitioning with primary key operations. Partition columns must be part of the primary key. 
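+
+Deletes work the same way as on non-partitioned primary key tables, except that the row must carry the full primary key, which here includes the partition columns. A sketch, assuming the `partitioned_users` table and upsert writer created in the sections below:
+
+```cpp
+// Delete a row from a partitioned primary key table: all primary key
+// columns, including the partition columns, must be set.
+auto pk = table.NewRow();
+pk.Set("user_id", 1001);
+pk.Set("region", "APAC");
+pk.Set("zone", static_cast<int64_t>(1));
+
+fluss::WriteResult wr;
+check("delete", writer.Delete(pk, wr));
+check("wait", wr.Wait());
+```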
+ +### Creating a Partitioned Primary Key Table + +```cpp +auto schema = fluss::Schema::NewBuilder() + .AddColumn("user_id", fluss::DataType::Int()) + .AddColumn("region", fluss::DataType::String()) + .AddColumn("zone", fluss::DataType::BigInt()) + .AddColumn("score", fluss::DataType::BigInt()) + .SetPrimaryKeys({"user_id", "region", "zone"}) + .Build(); + +auto descriptor = fluss::TableDescriptor::NewBuilder() + .SetSchema(schema) + .SetPartitionKeys({"region", "zone"}) + .SetBucketCount(3) + .Build(); + +fluss::TablePath table_path("fluss", "partitioned_users"); +check("create_table", admin.CreateTable(table_path, descriptor, true)); +``` + +### Writing to Partitioned Primary Key Tables + +**Partitions must exist before upserting data, otherwise the client will by default retry indefinitely.** + +```cpp +fluss::Table table; +check("get_table", conn.GetTable(table_path, table)); + +// Create partitions first +check("create_APAC", admin.CreatePartition(table_path, {{"region", "APAC"}, {"zone", "1"}}, true)); +check("create_EMEA", admin.CreatePartition(table_path, {{"region", "EMEA"}, {"zone", "2"}}, true)); +check("create_US", admin.CreatePartition(table_path, {{"region", "US"}, {"zone", "3"}}, true)); + +fluss::UpsertWriter writer; +check("new_writer", table.NewUpsert().CreateWriter(writer)); + +auto row = table.NewRow(); +row.Set("user_id", 1001); +row.Set("region", "APAC"); +row.Set("zone", static_cast(1)); +row.Set("score", static_cast(1234)); +check("upsert", writer.Upsert(row)); +check("flush", writer.Flush()); +``` + +### Looking Up Records in Partitioned Tables + +Lookup requires all primary key columns including partition columns. + +> **Note:** Scanning partitioned primary key tables is not supported. Use lookup operations instead. + +```cpp +fluss::Lookuper lookuper; +check("new_lookuper", table.NewLookup().CreateLookuper(lookuper)); + +auto pk = table.NewRow(); +pk.Set("user_id", 1001); +pk.Set("region", "APAC"); +pk.Set("zone", static_cast(1)); + +bool found = false; +fluss::GenericRow result; +check("lookup", lookuper.Lookup(pk, found, result)); +if (found) { + std::cout << "score=" << result.GetInt64(3) << std::endl; +} +``` diff --git a/website/docs/user-guide/cpp/example/primary-key-tables.md b/website/docs/user-guide/cpp/example/primary-key-tables.md new file mode 100644 index 00000000..196c4a4d --- /dev/null +++ b/website/docs/user-guide/cpp/example/primary-key-tables.md @@ -0,0 +1,135 @@ +--- +sidebar_position: 5 +--- +# Primary Key Tables + +Primary key tables (KV tables) support upsert, delete, and lookup operations. 
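+
+Code that needs to handle both log tables and primary key tables can use `HasPrimaryKey()` to choose the right writer. A minimal sketch, assuming a connection and `table_path` as in the sections below:
+
+```cpp
+fluss::Table table;
+check("get_table", conn.GetTable(table_path, table));
+
+if (table.HasPrimaryKey()) {
+    // Primary key table: write with an upsert writer.
+    fluss::UpsertWriter upsert_writer;
+    check("new_upsert_writer", table.NewUpsert().CreateWriter(upsert_writer));
+} else {
+    // Log table: write with an append writer.
+    fluss::AppendWriter append_writer;
+    check("new_append_writer", table.NewAppend().CreateWriter(append_writer));
+}
+```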
+ +## Creating a Primary Key Table + +```cpp +auto schema = fluss::Schema::NewBuilder() + .AddColumn("id", fluss::DataType::Int()) + .AddColumn("name", fluss::DataType::String()) + .AddColumn("age", fluss::DataType::BigInt()) + .SetPrimaryKeys({"id"}) + .Build(); + +auto descriptor = fluss::TableDescriptor::NewBuilder() + .SetSchema(schema) + .SetBucketCount(3) + .Build(); + +fluss::TablePath table_path("fluss", "users"); +check("create_table", admin.CreateTable(table_path, descriptor, true)); +``` + +## Upserting Records + +```cpp +fluss::Table table; +check("get_table", conn.GetTable(table_path, table)); + +fluss::UpsertWriter upsert_writer; +check("new_upsert_writer", table.NewUpsert().CreateWriter(upsert_writer)); + +// Fire-and-forget upserts +{ + auto row = table.NewRow(); + row.Set("id", 1); + row.Set("name", "Alice"); + row.Set("age", static_cast(25)); + check("upsert", upsert_writer.Upsert(row)); +} +{ + auto row = table.NewRow(); + row.Set("id", 2); + row.Set("name", "Bob"); + row.Set("age", static_cast(30)); + check("upsert", upsert_writer.Upsert(row)); +} +check("flush", upsert_writer.Flush()); + +// Per-record acknowledgment +{ + auto row = table.NewRow(); + row.Set("id", 3); + row.Set("name", "Charlie"); + row.Set("age", static_cast(35)); + fluss::WriteResult wr; + check("upsert", upsert_writer.Upsert(row, wr)); + check("wait", wr.Wait()); +} +``` + +## Updating Records + +Upsert with the same primary key to update an existing record. + +```cpp +auto row = table.NewRow(); +row.Set("id", 1); +row.Set("name", "Alice Updated"); +row.Set("age", static_cast(26)); +fluss::WriteResult wr; +check("upsert", upsert_writer.Upsert(row, wr)); +check("wait", wr.Wait()); +``` + +## Deleting Records + +```cpp +auto pk_row = table.NewRow(); +pk_row.Set("id", 2); +fluss::WriteResult wr; +check("delete", upsert_writer.Delete(pk_row, wr)); +check("wait", wr.Wait()); +``` + +## Partial Updates + +Update only specific columns while preserving others. + +```cpp +// By column names +fluss::UpsertWriter partial_writer; +check("new_partial_writer", + table.NewUpsert() + .PartialUpdateByName({"id", "age"}) + .CreateWriter(partial_writer)); + +auto row = table.NewRow(); +row.Set("id", 1); +row.Set("age", static_cast(27)); +fluss::WriteResult wr; +check("partial_upsert", partial_writer.Upsert(row, wr)); +check("wait", wr.Wait()); + +// By column indices +fluss::UpsertWriter partial_writer_idx; +check("new_partial_writer", + table.NewUpsert() + .PartialUpdateByIndex({0, 2}) + .CreateWriter(partial_writer_idx)); +``` + +## Looking Up Records + +```cpp +fluss::Lookuper lookuper; +check("new_lookuper", table.NewLookup().CreateLookuper(lookuper)); + +auto pk_row = table.NewRow(); +pk_row.Set("id", 1); + +bool found = false; +fluss::GenericRow result_row; +check("lookup", lookuper.Lookup(pk_row, found, result_row)); + +if (found) { + std::cout << "Found: name=" << result_row.GetString(1) + << ", age=" << result_row.GetInt64(2) << std::endl; +} else { + std::cout << "Not found" << std::endl; +} +``` diff --git a/website/docs/user-guide/cpp/installation.md b/website/docs/user-guide/cpp/installation.md new file mode 100644 index 00000000..e28093e6 --- /dev/null +++ b/website/docs/user-guide/cpp/installation.md @@ -0,0 +1,107 @@ +--- +sidebar_position: 1 +--- +# Installation + +The C++ bindings are not yet published as a package. You need to build from source. 
+ +**Prerequisites:** CMake 3.22+, C++17 compiler, Rust 1.85+, Apache Arrow C++ library + +```bash +git clone https://github.com/apache/fluss-rust.git +cd fluss-rust +``` + +Install dependencies: + +```bash +# macOS +brew install cmake arrow + +# Ubuntu/Debian +sudo apt-get install cmake libarrow-dev +``` + +If Arrow is not available via package manager, build from source: + +```bash +git clone https://github.com/apache/arrow.git +cd arrow/cpp +cmake -B build -DARROW_BUILD_SHARED=ON +cmake --build build +sudo cmake --install build +``` + +Build the C++ bindings: + +```bash +cd bindings/cpp +mkdir -p build && cd build + +# Debug mode +cmake .. + +# Or Release mode +cmake -DCMAKE_BUILD_TYPE=Release .. + +# Build +cmake --build . +``` + +This produces: +- `libfluss_cpp.a` — Static library +- `fluss_cpp_example` — Example executable +- Header files in `include/` + +## Integrating into Your Project + +**Option 1: CMake FetchContent** + +```cmake +include(FetchContent) +FetchContent_Declare( + fluss-cpp + GIT_REPOSITORY https://github.com/apache/fluss-rust.git + SOURCE_SUBDIR bindings/cpp +) +FetchContent_MakeAvailable(fluss-cpp) + +target_link_libraries(your_target PRIVATE fluss_cpp) +``` + +**Option 2: Manual Integration** + +Copy the build artifacts and configure CMake: + +```cmake +find_package(Arrow REQUIRED) + +add_library(fluss_cpp STATIC IMPORTED) +set_target_properties(fluss_cpp PROPERTIES + IMPORTED_LOCATION ${CMAKE_SOURCE_DIR}/lib/libfluss_cpp.a + INTERFACE_INCLUDE_DIRECTORIES ${CMAKE_SOURCE_DIR}/include +) + +target_link_libraries(your_target + PRIVATE + fluss_cpp + Arrow::arrow_shared + ${CMAKE_DL_LIBS} + Threads::Threads +) + +# On macOS, also link these frameworks +if(APPLE) + target_link_libraries(your_target PRIVATE + "-framework CoreFoundation" + "-framework Security" + ) +endif() +``` + +**Option 3: Subdirectory** + +```cmake +add_subdirectory(vendor/fluss-rust/bindings/cpp) +target_link_libraries(your_target PRIVATE fluss_cpp) +``` diff --git a/website/docs/user-guide/python/_category_.json b/website/docs/user-guide/python/_category_.json new file mode 100644 index 00000000..a9f34b47 --- /dev/null +++ b/website/docs/user-guide/python/_category_.json @@ -0,0 +1,4 @@ +{ + "label": "Python", + "position": 2 +} diff --git a/bindings/python/API_REFERENCE.md b/website/docs/user-guide/python/api-reference.md similarity index 87% rename from bindings/python/API_REFERENCE.md rename to website/docs/user-guide/python/api-reference.md index 3749ee1d..9f6ce766 100644 --- a/bindings/python/API_REFERENCE.md +++ b/website/docs/user-guide/python/api-reference.md @@ -1,24 +1,9 @@ - - -# Python API Reference - -Complete API reference for the Fluss Python client. For a usage guide with examples, see the [Python Client Guide](README.md). +Complete API reference for the Fluss Python client. 
## `Config` @@ -288,18 +273,3 @@ Raised for all Fluss-specific errors (connection failures, table not found, sche | `ChangeType.UpdateBefore` (2) | `-U` | Previous value of updated row | | `ChangeType.UpdateAfter` (3) | `+U` | New value of updated row | | `ChangeType.Delete` (4) | `-D` | Delete | - -## Data Types - -| PyArrow Type | Fluss Type | Python Type | -|---|---|---| -| `pa.boolean()` | Boolean | `bool` | -| `pa.int8()` / `int16()` / `int32()` / `int64()` | TinyInt / SmallInt / Int / BigInt | `int` | -| `pa.float32()` / `float64()` | Float / Double | `float` | -| `pa.string()` | String | `str` | -| `pa.binary()` | Bytes | `bytes` | -| `pa.date32()` | Date | `datetime.date` | -| `pa.time32("ms")` | Time | `datetime.time` | -| `pa.timestamp("us")` | Timestamp (NTZ) | `datetime.datetime` | -| `pa.timestamp("us", tz="UTC")` | TimestampLTZ | `datetime.datetime` | -| `pa.decimal128(precision, scale)` | Decimal | `decimal.Decimal` | diff --git a/website/docs/user-guide/python/data-types.md b/website/docs/user-guide/python/data-types.md new file mode 100644 index 00000000..2976ad0c --- /dev/null +++ b/website/docs/user-guide/python/data-types.md @@ -0,0 +1,21 @@ +--- +sidebar_position: 3 +--- +# Data Types + +The Python client uses PyArrow types for schema definitions: + +| PyArrow Type | Fluss Type | Python Type | +|---|---|---| +| `pa.boolean()` | Boolean | `bool` | +| `pa.int8()` / `int16()` / `int32()` / `int64()` | TinyInt / SmallInt / Int / BigInt | `int` | +| `pa.float32()` / `float64()` | Float / Double | `float` | +| `pa.string()` | String | `str` | +| `pa.binary()` | Bytes | `bytes` | +| `pa.date32()` | Date | `datetime.date` | +| `pa.time32("ms")` | Time | `datetime.time` | +| `pa.timestamp("us")` | Timestamp (NTZ) | `datetime.datetime` | +| `pa.timestamp("us", tz="UTC")` | TimestampLTZ | `datetime.datetime` | +| `pa.decimal128(precision, scale)` | Decimal | `decimal.Decimal` | + +All Python native types (`date`, `time`, `datetime`, `Decimal`) work when appending rows via dicts. 
diff --git a/website/docs/user-guide/python/error-handling.md b/website/docs/user-guide/python/error-handling.md new file mode 100644 index 00000000..955ea76b --- /dev/null +++ b/website/docs/user-guide/python/error-handling.md @@ -0,0 +1,19 @@ +--- +sidebar_position: 4 +--- +# Error Handling + +The client raises `fluss.FlussError` for Fluss-specific errors: + +```python +try: + await admin.create_table(table_path, table_descriptor) +except fluss.FlussError as e: + print(f"Fluss error: {e.message}") +``` + +Common error scenarios: +- **Connection refused** — Fluss cluster is not running or wrong address in `bootstrap.servers` +- **Table not found** — table doesn't exist or wrong database/table name +- **Partition not found** — writing to a partitioned table before creating partitions +- **Schema mismatch** — row data doesn't match the table schema diff --git a/website/docs/user-guide/python/example/_category_.json b/website/docs/user-guide/python/example/_category_.json new file mode 100644 index 00000000..dd222949 --- /dev/null +++ b/website/docs/user-guide/python/example/_category_.json @@ -0,0 +1,4 @@ +{ + "label": "Example", + "position": 5 +} diff --git a/website/docs/user-guide/python/example/admin-operations.md b/website/docs/user-guide/python/example/admin-operations.md new file mode 100644 index 00000000..226284e7 --- /dev/null +++ b/website/docs/user-guide/python/example/admin-operations.md @@ -0,0 +1,77 @@ +--- +sidebar_position: 3 +--- +# Admin Operations + +```python +admin = await conn.get_admin() +``` + +## Databases + +```python +await admin.create_database("my_database", ignore_if_exists=True) +databases = await admin.list_databases() +exists = await admin.database_exists("my_database") +await admin.drop_database("my_database", ignore_if_not_exists=True, cascade=True) +``` + +## Tables + +Schemas are defined using PyArrow and wrapped in `fluss.Schema`: + +```python +import pyarrow as pa + +schema = fluss.Schema(pa.schema([ + pa.field("id", pa.int32()), + pa.field("name", pa.string()), + pa.field("amount", pa.int64()), +])) + +table_path = fluss.TablePath("my_database", "my_table") +await admin.create_table(table_path, fluss.TableDescriptor(schema), ignore_if_exists=True) + +table_info = await admin.get_table(table_path) +tables = await admin.list_tables("my_database") +await admin.drop_table(table_path, ignore_if_not_exists=True) +``` + +### TableDescriptor Options + +`TableDescriptor` accepts these optional parameters: + +| Parameter | Description | +|---|---| +| `partition_keys` | Column names to partition by (e.g. `["region"]`) | +| `bucket_count` | Number of buckets (parallelism units) for the table | +| `bucket_keys` | Columns used to determine bucket assignment | +| `comment` | Table comment / description | +| `log_format` | Log storage format: `"ARROW"` or `"INDEXED"` | +| `kv_format` | KV storage format for primary key tables: `"INDEXED"` or `"COMPACTED"` | +| `properties` | Table configuration properties as a dict (e.g. 
`{"table.replication.factor": "1"}`) | +| `custom_properties` | User-defined properties as a dict | + +## Offsets + +```python +# Latest offsets for buckets +offsets = await admin.list_offsets(table_path, bucket_ids=[0, 1], offset_type="latest") + +# By timestamp +offsets = await admin.list_offsets(table_path, bucket_ids=[0], offset_type="timestamp", timestamp=1704067200000) + +# Per-partition offsets +offsets = await admin.list_partition_offsets(table_path, partition_name="US", bucket_ids=[0], offset_type="latest") +``` + +## Lake Snapshot + +```python +snapshot = await admin.get_latest_lake_snapshot(table_path) +print(f"Snapshot ID: {snapshot.snapshot_id}") +print(f"Table buckets: {snapshot.get_table_buckets()}") + +bucket = fluss.TableBucket(table_id=1, bucket=0) +offset = snapshot.get_bucket_offset(bucket) +``` diff --git a/website/docs/user-guide/python/example/configuration.md b/website/docs/user-guide/python/example/configuration.md new file mode 100644 index 00000000..1de82d3a --- /dev/null +++ b/website/docs/user-guide/python/example/configuration.md @@ -0,0 +1,34 @@ +--- +sidebar_position: 2 +--- +# Configuration + +```python +import fluss + +config = fluss.Config({"bootstrap.servers": "127.0.0.1:9123"}) +conn = await fluss.FlussConnection.connect(config) +``` + +The connection also supports context managers: + +```python +with await fluss.FlussConnection.connect(config) as conn: + ... +``` + +## Configuration Options + +| Key | Description | Default | +|-----|-------------|---------| +| `bootstrap.servers` | Coordinator server address | `127.0.0.1:9123` | +| `request.max.size` | Maximum request size in bytes | `10485760` (10 MB) | +| `writer.acks` | Acknowledgment setting (`all` waits for all replicas) | `all` | +| `writer.retries` | Number of retries on failure | `2147483647` | +| `writer.batch.size` | Batch size for writes in bytes | `2097152` (2 MB) | + +Remember to close the connection when done: + +```python +conn.close() +``` diff --git a/website/docs/user-guide/python/example/index.md b/website/docs/user-guide/python/example/index.md new file mode 100644 index 00000000..67ee9202 --- /dev/null +++ b/website/docs/user-guide/python/example/index.md @@ -0,0 +1,46 @@ +--- +sidebar_position: 1 +--- +# Example + +Minimal working example: connect to Fluss, create a table, write data, and read it back. 
+ +```python +import asyncio +import pyarrow as pa +import fluss + +async def main(): + # Connect + config = fluss.Config({"bootstrap.servers": "127.0.0.1:9123"}) + conn = await fluss.FlussConnection.connect(config) + admin = await conn.get_admin() + + # Create a log table + schema = fluss.Schema(pa.schema([ + pa.field("id", pa.int32()), + pa.field("name", pa.string()), + pa.field("score", pa.float32()), + ])) + table_path = fluss.TablePath("fluss", "quick_start") + await admin.create_table(table_path, fluss.TableDescriptor(schema), ignore_if_exists=True) + + # Write + table = await conn.get_table(table_path) + writer = await table.new_append_writer() + writer.append({"id": 1, "name": "Alice", "score": 95.5}) + writer.append({"id": 2, "name": "Bob", "score": 87.0}) + await writer.flush() + + # Read + num_buckets = (await admin.get_table(table_path)).num_buckets + scanner = await table.new_scan().create_batch_scanner() + scanner.subscribe_buckets({i: fluss.EARLIEST_OFFSET for i in range(num_buckets)}) + print(scanner.to_pandas()) + + # Cleanup + await admin.drop_table(table_path, ignore_if_not_exists=True) + conn.close() + +asyncio.run(main()) +``` diff --git a/website/docs/user-guide/python/example/log-tables.md b/website/docs/user-guide/python/example/log-tables.md new file mode 100644 index 00000000..40e232de --- /dev/null +++ b/website/docs/user-guide/python/example/log-tables.md @@ -0,0 +1,110 @@ +--- +sidebar_position: 4 +--- +# Log Tables + +Log tables are append-only tables without primary keys, suitable for event streaming. + +## Creating a Log Table + +```python +import pyarrow as pa + +schema = fluss.Schema(pa.schema([ + pa.field("id", pa.int32()), + pa.field("name", pa.string()), + pa.field("score", pa.float32()), +])) + +table_path = fluss.TablePath("fluss", "events") +await admin.create_table(table_path, fluss.TableDescriptor(schema), ignore_if_exists=True) +``` + +## Writing + +Rows can be appended as dicts, lists, or tuples. For bulk writes, use `write_arrow()`, `write_arrow_batch()`, or `write_pandas()`. + +Write methods like `append()` and `write_arrow_batch()` return a `WriteResultHandle`. You can ignore it for fire-and-forget semantics (flush at the end), or `await handle.wait()` to block until the server acknowledges that specific write. + +```python +table = await conn.get_table(table_path) +writer = await table.new_append_writer() + +# Fire-and-forget: queue writes, flush at the end +writer.append({"id": 1, "name": "Alice", "score": 95.5}) +writer.append([2, "Bob", 87.0]) +await writer.flush() + +# Per-record acknowledgment +handle = writer.append({"id": 3, "name": "Charlie", "score": 91.0}) +await handle.wait() + +# Bulk writes +writer.write_arrow(pa_table) # PyArrow Table +writer.write_arrow_batch(record_batch) # PyArrow RecordBatch +writer.write_pandas(df) # Pandas DataFrame +await writer.flush() +``` + +## Reading + +There are two scanner types: +- **Batch scanner** (`create_batch_scanner()`) — returns Arrow Tables or DataFrames, best for analytics +- **Record scanner** (`create_log_scanner()`) — returns individual records with metadata (offset, timestamp, change type), best for streaming + +And two reading modes: +- **`to_arrow()` / `to_pandas()`** — reads all data from subscribed buckets up to the current latest offset, then returns. Best for one-shot batch reads. +- **`poll_arrow()` / `poll()` / `poll_batches()`** — returns whatever data is available within the timeout, then returns. Call in a loop for continuous streaming. 
+ +### Batch Read (One-Shot) + +```python +num_buckets = (await admin.get_table(table_path)).num_buckets + +scanner = await table.new_scan().create_batch_scanner() +scanner.subscribe_buckets({i: fluss.EARLIEST_OFFSET for i in range(num_buckets)}) + +# Reads everything up to current latest offset, then returns +arrow_table = scanner.to_arrow() +df = scanner.to_pandas() +``` + +### Continuous Polling + +Use `poll_arrow()` or `poll()` in a loop for streaming consumption: + +```python +# Batch scanner: poll as Arrow Tables +scanner = await table.new_scan().create_batch_scanner() +scanner.subscribe(bucket_id=0, start_offset=fluss.EARLIEST_OFFSET) + +while True: + result = scanner.poll_arrow(timeout_ms=5000) + if result.num_rows > 0: + print(result.to_pandas()) + +# Record scanner: poll individual records with metadata +scanner = await table.new_scan().create_log_scanner() +scanner.subscribe_buckets({i: fluss.EARLIEST_OFFSET for i in range(num_buckets)}) + +while True: + for record in scanner.poll(timeout_ms=5000): + print(f"offset={record.offset}, change={record.change_type.short_string()}, row={record.row}") +``` + +### Subscribe from Latest Offset + +To only consume new records (skip existing data), use `LATEST_OFFSET`: + +```python +scanner = await table.new_scan().create_batch_scanner() +scanner.subscribe(bucket_id=0, start_offset=fluss.LATEST_OFFSET) +``` + +## Column Projection + +```python +scanner = await table.new_scan().project([0, 2]).create_batch_scanner() +# or by name +scanner = await table.new_scan().project_by_name(["id", "score"]).create_batch_scanner() +``` diff --git a/website/docs/user-guide/python/example/partitioned-tables.md b/website/docs/user-guide/python/example/partitioned-tables.md new file mode 100644 index 00000000..8b3eb1f3 --- /dev/null +++ b/website/docs/user-guide/python/example/partitioned-tables.md @@ -0,0 +1,96 @@ +--- +sidebar_position: 6 +--- +# Partitioned Tables + +Partitioned tables distribute data across partitions based on column values. Partitions must exist before writing data, otherwise the client will by default retry indefinitely. + +## Creating and Managing Partitions + +```python +import pyarrow as pa + +schema = fluss.Schema(pa.schema([ + pa.field("id", pa.int32()), + pa.field("region", pa.string()), + pa.field("value", pa.int64()), +])) + +table_path = fluss.TablePath("fluss", "partitioned_events") +await admin.create_table( + table_path, + fluss.TableDescriptor(schema, partition_keys=["region"], bucket_count=1), + ignore_if_exists=True, +) + +# Create partitions +await admin.create_partition(table_path, {"region": "US"}, ignore_if_exists=True) +await admin.create_partition(table_path, {"region": "EU"}, ignore_if_exists=True) + +# List partitions +partition_infos = await admin.list_partition_infos(table_path) +``` + +## Writing + +Same as non-partitioned tables - include partition column values in each row. 
**Partitions must exist before writing data, otherwise the client will by default retry indefinitely.** + +```python +table = await conn.get_table(table_path) +writer = await table.new_append_writer() +writer.append({"id": 1, "region": "US", "value": 100}) +writer.append({"id": 2, "region": "EU", "value": 200}) +await writer.flush() +``` + +## Reading + +Use `subscribe_partition()` or `subscribe_partition_buckets()` instead of `subscribe()`: + +```python +scanner = await table.new_scan().create_batch_scanner() + +# Subscribe to individual partitions +for p in partition_infos: + scanner.subscribe_partition(partition_id=p.partition_id, bucket_id=0, start_offset=fluss.EARLIEST_OFFSET) + +# Or batch-subscribe +scanner.subscribe_partition_buckets({ + (p.partition_id, 0): fluss.EARLIEST_OFFSET for p in partition_infos +}) + +print(scanner.to_pandas()) +``` + +## Partitioned Primary Key Tables + +Partition columns must be part of the primary key. Partitions must exist before upserting data, otherwise the client will by default retry indefinitely. + +```python +schema = fluss.Schema( + pa.schema([ + pa.field("user_id", pa.int32()), + pa.field("region", pa.string()), + pa.field("score", pa.int64()), + ]), + primary_keys=["user_id", "region"], +) + +table_path = fluss.TablePath("fluss", "partitioned_users") +await admin.create_table( + table_path, + fluss.TableDescriptor(schema, partition_keys=["region"]), + ignore_if_exists=True, +) + +await admin.create_partition(table_path, {"region": "US"}, ignore_if_exists=True) + +table = await conn.get_table(table_path) +writer = table.new_upsert() +writer.upsert({"user_id": 1, "region": "US", "score": 1234}) +await writer.flush() + +# Lookup includes partition columns +lookuper = table.new_lookup() +result = await lookuper.lookup({"user_id": 1, "region": "US"}) +``` diff --git a/website/docs/user-guide/python/example/primary-key-tables.md b/website/docs/user-guide/python/example/primary-key-tables.md new file mode 100644 index 00000000..13fc05e8 --- /dev/null +++ b/website/docs/user-guide/python/example/primary-key-tables.md @@ -0,0 +1,61 @@ +--- +sidebar_position: 5 +--- +# Primary Key Tables + +Primary key tables support upsert, delete, and point lookup operations. 
+ +## Creating a Primary Key Table + +Pass `primary_keys` to `fluss.Schema`: + +```python +import pyarrow as pa + +schema = fluss.Schema( + pa.schema([ + pa.field("id", pa.int32()), + pa.field("name", pa.string()), + pa.field("age", pa.int64()), + ]), + primary_keys=["id"], +) +table_path = fluss.TablePath("fluss", "users") +await admin.create_table(table_path, fluss.TableDescriptor(schema, bucket_count=3), ignore_if_exists=True) +``` + +## Upsert, Delete, Lookup + +```python +table = await conn.get_table(table_path) + +# Upsert (fire-and-forget, flush at the end) +writer = table.new_upsert() +writer.upsert({"id": 1, "name": "Alice", "age": 25}) +writer.upsert({"id": 2, "name": "Bob", "age": 30}) +await writer.flush() + +# Per-record acknowledgment (for read-after-write) +handle = writer.upsert({"id": 3, "name": "Charlie", "age": 35}) +await handle.wait() + +# Delete by primary key +handle = writer.delete({"id": 2}) +await handle.wait() + +# Lookup +lookuper = table.new_lookup() +result = await lookuper.lookup({"id": 1}) +if result: + print(f"Found: name={result['name']}, age={result['age']}") +``` + +## Partial Updates + +Update specific columns while preserving others: + +```python +partial_writer = table.new_upsert(columns=["id", "age"]) +partial_writer.upsert({"id": 1, "age": 27}) # only updates age +await partial_writer.flush() +``` diff --git a/website/docs/user-guide/python/installation.md b/website/docs/user-guide/python/installation.md new file mode 100644 index 00000000..d5918aea --- /dev/null +++ b/website/docs/user-guide/python/installation.md @@ -0,0 +1,41 @@ +--- +sidebar_position: 1 +--- +# Installation + +```bash +pip install pyfluss +``` + +To build from source instead: + +**Prerequisites:** Python 3.9+, Rust 1.85+ + +```bash +git clone https://github.com/apache/fluss-rust.git +cd fluss-rust/bindings/python +``` + +Install [maturin](https://github.com/PyO3/maturin): + +```bash +pip install maturin +``` + +Build and install: + +```bash +# Development mode (editable) +maturin develop + +# Or build a wheel +maturin build --release +pip install target/wheels/fluss-*.whl +``` + +Verify: + +```python +import fluss +print("Fluss Python bindings installed successfully!") +``` diff --git a/website/docs/user-guide/rust/_category_.json b/website/docs/user-guide/rust/_category_.json new file mode 100644 index 00000000..cdec432d --- /dev/null +++ b/website/docs/user-guide/rust/_category_.json @@ -0,0 +1,4 @@ +{ + "label": "Rust", + "position": 1 +} diff --git a/website/docs/user-guide/rust/api-reference.md b/website/docs/user-guide/rust/api-reference.md new file mode 100644 index 00000000..55841cb1 --- /dev/null +++ b/website/docs/user-guide/rust/api-reference.md @@ -0,0 +1,427 @@ +--- +sidebar_position: 2 +--- +# API Reference + +Complete API reference for the Fluss Rust client. 
+
+## `Config`
+
+| Field | Type | Default | Description |
+|---|---|---|---|
+| `bootstrap_server` | `String` | `"127.0.0.1:9123"` | Coordinator server address |
+| `request_max_size` | `i32` | `10485760` (10 MB) | Maximum request size in bytes |
+| `writer_acks` | `String` | `"all"` | Acknowledgment setting (`"all"` waits for all replicas) |
+| `writer_retries` | `i32` | `i32::MAX` | Number of retries on failure |
+| `writer_batch_size` | `i32` | `2097152` (2 MB) | Batch size for writes in bytes |
+| `scanner_remote_log_prefetch_num` | `usize` | `4` | Number of remote log segments to prefetch |
+| `scanner_remote_log_download_threads` | `usize` | `3` | Number of threads for remote log downloads |
+
+## `FlussConnection`
+
+| Method | Description |
+|---|---|
+| `async fn new(config: Config) -> Result<FlussConnection>` | Create a new connection to a Fluss cluster |
+| `async fn get_admin(&self) -> Result<FlussAdmin>` | Get the admin interface for cluster management |
+| `async fn get_table(&self, table_path: &TablePath) -> Result<FlussTable<'_>>` | Get a table for read/write operations |
+| `fn config(&self) -> &Config` | Get a reference to the connection config |
+
+## `FlussAdmin`
+
+### Database Operations
+
+| Method | Description |
+|---|---|
+| `async fn create_database(&self, name: &str, ignore_if_exists: bool, descriptor: Option<&DatabaseDescriptor>) -> Result<()>` | Create a database |
+| `async fn drop_database(&self, name: &str, ignore_if_not_exists: bool, cascade: bool) -> Result<()>` | Drop a database |
+| `async fn list_databases(&self) -> Result<Vec<String>>` | List all databases |
+| `async fn database_exists(&self, name: &str) -> Result<bool>` | Check if a database exists |
+| `async fn get_database_info(&self, name: &str) -> Result<DatabaseInfo>` | Get database metadata |
+
+### Table Operations
+
+| Method | Description |
+|---|---|
+| `async fn create_table(&self, table_path: &TablePath, descriptor: &TableDescriptor, ignore_if_exists: bool) -> Result<()>` | Create a table |
+| `async fn drop_table(&self, table_path: &TablePath, ignore_if_not_exists: bool) -> Result<()>` | Drop a table |
+| `async fn get_table(&self, table_path: &TablePath) -> Result<TableInfo>` | Get table metadata |
+| `async fn list_tables(&self, database_name: &str) -> Result<Vec<String>>` | List tables in a database |
+| `async fn table_exists(&self, table_path: &TablePath) -> Result<bool>` | Check if a table exists |
+
+### Partition Operations
+
+| Method | Description |
+|---|---|
+| `async fn list_partition_infos(&self, table_path: &TablePath) -> Result<Vec<PartitionInfo>>` | List all partitions |
+| `async fn create_partition(&self, table_path: &TablePath, spec: &PartitionSpec, ignore_if_exists: bool) -> Result<()>` | Create a partition |
+| `async fn drop_partition(&self, table_path: &TablePath, spec: &PartitionSpec, ignore_if_not_exists: bool) -> Result<()>` | Drop a partition |
+
+### Offset Operations
+
+| Method | Description |
+|---|---|
+| `async fn list_offsets(&self, table_path: &TablePath, bucket_ids: &[i32], offset_spec: OffsetSpec) -> Result<HashMap<i32, i64>>` | Get offsets for buckets |
+| `async fn list_partition_offsets(&self, table_path: &TablePath, partition_name: &str, bucket_ids: &[i32], offset_spec: OffsetSpec) -> Result<HashMap<i32, i64>>` | Get offsets for a partition's buckets |
+
+### Lake Operations
+
+| Method | Description |
+|---|---|
+| `async fn get_latest_lake_snapshot(&self, table_path: &TablePath) -> Result<LakeSnapshot>` | Get the latest lake snapshot |
+
+## `FlussTable<'a>`
+
+| Method | Description |
+|---|---|
+| `fn get_table_info(&self) -> &TableInfo` | Get table metadata |
+| `fn new_append(&self) -> Result<TableAppend>` | Create an append builder for log tables |
+| `fn new_scan(&self) -> TableScan<'_>` | Create a scan builder |
+| `fn new_lookup(&self) -> Result<TableLookup>` | Create a lookup builder for PK tables |
+| `fn new_upsert(&self) -> Result<TableUpsert>` | Create an upsert builder for PK tables |
+| `fn has_primary_key(&self) -> bool` | Check if the table has a primary key |
+| `fn table_path(&self) -> &TablePath` | Get the table path |
+
+## `TableAppend`
+
+| Method | Description |
+|---|---|
+| `fn create_writer(&self) -> Result<AppendWriter>` | Create an append writer |
+
+## `AppendWriter`
+
+| Method | Description |
+|---|---|
+| `fn append(&self, row: &impl InternalRow) -> Result<WriteResultFuture>` | Append a row; returns a future for acknowledgment |
+| `fn append_arrow_batch(&self, batch: RecordBatch) -> Result<WriteResultFuture>` | Append an Arrow RecordBatch |
+| `async fn flush(&self) -> Result<()>` | Flush all pending writes to the server |
+
+## `TableScan<'a>`
+
+| Method | Description |
+|---|---|
+| `fn project(self, indices: &[usize]) -> Result<Self>` | Project columns by index |
+| `fn project_by_name(self, names: &[&str]) -> Result<Self>` | Project columns by name |
+| `fn create_log_scanner(self) -> Result<LogScanner>` | Create a record-based log scanner |
+| `fn create_record_batch_log_scanner(self) -> Result<RecordBatchLogScanner>` | Create an Arrow batch-based log scanner |
+
+## `LogScanner`
+
+| Method | Description |
+|---|---|
+| `async fn subscribe(&self, bucket_id: i32, start_offset: i64) -> Result<()>` | Subscribe to a bucket |
+| `async fn subscribe_buckets(&self, bucket_offsets: &HashMap<i32, i64>) -> Result<()>` | Subscribe to multiple buckets |
+| `async fn subscribe_partition(&self, partition_id: i64, bucket_id: i32, start_offset: i64) -> Result<()>` | Subscribe to a partition bucket |
+| `async fn subscribe_partition_buckets(&self, offsets: &HashMap<(i64, i32), i64>) -> Result<()>` | Subscribe to multiple partition-bucket pairs |
+| `async fn unsubscribe_partition(&self, partition_id: i64, bucket_id: i32) -> Result<()>` | Unsubscribe from a partition bucket |
+| `async fn poll(&self, timeout: Duration) -> Result<ScanRecords>` | Poll for records |
+
+## `RecordBatchLogScanner`
+
+| Method | Description |
+|---|---|
+| `async fn subscribe(&self, bucket_id: i32, start_offset: i64) -> Result<()>` | Subscribe to a bucket |
+| `async fn subscribe_partition(&self, partition_id: i64, bucket_id: i32, start_offset: i64) -> Result<()>` | Subscribe to a partition bucket |
+| `async fn poll(&self, timeout: Duration) -> Result<Vec<ScanBatch>>` | Poll for Arrow record batches |
+
+## `ScanRecord`
+
+| Method | Description |
+|---|---|
+| `fn row(&self) -> &dyn InternalRow` | Get the row data |
+| `fn offset(&self) -> i64` | Record offset in the log |
+| `fn timestamp(&self) -> i64` | Record timestamp |
+| `fn change_type(&self) -> &ChangeType` | Change type (AppendOnly, Insert, etc.) |
+
+## `ScanRecords`
+
+| Method | Description |
+|---|---|
+| `fn count(&self) -> usize` | Number of records |
+| `fn is_empty(&self) -> bool` | Whether the result set is empty |
+| `fn records(&self, bucket: &TableBucket) -> &[ScanRecord]` | Get records for a specific bucket |
+| `fn records_by_buckets(&self) -> &HashMap<TableBucket, Vec<ScanRecord>>` | Get all records grouped by bucket |
+
+`ScanRecords` also implements `IntoIterator`, so you can iterate over all records directly:
+
+```rust
+for record in records {
+    println!("offset={}", record.offset());
+}
+```
+
+## `ScanBatch`
+
+| Method | Description |
+|---|---|
+| `fn bucket(&self) -> &TableBucket` | Bucket this batch belongs to |
+| `fn batch(&self) -> &RecordBatch` | Arrow RecordBatch data |
+| `fn base_offset(&self) -> i64` | First record offset |
+| `fn last_offset(&self) -> i64` | Last record offset |
+| `fn num_records(&self) -> usize` | Number of records in the batch |
+
+## `TableUpsert`
+
+| Method | Description |
+|---|---|
+| `fn create_writer(&self) -> Result<UpsertWriter>` | Create an upsert writer |
+| `fn partial_update(&self, column_indices: Option<Vec<usize>>) -> Result<TableUpsert>` | Create a partial update builder by column indices |
+| `fn partial_update_with_column_names(&self, names: &[&str]) -> Result<TableUpsert>` | Create a partial update builder by column names |
+
+## `UpsertWriter`
+
+| Method | Description |
+|---|---|
+| `fn upsert(&self, row: &impl InternalRow) -> Result<WriteResultFuture>` | Upsert a row (insert or update by PK) |
+| `fn delete(&self, row: &impl InternalRow) -> Result<WriteResultFuture>` | Delete a row by primary key |
+| `async fn flush(&self) -> Result<()>` | Flush all pending operations |
+
+## `TableLookup`
+
+| Method | Description |
+|---|---|
+| `fn create_lookuper(&self) -> Result<Lookuper>` | Create a lookuper for point lookups |
+
+## `Lookuper`
+
+| Method | Description |
+|---|---|
+| `async fn lookup(&mut self, key: &impl InternalRow) -> Result<LookupResult>` | Lookup a row by primary key |
+
+## `LookupResult`
+
+| Method | Description |
+|---|---|
+| `fn get_single_row(&self) -> Result>` | Get a single row from the result |
+| `fn get_rows(&self) -> Vec` | Get all rows from the result |
+
+## `WriteResultFuture`
+
+| Description |
+|---|
+| Implements `Future<Output = Result<()>>`. Await to wait for server acknowledgment. Returned by `append()`, `upsert()`, and `delete()`. |
+
+Usage:
+
+```rust
+// Fire-and-forget (batched)
+writer.append(&row)?;
+writer.flush().await?;
+
+// Per-record acknowledgment
+writer.append(&row)?.await?;
+```
+
+## `Schema`
+
+| Method | Description |
+|---|---|
+| `fn builder() -> SchemaBuilder` | Create a schema builder |
+| `fn columns(&self) -> &[Column]` | Get all columns |
+| `fn primary_key(&self) -> Option<&PrimaryKey>` | Get primary key (None if no primary key) |
+| `fn column_names(&self) -> Vec<&str>` | Get all column names |
+| `fn primary_key_indexes(&self) -> Vec<usize>` | Get primary key column indices |
+
+## `SchemaBuilder`
+
+| Method | Description |
+|---|---|
+| `fn column(name: &str, data_type: DataType) -> Self` | Add a column |
+| `fn primary_key(keys: Vec<&str>) -> Self` | Set primary key columns |
+| `fn build() -> Result<Schema>` | Build the schema |
+
+## `TableDescriptor`
+
+| Method | Description |
+|---|---|
+| `fn builder() -> TableDescriptorBuilder` | Create a table descriptor builder |
+| `fn schema(&self) -> &Schema` | Get the table schema |
+| `fn partition_keys(&self) -> &[String]` | Get partition key column names |
+| `fn has_primary_key(&self) -> bool` | Check if the table has a primary key |
+| `fn properties(&self) -> &HashMap<String, String>` | Get all table properties |
+| `fn comment(&self) -> Option<&str>` | Get table comment |
+
+## `TableDescriptorBuilder`
+
+| Method | Description |
+|---|---|
+| `fn schema(schema: Schema) -> Self` | Set the schema |
+| `fn log_format(format: LogFormat) -> Self` | Set log format (e.g., `LogFormat::ARROW`) |
+| `fn kv_format(format: KvFormat) -> Self` | Set KV format (e.g., `KvFormat::COMPACTED`) |
+| `fn property(key: &str, value: &str) -> Self` | Set a table property |
+| `fn partitioned_by(keys: Vec<&str>) -> Self` | Set partition columns |
+| `fn distributed_by(bucket_count: Option<i32>, bucket_keys: Vec<String>) -> Self` | Set bucket distribution |
+| `fn comment(comment: &str) -> Self` | Set table comment |
+| `fn build() -> Result<TableDescriptor>` | Build the table descriptor |
+
+## `TablePath`
+
+| Method | Description |
+|---|---|
+| `TablePath::new(database: &str, table: &str) -> Self` | Create a table path |
+| `fn database(&self) -> &str` | Get database name |
+| `fn table(&self) -> &str` | Get table name |
+
+## `TableInfo`
+
+| Field / Method | Description |
+|---|---|
+| `.table_path` | `TablePath` -- Table path |
+| `.table_id` | `i64` -- Table ID |
+| `.schema_id` | `i32` -- Schema ID |
+| `.schema` | `Schema` -- Table schema |
+| `.primary_keys` | `Vec<String>` -- Primary key column names |
+| `.partition_keys` | `Vec<String>` -- Partition key column names |
+| `.num_buckets` | `i32` -- Number of buckets |
+| `.properties` | `HashMap<String, String>` -- All table properties |
+| `.custom_properties` | `HashMap<String, String>` -- Custom properties only |
+| `.comment` | `Option<String>` -- Table comment |
+| `.created_time` | `i64` -- Creation timestamp |
+| `.modified_time` | `i64` -- Last modification timestamp |
+
+## `TableBucket`
+
+| Method | Description |
+|---|---|
+| `TableBucket::new(table_id: i64, bucket_id: i32) -> Self` | Create a non-partitioned bucket |
+| `TableBucket::new_with_partition(table_id: i64, partition_id: Option<i64>, bucket_id: i32) -> Self` | Create a partitioned bucket |
+| `fn table_id(&self) -> i64` | Get table ID |
+| `fn partition_id(&self) -> Option<i64>` | Get partition ID (None if non-partitioned) |
+| `fn bucket_id(&self) -> i32` | Get bucket ID |
+
+## `PartitionSpec`
+
+| Method | Description |
+|---|---|
+| `PartitionSpec::new(spec_map: HashMap<&str, &str>) -> Self` | Create from a map of partition column names to values |
+| `fn get_spec_map(&self) -> &HashMap<String, String>` | Get the partition spec map |
+
+## `PartitionInfo`
+
+| Method | Description |
+|---|---|
+| `fn get_partition_id(&self) -> i64` | Get partition ID |
+| `fn get_partition_name(&self) -> String` | Get partition name |
+
+## `DatabaseDescriptor`
+
+| Method | Description |
+|---|---|
+| `fn builder() -> DatabaseDescriptorBuilder` | Create a database descriptor builder |
+| `fn comment(&self) -> Option<&str>` | Get database comment |
+| `fn custom_properties(&self) -> &HashMap<String, String>` | Get custom properties |
+
+## `DatabaseDescriptorBuilder`
+
+| Method | Description |
+|---|---|
+| `fn comment(comment: impl Into<String>) -> Self` | Set database comment |
+| `fn custom_properties(properties: HashMap<impl Into<String>, impl Into<String>>) -> Self` | Set custom properties |
+| `fn custom_property(key: impl Into<String>, value: impl Into<String>) -> Self` | Set a single custom property |
+| `fn build() -> DatabaseDescriptor` | Build the database descriptor |
+
+## `DatabaseInfo`
+
+| Method | Description |
+|---|---|
+| `fn database_name(&self) -> &str` | Get database name |
+| `fn created_time(&self) -> i64` | Get creation timestamp |
+| `fn modified_time(&self) -> i64` | Get last modification timestamp |
+| `fn database_descriptor(&self) -> &DatabaseDescriptor` | Get the database descriptor |
+
+## `LakeSnapshot`
+
+| Field | Description |
+|---|---|
+| `.snapshot_id` | `i64` -- Snapshot ID |
+| `.table_buckets_offset` | `HashMap<TableBucket, i64>` -- All bucket offsets |
+
+## `GenericRow<'a>`
+
+| Method | Description |
+|---|---|
+| `GenericRow::new(field_count: usize) -> Self` | Create a new row with the given number of fields |
+| `fn set_field(&mut self, pos: usize, value: impl Into>)` | Set a field value by position |
+| `GenericRow::from_data(data: Vec>>) -> Self` | Create a row from existing field data |
+
+Implements the `InternalRow` trait (see below).
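+
+As a minimal sketch (using the three-column schema from the examples), a row is built positionally and each field is set by index:
+
+```rust
+use fluss::row::GenericRow;
+
+// Field positions must match the table schema order.
+let mut row = GenericRow::new(3);
+row.set_field(0, 1);       // id: INT
+row.set_field(1, "Alice"); // name: STRING
+row.set_field(2, 25i64);   // age: BIGINT
+```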
+ +## `InternalRow` trait + +| Method | Description | +|---|---| +| `fn get_boolean(&self, idx: usize) -> bool` | Get boolean value | +| `fn get_byte(&self, idx: usize) -> i8` | Get tinyint value | +| `fn get_short(&self, idx: usize) -> i16` | Get smallint value | +| `fn get_int(&self, idx: usize) -> i32` | Get int value | +| `fn get_long(&self, idx: usize) -> i64` | Get bigint value | +| `fn get_float(&self, idx: usize) -> f32` | Get float value | +| `fn get_double(&self, idx: usize) -> f64` | Get double value | +| `fn get_string(&self, idx: usize) -> &str` | Get string value | +| `fn get_decimal(&self, idx: usize, precision: u32, scale: u32) -> Decimal` | Get decimal value | +| `fn get_date(&self, idx: usize) -> Date` | Get date value | +| `fn get_time(&self, idx: usize) -> Time` | Get time value | +| `fn get_timestamp_ntz(&self, idx: usize, precision: u32) -> TimestampNtz` | Get timestamp value | +| `fn get_timestamp_ltz(&self, idx: usize, precision: u32) -> TimestampLtz` | Get timestamp with local timezone value | +| `fn get_bytes(&self, idx: usize) -> &[u8]` | Get bytes value | +| `fn get_binary(&self, idx: usize, length: usize) -> &[u8]` | Get fixed-length binary value | +| `fn get_char(&self, idx: usize, length: usize) -> &str` | Get fixed-length char value | + +## `ChangeType` + +| Value | Short String | Description | +|---|---|---| +| `ChangeType::AppendOnly` | `+A` | Append-only record | +| `ChangeType::Insert` | `+I` | Inserted row | +| `ChangeType::UpdateBefore` | `-U` | Previous value of an updated row | +| `ChangeType::UpdateAfter` | `+U` | New value of an updated row | +| `ChangeType::Delete` | `-D` | Deleted row | + +| Method | Description | +|---|---| +| `fn short_string(&self) -> &str` | Get the short string representation | + +## `OffsetSpec` + +| Variant | Description | +|---|---| +| `OffsetSpec::Earliest` | Start from the earliest available offset | +| `OffsetSpec::Latest` | Start from the latest offset (only new records) | +| `OffsetSpec::Timestamp(i64)` | Start from a specific timestamp in milliseconds | + +## Constants + +| Constant | Value | Description | +|---|---|---| +| `fluss::client::EARLIEST_OFFSET` | `-2` | Start reading from the earliest available offset | +| `fluss::client::LATEST_OFFSET` | `-1` | Start reading from the latest offset (only new records) | + +## `DataTypes` factory + +| Method | Returns | Description | +|---|---|---| +| `DataTypes::boolean()` | `DataType` | Boolean type | +| `DataTypes::tinyint()` | `DataType` | 8-bit signed integer | +| `DataTypes::smallint()` | `DataType` | 16-bit signed integer | +| `DataTypes::int()` | `DataType` | 32-bit signed integer | +| `DataTypes::bigint()` | `DataType` | 64-bit signed integer | +| `DataTypes::float()` | `DataType` | 32-bit floating point | +| `DataTypes::double()` | `DataType` | 64-bit floating point | +| `DataTypes::string()` | `DataType` | Variable-length string | +| `DataTypes::bytes()` | `DataType` | Variable-length byte array | +| `DataTypes::date()` | `DataType` | Date (days since epoch) | +| `DataTypes::time()` | `DataType` | Time (milliseconds since midnight) | +| `DataTypes::timestamp()` | `DataType` | Timestamp without timezone | +| `DataTypes::timestamp_ltz()` | `DataType` | Timestamp with local timezone | +| `DataTypes::decimal(precision: u32, scale: u32)` | `DataType` | Fixed-point decimal | +| `DataTypes::char(length: u32)` | `DataType` | Fixed-length string | +| `DataTypes::binary(length: usize)` | `DataType` | Fixed-length byte array | +| `DataTypes::array(element: DataType)` | 
`DataType` | Array of elements | +| `DataTypes::map(key: DataType, value: DataType)` | `DataType` | Map of key-value pairs | +| `DataTypes::row(fields: Vec)` | `DataType` | Nested row type | + +## `DataField` + +| Method | Description | +|---|---| +| `DataField::new(name: impl Into, data_type: DataType, description: Option) -> DataField` | Create a data field | +| `fn name(&self) -> &str` | Get the field name | diff --git a/website/docs/user-guide/rust/data-types.md b/website/docs/user-guide/rust/data-types.md new file mode 100644 index 00000000..8b374ca5 --- /dev/null +++ b/website/docs/user-guide/rust/data-types.md @@ -0,0 +1,46 @@ +--- +sidebar_position: 3 +--- +# Data Types + +| Fluss Type | Rust Type | Getter | Setter | +|---|---|---|---| +| `BOOLEAN` | `bool` | `get_boolean()` | `set_field(idx, bool)` | +| `TINYINT` | `i8` | `get_byte()` | `set_field(idx, i8)` | +| `SMALLINT` | `i16` | `get_short()` | `set_field(idx, i16)` | +| `INT` | `i32` | `get_int()` | `set_field(idx, i32)` | +| `BIGINT` | `i64` | `get_long()` | `set_field(idx, i64)` | +| `FLOAT` | `f32` | `get_float()` | `set_field(idx, f32)` | +| `DOUBLE` | `f64` | `get_double()` | `set_field(idx, f64)` | +| `CHAR` | `&str` | `get_char(idx, length)` | `set_field(idx, &str)` | +| `STRING` | `&str` | `get_string()` | `set_field(idx, &str)` | +| `DECIMAL` | `Decimal` | `get_decimal(idx, precision, scale)` | `set_field(idx, Decimal)` | +| `DATE` | `Date` | `get_date()` | `set_field(idx, Date)` | +| `TIME` | `Time` | `get_time()` | `set_field(idx, Time)` | +| `TIMESTAMP` | `TimestampNtz` | `get_timestamp_ntz(idx, precision)` | `set_field(idx, TimestampNtz)` | +| `TIMESTAMP_LTZ` | `TimestampLtz` | `get_timestamp_ltz(idx, precision)` | `set_field(idx, TimestampLtz)` | +| `BYTES` | `&[u8]` | `get_bytes()` | `set_field(idx, &[u8])` | +| `BINARY(n)` | `&[u8]` | `get_binary(idx, length)` | `set_field(idx, &[u8])` | + +## Constructing Special Types + +Primitive types (`bool`, `i8`, `i16`, `i32`, `i64`, `f32`, `f64`, `&str`, `&[u8]`) can be passed directly to `set_field`. The following types require explicit construction: + +```rust +use fluss::row::{Date, Time, TimestampNtz, TimestampLtz, Decimal}; + +// Date: days since Unix epoch +let date = Date::new(19738); + +// Time: milliseconds since midnight +let time = Time::new(43200000); + +// Timestamp without timezone: milliseconds since epoch +let ts = TimestampNtz::new(1704067200000); + +// Timestamp with local timezone: milliseconds since epoch +let ts_ltz = TimestampLtz::new(1704067200000); + +// Decimal: from an unscaled long value with precision and scale +let decimal = Decimal::from_unscaled_long(12345, 10, 2)?; // represents 123.45 +``` diff --git a/website/docs/user-guide/rust/error-handling.md b/website/docs/user-guide/rust/error-handling.md new file mode 100644 index 00000000..eb7d8e22 --- /dev/null +++ b/website/docs/user-guide/rust/error-handling.md @@ -0,0 +1,151 @@ +--- +sidebar_position: 4 +--- +# Error Handling + +The Fluss Rust client uses a unified `Error` type and a `Result` alias for all fallible operations. + +## Basic Usage + +```rust +use fluss::error::{Error, Result}; + +// All operations return Result +let conn = FlussConnection::new(config).await?; +let admin = conn.get_admin().await?; +let table = conn.get_table(&table_path).await?; +``` + +Use the `?` operator to propagate errors, or `match` on specific variants for fine-grained handling. 
+ +## Matching Error Variants + +```rust +use fluss::error::Error; + +match result { + Ok(val) => { + // handle success + } + Err(Error::InvalidTableError { message }) => { + eprintln!("Invalid table: {}", message); + } + Err(Error::RpcError { message, source }) => { + eprintln!("RPC failure: {}", message); + } + Err(Error::FlussAPIError { api_error }) => { + eprintln!("Server error: {}", api_error); + } + Err(e) => { + eprintln!("Unexpected error: {}", e); + } +} +``` + +## Error Variants + +| Variant | Description | +|---|---| +| `UnexpectedError` | General unexpected errors with a message and optional source | +| `IoUnexpectedError` | I/O errors (network, file system) | +| `RemoteStorageUnexpectedError` | Remote storage errors (OpenDAL backend failures) | +| `InvalidTableError` | Invalid table configuration or table not found | +| `RpcError` | RPC communication failures (connection refused, timeout) | +| `RowConvertError` | Row conversion failures (type mismatch, invalid data) | +| `ArrowError` | Arrow data handling errors (schema mismatch, encoding) | +| `IllegalArgument` | Invalid arguments passed to an API method | +| `InvalidPartition` | Invalid partition configuration | +| `PartitionNotExist` | Partition does not exist | +| `UnsupportedOperation` | Operation not supported on the table type | +| `LeaderNotAvailable` | Leader not available for the requested bucket | +| `FlussAPIError` | Server-side API errors returned by the Fluss cluster | + +## Common Error Scenarios + +### Connection Refused + +The Fluss cluster is not running or the address is incorrect. + +```rust +let result = FlussConnection::new(config).await; +match result { + Err(Error::RpcError { message, .. }) => { + eprintln!("Cannot connect to cluster: {}", message); + } + _ => {} +} +``` + +### Table Not Found + +The table does not exist or has been dropped. + +```rust +let result = conn.get_table(&table_path).await; +match result { + Err(Error::InvalidTableError { message }) => { + eprintln!("Table not found: {}", message); + } + _ => {} +} +``` + +### Partition Not Found + +The partition does not exist on a partitioned table. + +```rust +let result = admin.drop_partition(&table_path, &spec, false).await; +match result { + Err(Error::PartitionNotExist { .. }) => { + eprintln!("Partition does not exist"); + } + _ => {} +} +``` + +### Schema Mismatch + +Row data does not match the expected table schema. + +```rust +let result = writer.append(&row); +match result { + Err(Error::RowConvertError { .. 
}) => { + eprintln!("Row does not match table schema"); + } + _ => {} +} +``` + +## Using `Result` in Application Code + +The `fluss::error::Result` type alias makes it easy to use Fluss errors with the `?` operator in your application functions: + +```rust +use fluss::error::Result; + +async fn my_pipeline() -> Result<()> { + let conn = FlussConnection::new(config).await?; + let admin = conn.get_admin().await?; + let table = conn.get_table(&table_path).await?; + let writer = table.new_append()?.create_writer()?; + writer.append(&row)?; + writer.flush().await?; + Ok(()) +} +``` + +For applications that use other error types alongside Fluss errors, you can convert with standard `From` / `Into` traits or use crates like `anyhow`: + +```rust +use anyhow::Result; + +#[tokio::main] +async fn main() -> Result<()> { + let conn = FlussConnection::new(config).await?; + // fluss::error::Error implements std::error::Error, + // so it converts into anyhow::Error automatically + Ok(()) +} +``` diff --git a/website/docs/user-guide/rust/example/_category_.json b/website/docs/user-guide/rust/example/_category_.json new file mode 100644 index 00000000..dd222949 --- /dev/null +++ b/website/docs/user-guide/rust/example/_category_.json @@ -0,0 +1,4 @@ +{ + "label": "Example", + "position": 5 +} diff --git a/website/docs/user-guide/rust/example/admin-operations.md b/website/docs/user-guide/rust/example/admin-operations.md new file mode 100644 index 00000000..631d9196 --- /dev/null +++ b/website/docs/user-guide/rust/example/admin-operations.md @@ -0,0 +1,118 @@ +--- +sidebar_position: 3 +--- +# Admin Operations + +## Get Admin Interface + +```rust +let admin = conn.get_admin().await?; +``` + +## Database Operations + +```rust +// Create database +admin.create_database("my_database", true, None).await?; + +// List all databases +let databases = admin.list_databases().await?; +println!("Databases: {:?}", databases); + +// Check if database exists +let exists = admin.database_exists("my_database").await?; + +// Get database information +let db_info = admin.get_database_info("my_database").await?; + +// Drop database +admin.drop_database("my_database", true, false).await?; +``` + +## Table Operations + +```rust +use fluss::metadata::{DataTypes, Schema, TableDescriptor, TablePath}; + +let table_descriptor = TableDescriptor::builder() + .schema( + Schema::builder() + .column("id", DataTypes::int()) + .column("name", DataTypes::string()) + .column("amount", DataTypes::bigint()) + .build()?, + ) + .build()?; + +let table_path = TablePath::new("my_database", "my_table"); + +// Create table +admin.create_table(&table_path, &table_descriptor, true).await?; + +// Get table information +let table_info = admin.get_table(&table_path).await?; +println!("Table: {}", table_info); + +// List tables in database +let tables = admin.list_tables("my_database").await?; + +// Check if table exists +let exists = admin.table_exists(&table_path).await?; + +// Drop table +admin.drop_table(&table_path, true).await?; +``` + +## Partition Operations + +```rust +use fluss::metadata::PartitionSpec; +use std::collections::HashMap; + +// List all partitions +let partitions = admin.list_partition_infos(&table_path).await?; + +// List partitions matching a spec +let mut filter = HashMap::new(); +filter.insert("year", "2024"); +let spec = PartitionSpec::new(filter); +let partitions = admin.list_partition_infos_with_spec(&table_path, Some(&spec)).await?; + +// Create partition +admin.create_partition(&table_path, &spec, true).await?; + +// Drop 
partition +admin.drop_partition(&table_path, &spec, true).await?; +``` + +## Offset Operations + +```rust +use fluss::rpc::message::OffsetSpec; + +let bucket_ids = vec![0, 1, 2]; + +// Get earliest offsets +let earliest = admin.list_offsets(&table_path, &bucket_ids, OffsetSpec::Earliest).await?; + +// Get latest offsets +let latest = admin.list_offsets(&table_path, &bucket_ids, OffsetSpec::Latest).await?; + +// Get offsets for a specific timestamp +let timestamp_ms = 1704067200000; // 2024-01-01 00:00:00 UTC +let offsets = admin.list_offsets( + &table_path, &bucket_ids, OffsetSpec::Timestamp(timestamp_ms), +).await?; + +// Get offsets for a specific partition +let partition_offsets = admin.list_partition_offsets( + &table_path, "partition_name", &bucket_ids, OffsetSpec::Latest, +).await?; +``` + +## Lake Snapshot + +```rust +let snapshot = admin.get_latest_lake_snapshot(&table_path).await?; +println!("Snapshot ID: {}", snapshot.snapshot_id); +``` diff --git a/website/docs/user-guide/rust/example/configuration.md b/website/docs/user-guide/rust/example/configuration.md new file mode 100644 index 00000000..2df736f3 --- /dev/null +++ b/website/docs/user-guide/rust/example/configuration.md @@ -0,0 +1,24 @@ +--- +sidebar_position: 2 +--- +# Configuration + +```rust +use fluss::client::FlussConnection; +use fluss::config::Config; + +let mut config = Config::default(); +config.bootstrap_server = "127.0.0.1:9123".to_string(); + +let conn = FlussConnection::new(config).await?; +``` + +## Configuration Options + +| Option | Description | Default | +|--------|-------------|---------| +| `bootstrap_server` | Coordinator server address | `127.0.0.1:9123` | +| `request_max_size` | Maximum request size in bytes | 10 MB | +| `writer_acks` | Acknowledgment setting (`all` waits for all replicas) | `all` | +| `writer_retries` | Number of retries on failure | `i32::MAX` | +| `writer_batch_size` | Batch size for writes | 2 MB | diff --git a/website/docs/user-guide/rust/example/index.md b/website/docs/user-guide/rust/example/index.md new file mode 100644 index 00000000..b1c99746 --- /dev/null +++ b/website/docs/user-guide/rust/example/index.md @@ -0,0 +1,56 @@ +--- +sidebar_position: 1 +--- +# Example + +Minimal working examples: connect to Fluss, create a table, write data, and read it back. 
+ +```rust +use fluss::client::FlussConnection; +use fluss::config::Config; +use fluss::error::Result; +use fluss::metadata::{DataTypes, Schema, TableDescriptor, TablePath}; +use fluss::row::{GenericRow, InternalRow}; +use std::time::Duration; + +#[tokio::main] +async fn main() -> Result<()> { + // Connect + let mut config = Config::default(); + config.bootstrap_server = "127.0.0.1:9123".to_string(); + let conn = FlussConnection::new(config).await?; + let admin = conn.get_admin().await?; + + // Create a log table + let table_path = TablePath::new("fluss", "quickstart_rust"); + let descriptor = TableDescriptor::builder() + .schema( + Schema::builder() + .column("id", DataTypes::int()) + .column("name", DataTypes::string()) + .build()?, + ) + .build()?; + admin.create_table(&table_path, &descriptor, true).await?; + + // Write + let table = conn.get_table(&table_path).await?; + let writer = table.new_append()?.create_writer()?; + let mut row = GenericRow::new(2); + row.set_field(0, 1); + row.set_field(1, "hello"); + writer.append(&row)?; + writer.flush().await?; + + // Read + let scanner = table.new_scan().create_log_scanner()?; + scanner.subscribe(0, 0).await?; + let records = scanner.poll(Duration::from_secs(5)).await?; + for record in records { + let row = record.row(); + println!("id={}, name={}", row.get_int(0), row.get_string(1)); + } + + Ok(()) +} +``` diff --git a/website/docs/user-guide/rust/example/log-tables.md b/website/docs/user-guide/rust/example/log-tables.md new file mode 100644 index 00000000..1672a95b --- /dev/null +++ b/website/docs/user-guide/rust/example/log-tables.md @@ -0,0 +1,124 @@ +--- +sidebar_position: 4 +--- +# Log Tables + +Log tables are append-only tables without primary keys, suitable for event streaming. + +## Creating a Log Table + +```rust +use fluss::metadata::{DataTypes, Schema, TableDescriptor, TablePath}; + +let table_descriptor = TableDescriptor::builder() + .schema( + Schema::builder() + .column("event_id", DataTypes::int()) + .column("event_type", DataTypes::string()) + .column("timestamp", DataTypes::bigint()) + .build()?, + ) + .build()?; + +let table_path = TablePath::new("fluss", "events"); +admin.create_table(&table_path, &table_descriptor, true).await?; +``` + +## Writing to Log Tables + +```rust +use fluss::row::{GenericRow, InternalRow}; + +let table = conn.get_table(&table_path).await?; +let append_writer = table.new_append()?.create_writer()?; + +let mut row = GenericRow::new(3); +row.set_field(0, 1); // event_id +row.set_field(1, "user_login"); // event_type +row.set_field(2, 1704067200000i64); // timestamp + +append_writer.append(&row)?; +append_writer.flush().await?; +``` + +Write operations use a **fire-and-forget** pattern for efficient batching. Each call queues the write and returns a `WriteResultFuture` immediately. Call `flush()` to ensure all queued writes are sent to the server. 
+ +For per-record acknowledgment: + +```rust +append_writer.append(&row)?.await?; +``` + +## Reading from Log Tables + +```rust +use std::time::Duration; + +let table = conn.get_table(&table_path).await?; +let log_scanner = table.new_scan().create_log_scanner()?; + +// Subscribe to bucket 0 starting from offset 0 +log_scanner.subscribe(0, 0).await?; + +// Poll for records +let records = log_scanner.poll(Duration::from_secs(10)).await?; + +for record in records { + let row = record.row(); + println!( + "event_id={}, event_type={}, timestamp={} @ offset={}", + row.get_int(0), + row.get_string(1), + row.get_long(2), + record.offset() + ); +} +``` + +**Subscribe from special offsets:** + +```rust +use fluss::client::{EARLIEST_OFFSET, LATEST_OFFSET}; + +log_scanner.subscribe(0, EARLIEST_OFFSET).await?; // from earliest +log_scanner.subscribe(0, LATEST_OFFSET).await?; // only new records +log_scanner.subscribe(0, 42).await?; // from specific offset +``` + +**Subscribe to all buckets:** + +```rust +let num_buckets = table.get_table_info().get_num_buckets(); +for bucket_id in 0..num_buckets { + log_scanner.subscribe(bucket_id, 0).await?; +} +``` + +**Subscribe to multiple buckets at once:** + +```rust +use std::collections::HashMap; + +let mut bucket_offsets = HashMap::new(); +bucket_offsets.insert(0, 0i64); +bucket_offsets.insert(1, 100i64); +log_scanner.subscribe_buckets(&bucket_offsets).await?; +``` + +**Unsubscribe from a partition bucket:** + +```rust +log_scanner.unsubscribe_partition(partition_id, bucket_id).await?; +``` + +## Column Projection + +```rust +// Project by column index +let scanner = table.new_scan().project(&[0, 2])?.create_log_scanner()?; + +// Project by column name +let scanner = table.new_scan() + .project_by_name(&["event_id", "timestamp"])? + .create_log_scanner()?; +``` diff --git a/website/docs/user-guide/rust/example/partitioned-tables.md b/website/docs/user-guide/rust/example/partitioned-tables.md new file mode 100644 index 00000000..a1d2475d --- /dev/null +++ b/website/docs/user-guide/rust/example/partitioned-tables.md @@ -0,0 +1,215 @@ +--- +sidebar_position: 6 +--- +# Partitioned Tables + +Partitioned tables distribute data across partitions based on partition column values, enabling efficient data organization and querying. Both log tables and primary key tables support partitioning. + +## Partitioned Log Tables + +### Creating a Partitioned Log Table + +```rust +use fluss::metadata::{DataTypes, LogFormat, Schema, TableDescriptor, TablePath}; + +let table_descriptor = TableDescriptor::builder() + .schema( + Schema::builder() + .column("event_id", DataTypes::int()) + .column("event_type", DataTypes::string()) + .column("dt", DataTypes::string()) + .column("region", DataTypes::string()) + .build()?, + ) + .partitioned_by(vec!["dt", "region"]) + .log_format(LogFormat::ARROW) + .build()?; + +let table_path = TablePath::new("fluss", "partitioned_events"); +admin.create_table(&table_path, &table_descriptor, true).await?; +``` + +### Writing to Partitioned Log Tables + +**Partitions must exist before writing data, otherwise the client will by default retry indefinitely.** Include partition column values in each row — the client routes records to the correct partition automatically. 
+ +```rust +use fluss::metadata::PartitionSpec; +use std::collections::HashMap; + +let table = conn.get_table(&table_path).await?; + +// Create the partition before writing +let mut partition_values = HashMap::new(); +partition_values.insert("dt", "2024-01-15"); +partition_values.insert("region", "US"); +admin.create_partition(&table_path, &PartitionSpec::new(partition_values), true).await?; + +let append_writer = table.new_append()?.create_writer()?; + +let mut row = GenericRow::new(4); +row.set_field(0, 1); // event_id +row.set_field(1, "user_login"); // event_type +row.set_field(2, "2024-01-15"); // dt (partition column) +row.set_field(3, "US"); // region (partition column) + +append_writer.append(&row)?; +append_writer.flush().await?; +``` + +### Reading from Partitioned Log Tables + +For partitioned tables, use partition-aware subscribe methods. + +```rust +use std::time::Duration; + +let table = conn.get_table(&table_path).await?; +let admin = conn.get_admin().await?; +let partitions = admin.list_partition_infos(&table_path).await?; + +let log_scanner = table.new_scan().create_log_scanner()?; + +// Subscribe to each partition's buckets +for partition_info in &partitions { + let partition_id = partition_info.get_partition_id(); + let num_buckets = table.get_table_info().get_num_buckets(); + for bucket_id in 0..num_buckets { + log_scanner.subscribe_partition(partition_id, bucket_id, 0).await?; + } +} + +let records = log_scanner.poll(Duration::from_secs(10)).await?; +for record in records { + println!("Record: {:?}", record.row()); +} +``` + +Subscribe to multiple partition-buckets at once: + +```rust +use std::collections::HashMap; + +let mut partition_bucket_offsets = HashMap::new(); +partition_bucket_offsets.insert((partition_id, 0), 0i64); +partition_bucket_offsets.insert((partition_id, 1), 0i64); +log_scanner.subscribe_partition_buckets(&partition_bucket_offsets).await?; +``` + +### Managing Partitions + +```rust +use fluss::metadata::PartitionSpec; +use std::collections::HashMap; + +// Create a partition +let mut partition_values = HashMap::new(); +partition_values.insert("dt", "2024-01-15"); +partition_values.insert("region", "EMEA"); +let spec = PartitionSpec::new(partition_values); +admin.create_partition(&table_path, &spec, true).await?; + +// List all partitions +let partitions = admin.list_partition_infos(&table_path).await?; +for partition in &partitions { + println!( + "Partition: id={}, name={}", + partition.get_partition_id(), + partition.get_partition_name() + ); +} + +// List with filter +let mut partial_values = HashMap::new(); +partial_values.insert("dt", "2024-01-15"); +let partial_spec = PartitionSpec::new(partial_values); +let filtered = admin.list_partition_infos_with_spec( + &table_path, Some(&partial_spec), +).await?; + +// Drop a partition +admin.drop_partition(&table_path, &spec, true).await?; +``` + +## Partitioned Primary Key Tables + +Partitioned KV tables combine partitioning with primary key operations. Partition columns must be part of the primary key. 
+ +### Creating a Partitioned Primary Key Table + +```rust +use fluss::metadata::{DataTypes, KvFormat, Schema, TableDescriptor, TablePath}; + +let table_descriptor = TableDescriptor::builder() + .schema( + Schema::builder() + .column("user_id", DataTypes::int()) + .column("region", DataTypes::string()) + .column("zone", DataTypes::bigint()) + .column("score", DataTypes::bigint()) + .primary_key(vec!["user_id", "region", "zone"]) + .build()?, + ) + .partitioned_by(vec!["region", "zone"]) + .kv_format(KvFormat::COMPACTED) + .build()?; + +let table_path = TablePath::new("fluss", "partitioned_users"); +admin.create_table(&table_path, &table_descriptor, true).await?; +``` + +### Writing to Partitioned Primary Key Tables + +**Partitions must exist before upserting data, otherwise the client will by default retry indefinitely.** + +```rust +use fluss::metadata::PartitionSpec; +use std::collections::HashMap; + +let table = conn.get_table(&table_path).await?; + +// Create partitions first +for (region, zone) in [("APAC", "1"), ("EMEA", "2"), ("US", "3")] { + let mut values = HashMap::new(); + values.insert("region", region); + values.insert("zone", zone); + admin.create_partition(&table_path, &PartitionSpec::new(values), true).await?; +} + +let table_upsert = table.new_upsert()?; +let upsert_writer = table_upsert.create_writer()?; + +for (user_id, region, zone, score) in [ + (1001, "APAC", 1i64, 1234i64), + (1002, "EMEA", 2, 2234), + (1003, "US", 3, 3234), +] { + let mut row = GenericRow::new(4); + row.set_field(0, user_id); + row.set_field(1, region); + row.set_field(2, zone); + row.set_field(3, score); + upsert_writer.upsert(&row)?; +} +upsert_writer.flush().await?; +``` + +### Looking Up Records in Partitioned Tables + +Lookup requires all primary key columns including partition columns. + +```rust +let mut lookuper = table.new_lookup()?.create_lookuper()?; + +let mut key = GenericRow::new(3); +key.set_field(0, 1001); // user_id +key.set_field(1, "APAC"); // region (partition column) +key.set_field(2, 1i64); // zone (partition column) + +let result = lookuper.lookup(&key).await?; +if let Some(row) = result.get_single_row()? { + println!("Found: score={}", row.get_long(3)); +} +``` + +> **Note:** Scanning partitioned primary key tables is not supported. Use lookup operations instead. diff --git a/website/docs/user-guide/rust/example/primary-key-tables.md b/website/docs/user-guide/rust/example/primary-key-tables.md new file mode 100644 index 00000000..5b299cca --- /dev/null +++ b/website/docs/user-guide/rust/example/primary-key-tables.md @@ -0,0 +1,114 @@ +--- +sidebar_position: 5 +--- +# Primary Key Tables + +Primary key tables (KV tables) support upsert, delete, and lookup operations. 
+ +## Creating a Primary Key Table + +```rust +use fluss::metadata::{DataTypes, Schema, TableDescriptor, TablePath}; + +let table_descriptor = TableDescriptor::builder() + .schema( + Schema::builder() + .column("id", DataTypes::int()) + .column("name", DataTypes::string()) + .column("age", DataTypes::bigint()) + .primary_key(vec!["id"]) + .build()?, + ) + .build()?; + +let table_path = TablePath::new("fluss", "users"); +admin.create_table(&table_path, &table_descriptor, true).await?; +``` + +## Upserting Records + +```rust +use fluss::row::{GenericRow, InternalRow}; + +let table = conn.get_table(&table_path).await?; +let table_upsert = table.new_upsert()?; +let upsert_writer = table_upsert.create_writer()?; + +for (id, name, age) in [(1, "Alice", 25i64), (2, "Bob", 30), (3, "Charlie", 35)] { + let mut row = GenericRow::new(3); + row.set_field(0, id); + row.set_field(1, name); + row.set_field(2, age); + upsert_writer.upsert(&row)?; +} +upsert_writer.flush().await?; +``` + +## Updating Records + +Upsert with the same primary key to update an existing record. + +```rust +let mut row = GenericRow::new(3); +row.set_field(0, 1); // id (primary key) +row.set_field(1, "Alice"); +row.set_field(2, 26i64); // updated age + +upsert_writer.upsert(&row)?; +upsert_writer.flush().await?; +``` + +## Deleting Records + +```rust +// Only primary key field needs to be set +let mut row = GenericRow::new(3); +row.set_field(0, 2); // id of record to delete + +upsert_writer.delete(&row)?; +upsert_writer.flush().await?; +``` + +## Partial Updates + +Update only specific columns while preserving others. + +```rust +// By column indices +let partial_upsert = table_upsert.partial_update(Some(vec![0, 2]))?; +let partial_writer = partial_upsert.create_writer()?; + +let mut row = GenericRow::new(3); +row.set_field(0, 1); // id (primary key, required) +row.set_field(2, 27i64); // age (will be updated) +// name will remain unchanged + +partial_writer.upsert(&row)?; +partial_writer.flush().await?; + +// By column names +let partial_upsert = table_upsert.partial_update_with_column_names(&["id", "age"])?; +let partial_writer = partial_upsert.create_writer()?; +``` + +## Looking Up Records + +```rust +let mut lookuper = table.new_lookup()?.create_lookuper()?; + +let mut key = GenericRow::new(1); +key.set_field(0, 1); // id to lookup + +let result = lookuper.lookup(&key).await?; + +if let Some(row) = result.get_single_row()? { + println!( + "Found: id={}, name={}, age={}", + row.get_int(0), + row.get_string(1), + row.get_long(2) + ); +} else { + println!("Record not found"); +} +``` diff --git a/website/docs/user-guide/rust/installation.md b/website/docs/user-guide/rust/installation.md new file mode 100644 index 00000000..1fe36fd4 --- /dev/null +++ b/website/docs/user-guide/rust/installation.md @@ -0,0 +1,76 @@ +--- +sidebar_position: 1 +--- +# Installation + +The Fluss Rust client is published to [crates.io](https://crates.io/crates/fluss-rs) as `fluss-rs`. The crate's library name is `fluss`, so you import it with `use fluss::...`. 
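+
+For example, once the dependency below is declared, a minimal program (a sketch only; the database and table names are placeholders) imports the client through the `fluss` path rather than `fluss_rs`:
+
+```rust
+// The Cargo dependency is named `fluss-rs`, but its library target is `fluss`,
+// so imports use the `fluss::` module path.
+use fluss::metadata::TablePath;
+
+fn main() {
+    // Placeholder database/table names, only to confirm the import resolves.
+    let _path = TablePath::new("fluss", "example_table");
+}
+```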
+ +```toml +[dependencies] +fluss-rs = "0.1" +tokio = { version = "1", features = ["full"] } +``` + +## Feature Flags + +```toml +[dependencies] +# Default: memory and filesystem storage +fluss-rs = "0.1" + +# With S3 storage support +fluss-rs = { version = "0.1", features = ["storage-s3"] } + +# With OSS storage support +fluss-rs = { version = "0.1", features = ["storage-oss"] } + +# All storage backends +fluss-rs = { version = "0.1", features = ["storage-all"] } +``` + +Available features: +- `storage-memory` (default) — In-memory storage +- `storage-fs` (default) — Local filesystem storage +- `storage-s3` — Amazon S3 storage +- `storage-oss` — Alibaba OSS storage +- `storage-all` — All storage backends + +## Git or Path Dependency + +For development against unreleased changes: + +```toml +[dependencies] +# From Git +fluss = { git = "https://github.com/apache/fluss-rust.git", package = "fluss-rs" } + +# From local path +fluss = { path = "/path/to/fluss-rust/crates/fluss", package = "fluss-rs" } +``` + +> **Note:** When using `git` or `path` dependencies, the `package = "fluss-rs"` field is required so that Cargo resolves the correct package while still allowing `use fluss::...` imports. + +## Building from Source + +**Prerequisites:** Rust 1.85+, Protobuf compiler (`protoc`) + +```bash +git clone https://github.com/apache/fluss-rust.git +cd fluss-rust +``` + +Install `protoc`: + +```bash +# macOS +brew install protobuf + +# Ubuntu/Debian +sudo apt-get install protobuf-compiler +``` + +Build: + +```bash +cargo build --workspace --all-targets +``` diff --git a/website/docusaurus.config.ts b/website/docusaurus.config.ts new file mode 100644 index 00000000..0d974e95 --- /dev/null +++ b/website/docusaurus.config.ts @@ -0,0 +1,84 @@ +import {themes as prismThemes} from 'prism-react-renderer'; +import type {Config} from '@docusaurus/types'; +import type * as Preset from '@docusaurus/preset-classic'; + +const config: Config = { + title: 'Apache Fluss Clients', + tagline: 'Rust, Python, and C++ clients for Apache Fluss', + favicon: 'img/logo/fluss_favicon.svg', + + url: 'https://fluss.apache.org/', + baseUrl: '/fluss-rust/', + + organizationName: 'apache', + projectName: 'fluss-rust', + + onBrokenLinks: 'throw', + + i18n: { + defaultLocale: 'en', + locales: ['en'], + }, + + presets: [ + [ + 'classic', + { + docs: { + routeBasePath: '/', + sidebarPath: './sidebars.ts', + editUrl: 'https://github.com/apache/fluss-rust/edit/main/website/', + }, + blog: false, + theme: { + customCss: './src/css/custom.css', + }, + } satisfies Preset.Options, + ], + ], + + themeConfig: { + image: 'img/logo/png/colored_logo.png', + colorMode: { + defaultMode: 'light', + disableSwitch: true, + }, + navbar: { + title: '', + logo: { + alt: 'Fluss', + src: 'img/logo/svg/colored_logo.svg', + }, + items: [ + { + type: 'docSidebar', + sidebarId: 'docsSidebar', + position: 'left', + label: 'Client Docs', + }, + { + href: 'https://fluss.apache.org/', + label: 'Fluss', + position: 'left', + }, + { + href: 'https://github.com/apache/fluss-rust', + position: 'right', + className: 'header-github-link', + 'aria-label': 'GitHub repository', + }, + ], + }, + footer: { + style: 'dark', + copyright: `Copyright © ${new Date().getFullYear()} The Apache Software Foundation, Licensed under the Apache License, Version 2.0.`, + }, + prism: { + theme: prismThemes.vsDark, + darkTheme: prismThemes.dracula, + additionalLanguages: ['rust', 'toml', 'bash', 'cmake'], + }, + } satisfies Preset.ThemeConfig, +}; + +export default config; diff --git 
a/website/package.json b/website/package.json new file mode 100644 index 00000000..644a7051 --- /dev/null +++ b/website/package.json @@ -0,0 +1,43 @@ +{ + "name": "fluss-clients-website", + "version": "0.0.0", + "private": true, + "scripts": { + "docusaurus": "docusaurus", + "start": "docusaurus start", + "build": "docusaurus build", + "swizzle": "docusaurus swizzle", + "clear": "docusaurus clear", + "serve": "docusaurus serve" + }, + "dependencies": { + "@docusaurus/core": "^3.9.2", + "@docusaurus/preset-classic": "^3.9.2", + "@mdx-js/react": "^3.0.0", + "clsx": "^2.0.0", + "prism-react-renderer": "^2.3.0", + "react": "^18.0.0", + "react-dom": "^18.0.0" + }, + "devDependencies": { + "@docusaurus/module-type-aliases": "^3.9.2", + "@docusaurus/tsconfig": "^3.9.2", + "@docusaurus/types": "^3.9.2", + "typescript": "~5.5.2" + }, + "browserslist": { + "production": [ + ">0.5%", + "not dead", + "not op_mini all" + ], + "development": [ + "last 3 chrome version", + "last 3 firefox version", + "last 5 safari version" + ] + }, + "engines": { + "node": ">=20.0" + } +} diff --git a/website/sidebars.ts b/website/sidebars.ts new file mode 100644 index 00000000..97f33802 --- /dev/null +++ b/website/sidebars.ts @@ -0,0 +1,24 @@ +import type {SidebarsConfig} from '@docusaurus/plugin-content-docs'; + +const sidebars: SidebarsConfig = { + docsSidebar: [ + 'index', + { + type: 'category', + label: 'User Guide', + items: [ + {type: 'autogenerated', dirName: 'user-guide'}, + ], + }, + { + type: 'category', + label: 'Developer Guide', + items: [ + 'developer-guide/contributing', + 'developer-guide/release', + ], + }, + ], +}; + +export default sidebars; diff --git a/website/src/css/custom.css b/website/src/css/custom.css new file mode 100644 index 00000000..9143372f --- /dev/null +++ b/website/src/css/custom.css @@ -0,0 +1,209 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Copied from the main fluss.apache.org website (fluss/website/src/css/custom.css) + * to ensure visual consistency. + */ + +/* Import Inter font from Google Fonts */ +@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap'); + +/* You can override the default Infima variables here. 
*/ +:root { + --ifm-color-primary: #0071e3; + --ifm-color-primary-dark: #0066cc; + --ifm-color-primary-darker: #0060c1; + --ifm-color-primary-darkest: #004f9f; + --ifm-color-primary-light: #007cfa; + --ifm-color-primary-lighter: #0682ff; + --ifm-color-primary-lightest: #2893ff; + --ifm-code-font-size: 90%; + --ifm-font-family-base: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei', sans-serif; + --docusaurus-highlighted-code-line-bg: #E2E9F3; + + --ifm-menu-color-background-active: #edeefa99; + --ifm-menu-color-background-hover: #edeefa99; +} + + +.navbar__brand { + font-family: monaco; + color: inherit; +} + +.header-github-link:hover { + opacity: 0.6; +} + +.header-github-link::before { + content: ''; + width: 24px; + height: 24px; + display: flex; + background: url("data:image/svg+xml,%3Csvg viewBox='0 0 24 24' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath d='M12 .297c-6.63 0-12 5.373-12 12 0 5.303 3.438 9.8 8.205 11.385.6.113.82-.258.82-.577 0-.285-.01-1.04-.015-2.04-3.338.724-4.042-1.61-4.042-1.61C4.422 18.07 3.633 17.7 3.633 17.7c-1.087-.744.084-.729.084-.729 1.205.084 1.838 1.236 1.838 1.236 1.07 1.835 2.809 1.305 3.495.998.108-.776.417-1.305.76-1.605-2.665-.3-5.466-1.332-5.466-5.93 0-1.31.465-2.38 1.235-3.22-.135-.303-.54-1.523.105-3.176 0 0 1.005-.322 3.3 1.23.96-.267 1.98-.399 3-.405 1.02.006 2.04.138 3 .405 2.28-1.552 3.285-1.23 3.285-1.23.645 1.653.24 2.873.12 3.176.765.84 1.23 1.91 1.23 3.22 0 4.61-2.805 5.625-5.475 5.92.42.36.81 1.096.81 2.22 0 1.606-.015 2.896-.015 3.286 0 .315.21.69.825.57C20.565 22.092 24 17.592 24 12.297c0-6.627-5.373-12-12-12'/%3E%3C/svg%3E") + no-repeat; +} + + +.menu__list-item { + font-size: 0.95rem; + font-weight: 500; +} + +.menu__link--sublist-caret:after { + background: var(--ifm-menu-link-sublist-icon) 50% / 1.5rem 1.5rem; +} + + +.markdown { + padding-left: 1rem; + h1, + h2, + h3, + h4, + h5, + h6 { + color: #1d1d1d; + margin-bottom: 0.3125rem; + font-weight: 700; + } + + b, + strong { + font-weight: 700; + color: #1d1d1d; + } + + h1, + h1:first-child { + font-size: 2.5rem; + margin-bottom: 1.5rem; + margin-top: 0; + } + + h2 { + font-size: 2rem; + margin-bottom: 1.25rem; + margin-top: 2rem; + padding-top: 2rem; + border-top: 1px solid #e6e7e9; + } + + h3 { + font-size: 1.5rem; + margin-bottom: 1.25rem; + margin-top: 1rem; + } + p { + line-height: 1.875; + + code { + border-radius: 4px; + background-color: #edf2fa; + border: none; + padding: 3px 4px; + font-size: 14px; + color: #4c576c; + } + } + + li > code { + border-radius: 4px; + background-color: #edf2fa; + border: none; + padding: 3px 4px; + font-size: 14px; + color: #4c576c; + } + + table thead tr { + background-color: #f7f9fe; + } + + table thead th { + background-color: #f7f9fe; + color: #1d1d1d; + font-size: 1rem; + font-weight: 500; + } + + table tr { + border-bottom: none; + background-color: var(--global-colors-white); + font-size: var(--global-font-size-small); + + code { + border-radius: 4px; + background-color: #edf2fa; + border: none; + padding: 3px 4px; + font-size: 14px; + color: #4c576c; + } + } + + table tr th { + padding: 0.53rem 0.8125rem; + border-color: #dfe5f0; + } + + table tr td { + padding: 0.65rem 0.8125rem; + border-color: #dfe5f0; + } + a { + color: var(--ifm-color-primary); + } + ul { + padding-left: 20px; + li { + margin-top: 4px; + position: relative; + list-style: initial; + } + } + ol { + padding-left: 20px; + li { + list-style: decimal; + } + } +} + 
+.theme-doc-markdown { + header { + margin-top: 1rem; + + & + h1 { + display: none; + } + } +} + +.breadcrumbs__item--active .breadcrumbs__link { + background: var(--ifm-menu-color-background-active); +} + +.footer__copyright { + color: #dfe5f0; + font-size: .75rem; + line-height: 1.8; + opacity: .6; + text-align: center; + width: 98%; +} diff --git a/website/static/img/logo/fluss_favicon.svg b/website/static/img/logo/fluss_favicon.svg new file mode 100644 index 00000000..7c044d55 --- /dev/null +++ b/website/static/img/logo/fluss_favicon.svg @@ -0,0 +1,19 @@ + + + \ No newline at end of file diff --git a/website/static/img/logo/png/colored_logo.png b/website/static/img/logo/png/colored_logo.png new file mode 100644 index 0000000000000000000000000000000000000000..2cd7dd3749494998b019b51f206077d65501c352 GIT binary patch literal 45184 zcmZs@by!r}_Xj*6S40UB1Emy|5v5hS6bu@{k!}!_?q)y`5D>f$64KHg(lG-9j)-)3 z=Kw?30Q2rM-tX^y|9E*Gy_dbuS$plZJ}dV=eo$3@MsxPsSr`mPBllcZ4F)@{41(_PnD^3xE8n160V!8)X$KZ&`&3iJ)bt^(#%21kZ3s z&#vA5kbjQbom-k>m)rh)ryO&xWu*eorIBF8f>lbL?$4($ypf|n-K4knam0ObE%u-T z`T9h?58<4XBgK|XG+#Ct>%%PNEX5YsCOEz(w*2B%QX3${rKkpcl72JEy%2y-a z>vKXwFg{^-N1dumb=)$l|3JTgT(l=&xobP@-ojp~dMR*+92)dhPVf9~8dhPp_>uHG zEWhQ&$t*_4^M?G{8<2Kju-?xm-kcKdL658^86oO1Hpbz@bW)JGvDipZyCh;oMro_KoyhXC3oD z%4rMn8X^nbRX@gu0`I(k7ilJ&_9|Q(n!oc)RgX_e#Dn#-rC3UhyL{l(c9FgW-4f#c zfeJLw$lpMvs1VM23bpEW`^RUa+?>1N;o;wMawKIU?^4`>9)VqqOvR>DGG>xHew)1< zVNV*XOktLIf1ATCDT-C*;#LxAth4GOj-lHlx(LO3Dspr!f;0%rn>!!V=@;+FR!3S8 zmi$melv;s;Bkxt+_G3AgBP-HlMb8}0C@yMY$>i6jDM^s!lO9r! z{MM+?p{nuBF`298U;~mO%upV@$e0}Nk;K(l3nx9Mdp)Q=zz=svT2Aj+mGlswvIqr7 z*e{2wi;1KsIzO15QDV%F@=QvjMYfZM7%MN5GjJr&jl;8R*hHWq;Rzl2D=^s*XBE_| z*?1Glq>3_;ths(2+mDfIUR!Y!VOR9sj0W1gud|1<8!yoOU5J)*Tfpd>L%RoQAwE%$ zcxe$%?d3P$qN!ve)k21>E+Y`D)!B5iO~woFpj9T%zD_^qEAi#q`_5bG7hBCNQ@tL+ zb8nza4En3X(p$DRx%bAS;s}`v<*t3!dX5U5`G#zB@l3~m ztZdw)g`gE+uzrUJQZGD3W#uxjX0>6WRS?-}Jmae+ANj7g~;gqy?d*(hY38k$`gOIq=y9bM}J)c-Y+ z@FOIjQJQq{NPw2?6l$O%TyK^RTKp{%4VJSGx>2Jh%lxG29CtLtZGF?#-vQpAOne!B z6u!#szFlwefVsqy9+yO{)shrmkbG9W7OVEqo#aC^o<%V1Hi)y^aO*X_5iyr>dPF0( zE!q+%*nH?cxx(G-!H8KsMql;Fh8A9VD4`rveGw-&MR^lob1qr_-HSn^Z~N8d5(sF1 zP{t#k&$;4fJ;!_Ga(3o4unlg#(_!tj49x>y1S=*MXIh}g7%w+asqgt|Cc#)J7=Fk& ziYZ>4Xv)7q($S!T&=($#|`>@H<%T!I7Vp)`*#i1@kb^dR4v7;>gdP6r!rhS2UIhB>IadLPNJini!MAdTV2D`$pe|pm%cCG6p{U={=*(>_al{4@VF0maw}I;{P4QN*x=M3? z#klr#wh`~PxZKeT>f|d-&@*G*K);_-J1I7D5ykC1MM54c&fl;k9$ERYK+Dl?YCuF% z9`U!(%eW8FD|aA`Jqb_rOyG)En4;QF%hZPl{S{t}xPB0)o1E;gtjE;k?-lSYtUMe$b!BxSvKz#|-A!E@+k|cTQxMaP z{${0i-^x}|_3Ez{C@R)XYt`iIh@+(&-)pO&-NIJNm_6dQjjMF~eY_pp(_1q#asxO< zbmFGu5*F%aqlP^#p~nmrZSLy9sGo-iYvnCGNR}C?;#vMBBILnr#&tPV+wmhaD{J}g zDEFTPV*xjas_%UV#*ADBC6BmxG{2Y9o_$VvQ5oNE^R`czXC80?~I3{lR|NjIH7!vrc=}noPL1yD>!N zsfr!>f`ap!c+$Xct-(kB4Aa{dO$~#OmPqEyc)Ec~m-h>gVb1Wf65ljxYgAf?usfOT zLi_pu^ZtNB*;@SPlxjnh0lU2X$+W2s(^N``3qJ0543&vDJWsDJ{&KoFs_DO;wo8&| z3@b#puj>1OQbGH9oozW4K8TsG=i}?k)K)2lMb4?;_y-Eh(stzLY1*!rzxHO;dYGb-gt?dbF3sKd5?Oc}v2`Ef zqW=7-ZZ(YYy!ou>C$*5A92buxNiD9+1IH5$4GqZ#K?h#rcf_x5?vXH66cI&VT)k`! zB~cmexY~$9_jdO$j_7X7&uZEr3^ddUYUC5Q_qyKCIcV((>y+4CdmJNl`}LIZOgXZp01%DcZl;k82uKc(14+%PPj98J_xYUhv?|t= zZLe~=I`B&bJ2hqW5`m~DrO}KR8>nRe;6}A99C{=*_|16A*J;VE_mf=Y?>GCAqxh4y)&DfZCjZ%$NV za-3ABq3v8}hq9-4YO-87alwLQxD8^yqF3D}3et zR$S5doSdg@Ul*-ON`?c{xfdS>Ejbv+W`FIlGz(5OHYQp7@`d=KLbWl(6eubgc-(=V+6A6uX%Ulv{? 
zXiM;-2Emo1Q0pz88ww-7#Za&-qOMcAV>mLBDvM{3P`Z{4&b&e?Ux`Yqj*9@?9!nBvo`8ml11S6?H!uJts>_Nr}ngI($NDpY6F z#z77{7~2KnW~}g zaQ^W(Y{K9&jS?!$$Iv+lLx2gB{ zCXj-0tKNm#ppM|6BN)oocG{2t8kQiU%vj9~mr&Hc`t{zlDJvD@*Jk4vo-qaSj_|Uu zCyh6%*{OR-NDq4+emQ7X)cd~p6&ZwUa6wN+q}tME=-|}D)_k-Hv=5lmt!I=*ExQ~K zjIvy)l*%i-sa4eEl?-^HIp0UET}Fl%9y6TI``Bsf3G(--R?^yCx3{zF z^!x7i@#vm0lIoIzW=245aRRGUBi;d`m-?F)H+2AIoK& zfY7z!x4A9nH8NKPdPiLr3-l5(;pm{|5u@OOyZwKW2ixYrdfbWEFR&YE4amJa#FW}n zEU}HRYQ53M^L=Iy#pAU=@{5bo;q(bFUrW)L@q7xk(R)una^5U@GQ~-=Y6jh^;YJ5x z-PZ&pl~*|X8y(VllG5-~zA+$3ga?&gdxws|=j2Q;E-qH%ku|j}fmS0?CqtmMKugCI zQG$4XqZ-<)A8E5BJvnzPkX2 zNKJD2_2CfE{G(FmI-81~)IZTaqjdSk1JWDMKs@^wT7jHlQk?reN7WZ&Xaj?mVEh%p zZD(q;aj~U^+Iv!Z>l?Lb|Kp8_Hmy=rYx;*0zjI}sJ!xlA4UjN9zk$G9Z^)I}+<9w( zIk!*Vf;>JqCkI5>$D~Z-^fSB1;lT*mw0=Pg3!cdf0NAv2IxS)W9MFQL?aXij_p{bI zMGB63v&*4RX5&W&BmXX{95Z8zCV8De3ROohB zQ~vryqpX7C&yAhB(jByPbgl^$K9=-<84%6Dww(L|`}P1fW-J(2cJ=G5pdcy3isr|* zY=brrOv2LMqTo!UBP5&7n-a)C0ux)b#(8gYJLbf%^}%{+0>8kGF+mqnHEd$vt`l!TkImGGJW$L?@7YBXr5K@=9iHLPcnRjYVz-x;=io$nJ8n4B)NO@jxvs2 zm{`{o<5#&Jdj?#Wi#2C?IyidH*2Trv1Z>$Bt9aW+5< zwEw3~s(Ov(VLRvb#0b6O*dz6=?uFK};h5%wMkj74yh1LO7Px)jCGCzLYE;n3mlwXt z7I;k3XZ?KjiLlvD?14HxgLw)e~7%piMoKx}zdK3+!eCpTI99h6E^S>D)FU%2yOMZ z0CzW2khQbNY^u!wX*rts)0eU$8X(Q~vj|h+7w{ z!v1PXjy^szIyGINgPxu^OVs|^@Ccx}Gh~RiPKl@6Irs++H~Y8YQAmb{0tLb&FqW3C zX)lgK3_8T$-zh%uO_|J_iS7n5C3#K|81r7`YVt(;Guc>9m)9xg{MU>oYzuRmU*)E0 z+i6jm-=Br+;7ZB!J#jh&&jM=|;1)}N7wJL^C?-y(Aw{&Azv56y;t#2gxJri`%D%@9 zvt{6N${Rl1s8eQUPIK$G#SC4K!#fHdq(;2OKRJJ;z^6Z_*SCR z2^B6sRM|r3uK+EZluo&4im;@`>nD~(s*NI%A;H1VSYz4nNl7P5jr$IDle2u(ZBWKg z&&v2~=}(J7F|DE+6BA(;?kH|-!Si%7L#wGxD7s+ULH(>csQ3Tu2qbmPv>}?9Y;`>` zyljY#9x1(=t>8%#{_Cy_FX@M0_KX$ZY++9FGL;OVk0~8yDi@kywpXWQXmqt^N@QkX zVL?hnN}Dl-S&jJn`&<6_{C(W*<}{_PevLsmTDLi=kS%`So3uN9JN7&s5$(i|*?*rY zTibYY^p?z#@k}`Ntb4{iuosjTVJR40L}w;kanZf#%goHITYVy6TT8z&A7|foazC_@&15RjSMopL)pw)@bkF?af)yL5+8T`^xO4D=i9c{up`8fJQ z4E?}rl9YjzFgO$Nrh#sKX6B}Ab5`PIVEj-qrdCZJ?vJ`Zeo{VcdmLosLo&DB zCJM@KPL5}xesZ!+HzQ{Eo8a@s+GG1@zFQ{o1EwM4`;T<@PF{S^B)RJqmQzcmH=nhp zv=BFtJTsW%R<__^l+7dURn6gwac2IWr2H(Q30+FK`CdoIQ5oG&AK_s1(z{_pq8o1pZ$6Wn-?&nHWMO1-raOi^2LEFUWo2Gmn6PAXTHCCkf z68J||Pa`_jU!Nmsa903C)gDvEaW6-2+^=1ld#<~r!vWP#tng?B`VBwLh}ERw@qM`D zzcWTDR1BNtI*MndC~`??BNEvTJ1J*$cHn_hDrY7Ba&$*RxjkCQ8lL?VlE8Z7R!qXz zn~!r79_M0P_-c0#4+xoN|F!ew3TQB|!oz=j=B`tW$%(TGaU+b#Ydn%|x>uMe^>ToY z_(6Gm^UB}moFNEOOO?P|*9T0e*4o=KbRuV@anCT?M zBl>b8%>4zZdmd8f20Knziuw@fNfkHWRf4dHS9mywrxb2VWp*D+BnYfwFk6}LzKuJm zH?kA(jTFtx=hkN4ph;o(Zb`bmL)e zb6-$%Jp`P}iCv-X9!D5dj}_!o;3 zj~8OibI|vO(0lXfB+@~G%fsf$ThBTG#+idSVCl8H9%3U^s|kBpYbkdF@#E%au0vs@ zD%bbLS2erOso4726eYvWNtvzSq4~Rbt@yByseNm5o~=6mewz7$67SkqGM^!hUU=%@ zJ&BF<^wt4zN_zXs)~68Wl2z2tD68eEw5LlZuA~Gx{$ZbP2wK1{+`h?fTrQ&Gu54Mi zqpnYks-QTv;V;BHhfib+s!qzFWdfa)| z%l=P{&U9j)oN*i{Tf!Y1D zr08ka;>}p`ec*6AW!^x$8IeNC2dn=Q#x1P%G>FZL)k7HTPuJdosr7MwJ>-Gg^Q!AK zO>SSom}4Azhcmzis5L_@8~=6cVgYcnG39EI3l9fKs$A78xf*WieAL02drliiS#xqf zf@c~{pap+=uH)i<9QF+nUi9K@tOxIy{O%Df6NN21T1*Yy|C1`h&JGEV=z_fV(pz2I zXJq7^pW6(j(71tyvxo71Xenc&)x^kF0Ta0)@2t|jJASW+aUs+&K6@EDWrxzEhQ-RD z7XK|=K@4W`v1edNn%Nr~A5#j_Z=e9$_n!o2Od5GdRc;Pc(wp$lPMTijKH>Ke!#$&Q-@X2ye%j~s3gv@5$(k{>pf=<3Z;51n3#Sr z_#Q{f7?OKuK`k7<%Q9leL-H1zll~%-CzDzpicb3>A zgXW$5okm^aAP3*UA6mbb{__|NXC5-N(^OTT_(->w__lf#j<-zb04lojtM~-*kXM(o zNA3UE?mGVt#mpVRfpv_v;*A&xFYvaCHmYi`#G`xN>paH00Tr&HiGLf$|u(IkjYl}s;>mPsjn!*{7&~5*7Ixfvg9`xiX_jdO>Y`X62c5Xx zYGa2!wXOSLYm&DWz0_u=OB{BVeQzu}1c_8P$f)hv0`Xny_Pmft2LyH<36<`(K3Hce z^J9j?7vJDd0E}vKNJ{6p?HT{z3Y!f!$R&U!oyww?7d0>w;I*nO~n zYM{ehKBwoXfd?ggH%9!(4!)ntHEJ6a=vLsLHJ(N+am->h8%ObopM)Wz%znu7F4ht~b z*7|Jt#m@|KW$CzkD(eMeo(CH&3p 
zs*WfsC1&0g{{elB>6~u_@$wJUMPN-gKrwAZOzQ@jb7viTK{)%kVR?pbcK?8~AfadW zTNPs-Tl-;$w+p@0?#;0dGl$q47|B%qj_b82Kj%Zw?e=J()pm4LmAL=7fBt5~NGE;I z;f&IqLU5S!oZ50z4CYI}_#G=(K=Ad%D{VNkB_-*KcxUM4_Zzh5vGpEyTw?u-8~5y~ zj`u@Ln&{g)>>E{C5&QbE%y9Fv&Mi~VA3PPRv@i|S4=+$d*Ej79&6xVk)ac?C*kUHu z*Mwxpy8gzq~C4izPeMnE?nq&6uoHe zFbn}xocPh4a$5GtM#r}JK*b<+$wJet~w^^ zHHrB+^*^yF3Euf7N)+8v!b**a8@O)uMZ`8zLzoE1~@_71qr>`)~@kC{c zs{Z=i`U!~IE3uOAQ0@t06OQE-Eh~S7PohG3ktbl2_s4bwM8X6MgRj-&9%h@Jo+=v< zGAJ#@$kX*XLq4mUhfc)^&EP9IXThg(V>=5NUUt3X+Ba}Y39Dz^dH8bR%kgMO$H2^1 zp`eC}txZ%=v2IqhTuxaTS)IPRcK?j2CeMh0$yeqRaj`)7*7-zO=?&0%YL|yLmdvI{ z2?wWeg*fXA0|pY-Rev@Yq$+}?woJb7AD!`t9d%r0y!IO>v^xi7ZJURnP4jVg0x`CW z+FMmq#@To-9D}SXJK-&CSM6-0Bfl3(pD_gm{wQsYY-`WBJoeI4Z>08yoDJ3Ncu+50 z8)e+aG6n^h3+r4CPQJby8FxpL9A7KPymcRWQBV}%O4|`KIA8v2z!3xsSD9+-)E*x# zT;4e7m|K*_)hU*Td&W^u?!>7P`gPvjsA@PlT>cfbJnS(+U>mi5ds z3!Ld27#=f_0W!U$oZxnTU%LBMSW_aVqFm8Zn!h)#OK+^0~IJgy4 zz!B6LU=Pkf_%8glhOE}fTYy<4(Q#|1uRB*M!f#dcILcFNtth&My??^FVSnLo(VBFr^f|Gr!ncxX~kZWbBO=O48kc8F{zvWKLjC&t_c>OSYa*FGpj zKzY#aRubMzvp~-y_Q9(|=Y+SOp)FICKvFyt1_p2V_>X!Fyh_K|#Y$#OH?{;V$zsmq zBf`MZA#j};mV7eKT@hF*jOIi&l_L8v-t4pX=w-X`1RL^Yp*}vFD~{1eQ{VHUO)c>n zqe3AnIALBawnB@t{#W`s`j;En1>=WQ5(N6}-l2knhhDp##Lfvuvmw3cr8biOOmB27 zEI!w8j7_ViWBh@W7%~g-TrFF)LVirg$~7qip6tr*)PvsIvRwK(i)50`Y0_{AA1rI+ z>@XhN+VGTr>-`F0PrCIE$-<>)?a}X95XtgN`3&teb*sIrG*7^ zEDwi5W__vi)%cfXYZ5)>eUcjoPmYGz@U=lLhU6$qfa@e}f}o5u4ancE(Uo03mKKVQ z&QBh&mfU-fdSCCOa6>kbgBGG9jRYO&`lx#&u6s>0p>E%5icBr(kN@GwUOUAsvsyD3 z)qSaGXW3Fn@+ao7ZysD&fq$?aM5Ve`)NP$qg0f0ziSs_4j0R||Tl}~P+#KVFzp?Tm zxQCSJxn^?7=aAUzE|J2T}s0g07>FG25bK zS-0QsNm<q>L`6iRz+ zN&z1s$9w-wVeN3oiN~Q3GUa{#w#f*N!uMWmG!IN$4a$Oi-!Q300B6ho+MFj&67Atp zW92QrvCn=os`4j8kZYiQ4NWjSPcVOrn|Pg7@W^ea>>9!LnWwu8L-}zk-xl-pOheQO?P-H(stPe03#a44JBswP$*air}68fZd{58x469 zc871M+AcBPZw1p;V9rx_dkOo&n4Xisjf@uqONE{qPA<$3-_He zpCtSeujH_4_l~g(xN~O6HrxFVbyK-e3s--g90=rp9;P3QlixBCFA1*S9|`U$rpM!GiQ4m<|}bOre&%k(aYH03$Y_v257Q|<65we z6I(Ico2Gg-gtZ63$HL7~MwOdBmIv<72XnS7$c?()Qz!mR}oM1`>0pEjw76Q7W_OBo8aLTj~j77{bd4hM2D!`uSa_ zviy_$S3l`Nje=EMW?vqqExKjg$5p)UqBi8he2bYzRrTm`c2if9G;K&d|9LRg5Lf`o@sl5R)+3rbG6~mz9B%XOtGTAr}HW z#-(EA!cS||SU0}MY*Z}phweP-{oeP_s`LPnE`rzi5-?qp#}kurytJ6xTXaDb$EJg4RW&nQ`PLs zKTwz|DiwAf^rFWZ>jtAr<&jppPcnl9AEzd&@$MSK*wUMax4#Mdi= zt{FF%NyMi$s2U?E&0y4z?&?VgW$X&^Hb%=|>6HE}NZdW)4;F*CTgGFk8Sf6n##)Vf z7i`WF{9joiOM=(2E+Q>lJx9~EvE8ioj%{$t@Wt9=>-2ku!B9m*Pw#(DC6V~^d%6OQ zF(jL|2ONe3_>;0_j3nsAi1`GIu_PaTwDxsqyH3m;&JV<4c6Bddj>}Sm5lsVV!QfA` zrOg>_w?i2}BU%X)9)Yuj!lu4G1C>C=#0QJ^4!uuUVDHlz7cOlIu|M$31sDB5S?l6` z8cq*xM2f6B*+y8a_`6OdOpoCuE9Z}<##}Qs+XP$VcB={zSR9gI`Rs~~Yx3pdlbe-y zb)GJ$K`2|hX#ZM@jg8WrIB}?r zBv0r>1(V@#9_M(>ApbX6LwZxf-FwP+{cM<=sKwU@ztDvW6(tajJR9oD6QYc=9KD2M za0^mRv?;qFTj3x91dZ8N9eridy${7&d%Bpf4z201PW0Vc0~r=lx{)ynIxVp?rdgak zubK~?zrF`VAN06%tBv0)+%1`KSXsZ}$&?tmytDD^6yAE14-@3eDbJsuJChqM%%w-+ z$xZUm0`e36T}Nlq+I=hwLZXykI$uo|KaRtkj}TuUcRRs#JQxcws`kwW(zcMh zL?B%CGx~qS{>4aI;BetfyGysCQYuB&s?~qK)cO6kWMa+3HHYvZlTGzWsK!Aet*Q&v zi$vn#ed}$3q2Dz&FhgHpR@$X1Ni>Y*hZ0j{u$TaF&!gyH6Ymlm)}F)8E5#@1@E>0s zozt}U#<#;X6STvEHQRXhk+?#1;x~Qwxim_-vEz!=yZuuwB?)cK_JbQhY#*V_c@(7K zPh^{{@wLRSyH>oPfr=HOddPK2&mjtB4{IzxgS_9!L&|{_8_uhIoM$BN{~2L$tTLs) z`3g6R$NM-s?z%a$YmMj=81TWMagAG{8Q%VVkDiDRec_2Y7D8_D`4B7-37Mi{TT-T( z7X0E@eV@1)oX)VkI)i~_pLLAm<1#s_x;Aa@1@rB?`a@OEd`^>CER>P^E?0BFlX0GF zY3wa3&SJ#&J%?G{k^J}L@)=dVOay;EgC>X^Fx|I`NaA-kAfmT_y9t+MuIAvx-5nM9 z^KR7UXwub{9arB){E`KM(#Gx`ETl?pT>jYV$@(1c8MS=vLL4LZv$_W zu;>uT9B+lN-@I|RxrI+ZJ9FXkXX>$>{oq$9Yfj6ukf&Hz%A7WCtZ?gdI1-D^y-{kOQ8Lt=dSfOtf;ERC&^Of0*)3fU<6&~hZ=EFQA 
zc+7>!UOUW~v+HtYQLrW@Tt=b`0V+8|uAFtBHoiXeg@0haOXuVH-nx#upU*{9Iymq)cx?A$ktWUYr>V~G{gb@`>z-qm zfg1I(%)Db?b%#hg)f`Pqjw(YMSg!^oUf3j816688NE>2zcz49c<*LT|=DnI#j2K@2 z_V0H43(4D;zP$v8ecJ+Ly9iK0)8?#?x7mR}-TM-H-BA9gq=Le!8m@lvEp*&9c(;9X^DK<#x-R5_*e)xf29$N&B(o^U|& zf$A3?-=_dP1z8}jrUJ>S${?zJ&oofotPnj}SV3yrm}o_y1Wp!fGUqvmoRRutk7l0f z7lc?Mr{>Nj3MIJzdGjpu1ivEc>kUFwkv~XX?fd|oKFHA*bLq)~BipOt7sJ_XxDwt} z(6(36t!4?X8nv+R%2rQWVqINxz)FpuL5xthU(hRa{M7sC>OzZz1Rbmq5*3H(FNsRP z+%?b~t^Z#&s;C-*GMTCk#V;D1#d_$d38RBJo}5u;EC%v}Ys~e0h{mvO+(hO$T_ViQ z;h)uRACFZyViU4k8Xe9Ef=*H{tG^x;m7<^jSCBZF=3y4Hc$;@LdNQ}x&fJXru^}P& zWip^EuC2fE+M5wu1Dh!d$%obp_ud#0$zb&`;f*#lsoHe!5%v3XaZU-h&k8>Hk@@^p z++qYLJEEk30rJ9?e>`?w(BnM8PZTVRAA9x8Yd<-qssUgR0 zEfwsipbslL9?2z# z`LcqNqY_;6g{4LTwW$NOk+sCR_zWd5mH{pNGY9(hu{<)){CCOL8+A5Y{>)LJvuvg?k(dsW08yJ8bQ1}zFsP{E2K02g=KV~$shlfsjj138VW0V6Rh zh07V;m+Cn|MfQ4Fe;9AY!IjB0GR~p50$Iz(ccPw+_SXYtSRfQb83S%4Wr0RFsmjFu z2b)fRW*lyNp1<8&Rt>r(4*T4ekd+j_(+r;=;w$^IlE$H-5D7CPY+^A=!?H5_|>- zCX9xa*Ie&$S^a{$-vF7hACP9f{x5rn$5%5rf@b0v5CM}EmG5kcFP6?`;Hs;XL^q2u ziLUPZ0-!)8^EHCp@b>FGGTz3?{i}h@%6IPTi7-#-)-%JYpX4|$h^;B&+>12q->Oks zN3T|3m^Mt1hi?f%xt{htBWbwfy{79La-}>LE zvAX>+0Rlc;tf%1bTNVWn=oJAn6#fSm&pSa3zZEs0H^KKoF9fi1vAfOqJd77MLf4Gf z{@*JYhK=sl7gka}vnE@bPnviiCgDMSLWzAO%e+=X5AOB=cbr@aav#38CXzl;Qxcy% z07t!_WGo&_AOcw7)YwP)p3!R##dz+Ib=wY26_^^WJC$rwhu^pM7uO_I)b>v?B}amm znc?|BuWy22TOO|MRY6p%vrlGxHJ?jP%VFz+1Kt`{fb9>Zxbd$+?LWA|D1@`9sL_ob`5In z!&zW3(EpIO2TCWo3a~dhGnf0btW5x?EA#vNf+l1##Q>_dM{$UM<;uzi%JKX3GA??} zc;vn5bo{f8W9QV}XyTgjfKiRISlEV_9gHzm8!1K;+`&dY#K;S~y4lgoIFT86b@SpqVK#sDt50M5QI z6w=SdBr_}DC7G;~G>Ge?_U59ANsq+Bc3m-?;v%>`^wo{Jws?;8J(aQDMz#d(Gs&nC zFOevtX*ZbRAn-cAFYYg#BBTNL$_d0-nmg=sL)a+tHI|sC(>e!gLmm1FhQy}Oi4Xff zF++deT;Pv6R(CzPDJ2$RKn!aAI&qv1yp??;S>T=j`81ZBg>d!AA97~Hh%Y>okr%^l zMQ;fkIlU@I3!H2tHbKAo$dh-))on9qB7EQ9md+fxQtd@x;XIkc=r^yrrj_e|yXcVw zE1gPqBBK%Eq$1q>&#N(m?LywN{-1ep-=9_l{YJ;xiu~PsW7?wh~uR z_&Tq70&BgG#rNZ#3C(f|rvdS8NwUFbKz5TCfN!FKB2km(DgD5Pjh=Ak882J^++S1s z874xY1GQXz9A1hRO>8*yzj713R~Jk?>hJ#Igr`H-t-TR&W;56N3m)&flC zG2KO2vN-%L2(a~8p#3O(^L_5d($8;BWPqU90P_U+IF90@WZEAtI*)_&=$l}ZiEEvD z$oD<TXAq+4Z~a7qtx47UYo3|LwkgWl^F6+dw0wc_~9 zrW)*^q`A4e^D)%A5k-f5P*JyJ##nuBQXHrr#_%AGVWRWpStPtO;l{S0+UIgV z?OM&*!M++B6|4s4z76uIZpRwB;L;RYHx?Kh{dv(t_YU$3WNj^s(c0l%H1Cvy@Dm6cEn`M==)+qCET|Ly`@DS|{) z61<76nu#lTY2N_J$m=zIv3AGzBJBjOBwd#SL2nVJ%`K()gGtbz%aq!6+v? zRKr_$aZ)}a)1DJd^LtXeI~5cEPVb7TA1(88Pir7K4ZNS}@ibTOwVXduJ(XPb|L2I` za5M`~v@)Xa{*E~P?~e^7p`6NDW4#8d^z`8fVTpmqvVUF;oM3i(w1`;V{Ek-QhV>b9g@+=vl1;T9Q;Pni-N>O9Q{sbl4} zB%7D~Vf5FTC!A73n3yL3$X>Q9KlUDybHwFWphpi!@Xt5)hz$agy=$ZJ);q}bbJA}h z69uN^`vDl$|AmlxTD7pPg>WOCCcek0GgzV3n(mE!ELCbr2p@(j>{o}E8!O&p;L*`? 
zVQD|X)H7Yq#>51i!qtJ@>p~6$>QV!l4_5?FdCj+{pEGnQ@bbdh|8PsF8yzb#YxURl z8DQZ=uhT_sq(sbGw21eEPt|vop!^r|8>a!(p^6pa2Ym7hSXBJU-k&t@I3=pgK;0cR zjIEdq=vG5f<_i+@xOB?Jzs_NH|8@788i?-t3=Vq19jwp5u<3t;mA7*kUmISSlJFZt z6gO^IXYo9p)2qk~X6$X=j^frSC_X9xh8tQ~IBkCN+lWcVH645p9aLj;#_$$k!tfpV z>&_bJH-~92%`7CZZ_(*F`|qUh8F2Q+x?};*$XL028pkpr1`>18%mrbs1U>VilDQXL z#N|TnnRA$uVBRB#aF%p%Fi0@TgFoBdkzsN&`z59RpJ$xZ1baS(Xjo9@Zy(5`LFf|n zH51;%1_JIM=O}qPy{;1Vd{%v1IHfc}8AiP%@6j5?J)m3_iRb~1FODCgykcon-JSx3D!wBZX1!LhG$ zSv(2q6`A|ynC~HU#JeP$ z>%P{`cL`Z7JUSHcnZGIG68VFTt-chh8KE+ihyN!-St;ZIA3)`H$&);A)nUB~Byp#b zArOI!u*<{e1}e&1&2<1UeVB2?<)C?xFYzA9hqG7}$`8G{34@&ic@`;xgn_oMv=Stv zf0AmfL)Q;JT7s2Ie{Dz@d&H}1zuin8`o}=Wwr*!epEw{X9$|1$HYXC1W5#;IN0tEx zXiR~O@)VFbbnpgMML^=I; z_Hyuj^|9LkvA0GI$~C|RP(E5fXZtr|Uom#Zlbr${)~cA>wiU)QNt2S0z3SCgx?Fnu}pGiwO3FeJ6smY0`T@h5I?USwe$5C2BU zM!|D8v*Y03bJ`s{B>pvwV7S#x=)4NkjGEEVI-X+N+u@z4)!tPIdzE0?XG#IP<0mJV ztj&+fuv(;$w@YD=?$%wTxX_d)@HK2>j4x04(Q~au3Ru(X{)bw(65oREqnoU-jQjxQ zm^_7T?j~1%JkM6v3L|5S6@TEdT}a7@ zpFJ5$H8iU;g?~cf#7*wpyGGkSs4oYmu;2iignDwobdeK}6Zw5@ssz|OM-uzV*fKv> zS9;~zaXuy{QNIiEj1wParzmcKbm^mAD=Qqm|13^$^;KzM?eAqJO!QJ6(Uk)xEpV8^ zP~ZJUw3X~WAT#JslK?A{<>{OmmX4^&(qE+bL;+}h2JAJbR~K2uUUt9vp!7|TjUzz` zGtd|3!>dlES3FH2T@5($4J=7vus9E`4P;sL9*}xF(}bgGn`_#);W`V~ZF1_EKXr~9!!%UP37N7b^R)kaQ!XtQpWq&Vi8G2pKsk4 zJvU}W6=!|8l!@6mK9?s-=6i7nyypnC7xOPz>>X2pGEDB=L!VdV5^L;$g->A@v93V8 zS#+N9hFSuMb?k?|RJur8zscLNZ6iyWNhdDbmi$90SMSz)Bc@VQO=?w-r77Be*t=Xeh;&>ch_A9Fp>wBj`{Z-U_x%g z`}ccgm_(a>+kgMUOQ`3ofNtLYCO{balH zf7qRR42SjJNuTgkC4bM*%c_68w=bE;sU$d~xw}=6ZO3>* zYT`H%D^WKe(1kqw-4=VKxoN3GjDIEXeHn;A7(m*y2+-DkhcST}ei&qCbzTIj$QmXP zg$-V8wqYC&`wgl#Rsk!ob{$@1{rZAEAb?}Z=Ri$=z&3z$hXAx<$tMQ(bIV_9g`VXs zJWD&{n$Jen=3SlLKmQL`?*Y|x@`Vk@f-br?iU_DELP9lCq=*n!iu58)KtSmokzRwY zC?G`$y-Jr3N-qJFV(3*MKq9>pLkkcf?}Xj`fA9NojvfMJelvG&dG5W>OkRB|*DRYo zL?%0|_fQHK#=Q*Y@7dnwHTlt}{)26~e7 zKriW@1ALDv_hfSdc_y>PBCDX_`?^7wA_eWPSck48$5M$ zbhbjJ(@fwb8@A)$ErM`0hmhdHaf~x{{aneP>H_KZCF-fB%{rD+{vlQ^<+&s;7d2 zUN*&jUi2Q}kUkBtBI#4WaY34akv->Y9mzbuRV~stb5fdDQ|vFsS~o;M-00(zF}qDZ zR)$7`OBbAaw(6Xg1<%}ZpL5m)+`7x7w_HzNSa<|0rEWH)U#aZgyL(`_m7e>GUJs<_ z%P--quQ93_obQ$QlCu!q8btI{UXyyzwdMn#t)F=KZ|O~3f^<_At*aRb^Vmu%!6qMD zMu0U?!%xNxYNbIc$~=jy!iWdh@#kO=LJX6Nlt#vLcEv{Wyh`I{&fOXKYkylVPz=5Z zmgE9V^4MAR(ScW-`g1Oi_51h&_RURwMRj4WClz#aIG<(M4LFjSSF7+R4%b#GO+G&R z!zKdRAE22r&@zAI7Qs&fpn!U{iZtk%8r=~tTZ6IC(N`4*u0=1KQU}- z)L8;&XzHpf9$9U8Kpm;cM~Hvg10TKQfg81$;A9v>o{^UM&mBo$rcUY}@d95Qqt#sI zn4kcfHJA|=A2)nb4z=Rs**=lxvbx(*Y?VSEoVoj&v{Sxe`}Jl2Y{!gyeXio4cqZqa zKOh&%>43O3pJy~004-8TJf9++q11aPoe}8PRp3#%&cWi}>!6u^xgUocXWof3FL!GS>L$KD zLW6;Q8Zp7~U{jzfTc?N$#O=jLu#ct1@z3O4z?y?3)$W}y2nm6T`fW|RAfdC zFZEqE;(z(-?&X^P-OAUS(llUa5BauCkt05iKO-|oLdP#nAYT>tBHZmc5vU1e+~x6` zh+SN&)Gpe-SU~s#5J9n+mTi_+B?((%bu)|dV8ID~pBx-|n32viPe>c)a zX>*}N&7N2S(@o(dnt0)UYn7dbe z8!g7YH1CvfO^|;@SaewiP8nX3f|<$FT)ac@!x0+Hq1e{Lg|)bHD@wy9DN)i$BOzUA zd>@fz<*|E!ynUnQulaTp{J{a+B){1Pm*PIT1Oy1+@qpOqW{2u$e(D|hU1&Kil$dLH zkQopKly0>9n;dFM*;VrQTHX9UOMCb@(H6yTT~&OT&hh~YUKWuPWq%*851~oT3$Az> zLp|3Ca$kZo{$K;X`WRoIW}Ayet`~?jwz`YHRiw zL+6u2`ofV}cz>Cy$`Z+l`ahl&mVO-ZYn>~VuOfJM7qZWPkaQ=&)LJ)@a#3Yob!MFS zm{8X}4-U6v;b;NNd)F_faGc|TJkkgFDuC;rrXfEyo^;drAG{vmd)o8ec7?PU^26L^ zQvl73?*|XINw_-t=h2UTJNL;y{c+zgPL1hFCa)hi=jsQ)o<57m55BkMOH>#r<6RqQ zAA2uFl2P2Vs8}C2gGUk>OY1pis#@!jPXO89H*yp=ui=F8LklW0XmfTNAqj1|4;CkJ%D}Sz%7tDfj)qx5p3M%{Vd7 zj)Lk#Q5=Nc9S%(uDqOXUGo9*|`@NgJV9(N`*0MA+6(ZL6bD@*A%*E7K`JcxsDE;&%<@|SlETk~_gHphz&r}~wm-uKv#N0=w#zs4O2 z?>EG7m~EIU`RBpK3HmW7KT+Mf^r>tyO;(|Q0c8M9HSknTYAq@A)z7XZc!oy+k9cxb z*4!67Q3bzUiw*lJvFg z%&rI((2B#-t+|&MMV1LhH6Cb=EGf*dj^Q>LG$D}g`ydp22X+ei*kV`4y(&B5H0||e 
zEr$?fAeOBk9TAuk6iPS;P<{?g+_$yLTuhc1e7M4b6Jjv;oj_Q^In`j<+Lc`*%E-W1 zkPyD|B&n_Bb>w1beSK(`n3C~^-1uOQWVye!uyC9EU8Jlr&TXh)A6eOM%K#x$@y~zQ z>i=~X_5clje$+eNBxRT2ab7($53EsWk4_<90 zN)oiZk*I#=2_Ch@oXOpt2E6NjANlK(84Kga!#3YVLPGD~R<4xqy>kUh&>a}FUj}#V zr^~@Y@+))00ypf6J%<#d1gv~|@yb^KBNo!6a>mqDoI_%pIGYS^e2`WVyQAv9Jo7Wc z=^Sh=#)cNSCGTJ39^Kn+CDjAVivqla&0u>*dLZVUD#X@NYOG}(aT_K|DGg%Xz%gmz zuw`E!Ar39b4d$EvW)gA8$2o(}&j!vnft2IKr`Ny(v9tj<`S!2H)AuZ8U>cA3X4-=< zl$%+=rng*X%f+WwW24nWKx_p+F|}(G4Me$jlxJlGYdf|E3$#?WUTSR2CC9!DE{+6o zW+nphYV&JU)+OZ87}3E}u$dTUIiZL+)YRSOGfK-%rEUZB(V*z__qyBxFMegWjQ8o3| z60?a2JUd$tHoSHzlaRV{WVj<5*+x$8b95x*oN5i={lhhj@^0Z$89kj8?b=0?yit>k zpj44HSuF12`yAx2#XG6&=laea8#y(regT&Hzcd^Q4E>Y4Zc-pai7YgM7i1LySbYaJ zyvVZ$5}7Xte0BG3m4EP!6qG}KfmWXQH2G#H&9^i+-|gb$=N|Y$-ohl{B=-&nj3Gfk@FXPY+rLm2BK;(}q!pyMrP{h{JQ=^qa&=GTY6m(1Os@FSadzTS z$5qvv)s!sU9=TCqWZ@u*Qq*WM=qNnyhkv#2XKG5l9o@6JPSx5%WQk!mNotNmXY>8p{Eyg`e(oM33hOLekvy<3{t({g zQ70~U4?T7rT~m1Ss%l4u^}yW^#Ue}BYXu^h8g!x}m@mkZbMYnBx?8(F#1KKPe; z#g-imfAyQMAF9gxB>gVAPw7~97!Z$k+HkzF9$z5U`=Z#optDe3B zRO7MW356JtU-unnM^vR$y7pG>ITOH(muU+AfIDly=5sw6B(L|HYJ>9_?Z4!9j0iPe zQ!&tEP(^0DQ6$35uaIUOFXbn3s7S(llng$6@ZM2-p)c&gLdavFD@B4(#EAca(}ai? zNbsFZ8qI4LG^9^Fsh|VZhcmzX ztO_HeYWVdA%k&Zfr+(R}@SBF1-t-+TOLmG=E@J5@!uNS)8q%w5_KMqGZ#!qj;^_kK z#_e$b&dh#w#Gx1JBsh2R8LRY3(0l{tfsf0!fSi0IuI$+Be96DWl#doYJ$!2!$ zBTP3Td~Lw2s7Z`8E$}vwsD5Db8D_)D(-@$oLE*zux@o22zCzj?mQQv1vqCdlkD%7? z5n~n=ia>6y`E+wAbS5WH)I%^flY=2<+y(e+O!Ar}k^p~jB8XZM>67Lz{uq?-?-Q;m zAH(y~h{B-rX(}JumUw@)VHq(YKX%*7`{REsdqd8_Y`%7!4K|wna9!LxtQdJX&dZzl z@FfLP10UXo`@bhOiqs%YD=@ERM3sPh8>}bJ++gLf*^r^uWLz6#E{?i{OD=N^`W)dp zlOfNY!Lp>k17n6LDCx>V_3jnzVCFb~96!*Y9q5Xy{#?vVAoW?)OoOS{@-O4e%?yvl zdlGJaX||Nl)~|l`g$cgL)z*~C`uma|CK8#(yd@5|22E5OtK@O1E~}BaP14aLC`5o7 zHCd=x@?ZaYlBTP`xlq%@c$pkuxrhr%XV{0Lr)g(cCGs46!Fi@cLAhtb2_8sPW_w}y zdSzp9e^1q5nmwi6ak5YShL3g%6Fy!YH&2GIUY0`ux1HNa*3$bzp$!P6F6hDxNKbC* zQ+3e5DBDOZGHaOnZAGY)ZgLh(i$-~{xN+)6?a{WA$-|@@T|NnS6=#6e6{rPuhmep9 zp+{+tQwP&<8--b!Uh0ckLHFfmI53`w_3i){B(XR+os6T z+Z=VeDh|xG+oPw=gH)WOsWmCG!ar5HKbpAH zo~`qAzkw|e>tW*>UUgWd^Ca-f&4h-)&S$=)LllJMe|BS1?GW?snrFLqh4r#it%D3( z-S${_l02_UP`~UQaeTk@#`12s#3sKk=;ThJW38Kvv9bKSYpT>#cJec&DRBS3To} z)ihOJX6}f$->h6%ynaDK#;#tUxno?ct=W`V$h?D`?|3|^j^5rFetwqK`qHt^Jb8Be zgs@IX$zuR-&K2f=+v5~&vwjE^<}-w9BIdqHhlDrMf;w9n|E0*<#O$4|rbF0v51tZM zBegIO_hMf2;^Z=IA(~Y=EasR7VyeyF?mQXKxw9ZC7d3jFZ%2c8795SlL_p>4up+*e zhs6ouwx0T(9O4-sRNPHEDWFVL8HzX2;zp~t`aT?gH)8fw_gU@v@%t8`h63oa$wxCu zj4`LBRdw6e(P zzQVPYal@HBQ>A)UgYXUC8 z$vAfHH~$H)-uw)|wb(B1wQjk7-@)8dEj`@0k!T%nf|eTbX0OQf_O(mAJw5F?Io+42 z0?FIjwQN;ee)Hqw+O&8yRcst0=2u1p%hK0!ujYYw!MiiqyNYDMrufT6DBAdMDrvM-Pq#ba``Vq- zj99rii*s^oRX0bhRHBJ0zaq;%#ZvW%J z*u?Xo0O~q0%h}*7TFD;5wtv(-6S0MrihX@9`Yz;@b#y#zU&}X*TJNX!gJORK)6Q3R z4s}ffJ&?x`Ri`+TRTv$0w~9#eD$r|S~ySD$_+^nM=MB3>He82(0!RKI1Z zGNjAfO3I6mup0mEZGV*)nwa!CciykpMc$e4W^R>GfP${%;y4Wq=a>H%E}} z#biozr{+CKRRPBE_mM@fwI|LcVKr2@9RIEl!Kkwj9n_Jr_fh9DMU$m zwZG#)sqZuSwTNeG_QqQl^Kpx?S4DA@IQl!jusWCyo8A?LWBcbSFB(_OZ(Q7-=g$Zg ziADh9NX&T3HcKYv`221$xMRWwseDO|NT8HJ?MS_TM@?3!Q0a3_Qb=Wxig0c#=w?1~ zr7awGhL z-}H`EMRujZMA}Z_+I%sexaV}FeR+nt3xxa23_h_khU0?8CFAYSS$le!DpxgT{q($E zSk~ZsH75Ut-L#oqp_UlJL_y_cdO|x|1Qz$$*&;to%zXNl5!qQw6Z;^%=o-H4OU6sO zKo!Pf!Qe9MFMpU>!wU{E5&&nb^uH({z91dfW2HaKMg}(})e+kY7Q2jslipPB$h6(Lt%UpK) z-WP?dNT$`MwGCoWN=PW--aHF%P0NvaH2(|K#f>jE&3CR~x62WGt-frie(fAQ=Rl(} z(*GUO4C>oXj^6DFnO*Bs)?`EOd#qG#;FRRgmUs_g;uG#J=uEUbE#E?C z>dbFyCt$^dvSYm?ly@@e+X$f-U7fD(FM#iC35`p=Y!xiOH}}!sx3^R1ep55OUF98p z#NFBO9q!;6PkN?s-6+52;QLlt%`e|Q!Y4G=+Pk?>=*{9{2|KI`#4Y z*8u&7IP$1{rH>4@%4$<>wQ&au#AJ4VhVTcN9V(BeKn2=e*NBPCbRS>-LmLK< 
zf5>}BE1960ES~tHgqwVCHtrWo$FAB4`9R`Nf8SuY$4N|o`Fgl=mo%K?N{S|L{0e^jC8xe8fCYFt&^zV_9`By0C?jkvFCxJcm&91|dbB!EtXoaCjf5+-1T5qS znJftgy3}g$!FF28$`R!|WoQIU!_^|LTeaH;K6LaI?y%S!!U&B|i3}O$Tp}2~S`V&J ziSi93$XK6VsPMRw{nT&3ME8;W^8Km%XNPVLYboyNl+Dk&h~GjbqpvVd`+ryHg*E7`CgW(PvtkL z6S7_3t^A6M{DwIwC)OB1w)#NOc3X5zb$W*&T3;g<}|wrIUf+TJ&OMTRvy<0i#_ zIm-+yRM8$cHz1ok^0QJFSxlO7dS>Z=u-*AZ`va6JgC5DU`52R)&#Ys_?8d(ijlam` zA+T_(*~g@3^AC&*lz)`)LY~ydJ$n9x_J2-7AqrKV?>l4j$tKxA49%Uq3fD3gF4OUA z*49f1)h=rc9@{J4Qr|+>Hl~kIJR+6`54EBqRgTg^1Xy4i>)b;f)+pLY8Ol_d(Edt= z(4MmpdXZFKRpo<3_D-qLKA^ND-cZtQ&8~j;qeJfx8y<)aWmjnq) z3pQVLd1QN6CeLTR;Aa%N@FwIRN&_Sq6J5en`C<@a8_8J4Di}NXVH%Pmpgb1Q$Aw(c~`t1|mR=t~2~oxGrOPG;sAAB2)?hMViV22ze4Lpw1`)hELN zqzngoMce*cV1v4+gO^5jc5xmRdmotkPpQqKG6s_fV4>D;b>@%<5Y=ez zwQm@eYV8r-gbmROrLErg^c#q%^g8VD4jOff}VrMBu(I36gDisMpSMsHNQLZUW4tgwD%~q{ZQGuB5L;7 zrsN(`&8~eRFDj3l{?Au!Le3(W#=stam5iF}o3c&T_Vguxa z#dMUBb+O6?HV_GI4Ge~cGBB!aexzq4c+WH3z+EV&ZEcBMl}+iLd?z)DzwB-mI&e4M z(`}Q8kfaN6_003Z@s8+j)qBhP53_EscqVU}RK#ZQd!L0o{|h_`5`g`4=vS~sxp8K@ z$M|_z!YIW5t_#6wqqolR9Ib?nAtkNuf*&@eqqp*|Zi32H?Vu=@(r>a+=F?)SvZgfs z-goRmlZoiuex7_=wp^d1f}hdLc5GZ2V6xV?sQC|-p~Lwns4cf)hTb&X>N7}Sd59i2 z5W}?aplV{q@Sy41IvZn>lix}#VMAm#`mcr_P0S@Vl1=kJRx_(+(4dI#2v5MZ4GJ7T zuQrrL6^16)SH&Rx%yiwo5>BYbEXT8fa}(!C}|6iX2d6 z@NCsA&|@Gc{rEgg6#6?XH5aqK$MK1Q*+=+HaCsIJer@x%Nm2? zyiI2BpCVh#9!fLY45^%637w9=(6NrJ< z*MQdh{TaRE12{yVBeY~hgPcsKz_{f2|H_zdvy1n2kV}g(W7m!ta4+$6p3+uuFMn-d?7&?87 zigGk`wa&p3xZ;Ow*TbeBt9&I>=7WXyJNtR)6=$-cnZ0&irYTeXY@hHd5ZqPl02!8|!H$Z^uq^b3E_GzFyZ)3fIoWhhRxe}vE>PPx|8(8;&);wqSGyX3RKiI=uk@16`rcY?IFR4u6D2q zkWvQ6Scz<4KC;c`c^#|Hg89O;dbYM|1ihvT zMg;Q)!)AB;0o~DB`G#%^gb2h5qzVbB&3+07>j#4|sQT{(#gOEh|H3}=6J*p{!P~?H z!`El$3@2lmp;ZyV!p{1|Kf-}#ly8a@fVuofaQW0+JAFFzBXC&(K|rX-Iq}=rCsQ2s zG^krQcXy0jCMiH2i1(Kt#}$s;6^J=Hn)dhic~c7^B3Mz!Lw5 zC|^B6LFg9o4qb4a9BO?oG~f^n3c1kI`{%8j+~}J|z{qd?MfEDx`s>k;fMmOAP=J?H zM6DF8+>R0@grNFuQ8~M-7$9AY7X6@S>Qh+^%uD|+{42W`>;CSQ;FiOgd)+4A0-)5g zAq!{_JU^p$|47yNs|w&dUdDm8#4*XP<<|T+#hin%)}PyuLczRtI2*VB%}|~Tb;0$N zW+3mcF!hz@_TC>qLJ@&t;k2Nc)yv^_N8DeQ-MMY|n;s%H1x>a{2$-kBNT6vz+KJU* zDbg7%hSx$XUPc9XzLGP;F~W?aNPFfZvmGdhq~gaDlra?R{wk)(C*gSu%u~O(LIpd~ zep=c<#k(=+1mqhSjLPXcCcj;6B3z!LO6N)$_~{@hGAV?BHiVHWZ}QVQ8hXQarrhU( zje@M-8>e5@S$JdPiw1|~qeO{2$n|0^@Qgvhwpp4_8J`|S*Q-0lU}#Fb928sei zM5mck(-@RqL5)P8IKlrPLqUtU@9DARLF}VaM>)F={ModqPM|=AE1s`+_6RDckaHO4 zkuqSV?ikw0L?>Efsnf@$;$ypPx%KXwqRIW_ zFzIGW^GNT6b7 zAXo=B8U`Fa3->Td1*94rw}(N5UFf+DHXP(u8u#>{G134&*4!Ok90A#QS&A|jHvQ$9 z#IvuRgR%8OIFiUZx=Y7%)Go#(vD3p#3XrEYeSW+Zy8Mv}nn5xCnB!5d4Gn>HQc04h zEn8P0-4-$HyY8D4USokM6-MnWu6XUt{5P0l`AwtO<7t!*Zoebm+z&cX%M8@h79gvO zR!S|_F~M2Hn}0-Jf6N|P{noR2*v@^X*fpfS>Ef}WaO7^`zsql4(f`45IfZI3V&{AP z&QN>nixrl>@UJgTD}KF}e6__w<=3qMB2zx*bM`V%{K3GLx(dgss-FUtBC&&%M%-5U zO=HB@xwmt~>&fMZJC)nYiXg@El>tJe_Q@KNqh-@^Ubjv|V=hyvJF(Y3Gq-zhkz}t5 z2A=`B669oO^!f@lZufT%We^*>zA1W8>Q=*bpdSi+0x|GLQ@MfpgP%XtHjUj#NU%)T z%{cYW&V?#g%#RCMjoazC#>)8Y40A-;H)p@(*(L1}TWRDWH8~UqFwy$j&q(IL>N+WuqxfA8o)18LIz z*Z)L+1fkp1FI(Il(!x!TEcjGYIKYN?#Hm018G;%(OKpuOD7(b`rnXX=stEZFPX(9a z!bcsd5;&_UqzFXO`?Lc^|2lyv61}pf{MyoJs{f21;k^l3czj1zZh66Y({nzow(L3r zG+hx<{c3L8*{-ViGM+T1O&r9=J#^uwr%7L zts|*FR62&H!A?u6%|GflcfYM0r@~qyl5pf^M!N?0-7ZJ}9v+Mm5NsVvWcC=04Smo? 
z^7qtZkwc}`?Gw9v24jL3fYUTG8)sH*+ihNSFA3j_+Nn*((dd;JE_v-uw^#S~btJ_c z%p6=ARzNozVC8S{LPb7O({xDxOHkev@tf;aR)2_a0GKBr%fhxT>r2ZKbo8#4OB?A^ z7mPpir6B19`skg9hH%1PKZZHSJ^2rpx$#~bq3GW+gWkIsA+1~vA1u8CUc7GKp54ll zGGTYKw_eXIJ8`#n(n3_kZKGX9bbS+#Ad(6sLtw$kyZI{ZEFdlfa8N@*5a>-dy-7}__*+)& zNj?CCq*89Juh8i1gTro`o)gYLT6S-KSbt)~pj!E%=BeO@a!Ki)aIBtGo{m}l-a1ud z)Wg`undq&@&3wpVngl2M8sQPJJ^C}3TpLKbAv3%E>c}yFX1q|;I|F+;#o<(p-eee{_7?%(!8Hz83p{o%kk z<*s$LN+`i0gBN@RSn;!Stx<}UMFoNP_iKHXE)XTuk7Oz9^yI>_ z8aeZ7#jEYMKF&>yWun9KEWoE+Zcyn)_5y$=eJ>#qfmSZ4duh5*wM!#m| z9>@)jJnT)=EAmO6cty&a9(WeTEEzshEqP-c3(w37f$nA3_zF{ZX<;c&vm`U!XTFnI z$d}M=&Da6NHg@Vr+Jno<>;c+m+b3;yEEE#p{V#kCP#Vjtrq}biIt*}C6B;Cgb)55k z=K5FkhQdew2D`{m*SY2xEH13C+aeO?q_>00G;pSCg)S&FjO9mZHgi0vn(#BA{u7iR zgfS->nM#lc4Q&J;*gXuVde5&rxtj(_t(A;it#EbXP2@4h6rvHC3U1rK2^`I640y($ zU)X!diB$TbdyQYN`?$HBeW&b3Iqt|KPe)g_%|r4FsC+6vZF~3@PXTxDZ3Z?L@J9S% zH_#EZsd|HX);s=QYH5>~e=qp2`~5ou6Ngp4Yvqo6%Wbk}ovTLs$A+!gvFb122;?@s zAE6;^s`^Us*Y<<=4ybd`x|-k1_g8dQB^VB>2{)0RBiYccvXe@`0gMeEVKx1!=S_d2 zz_v|REuUc1logL`voP>~J)l}~)@N(Y!-sEGI~pqyT#wQyc?}eZXEcvX01e|8M#mAS z<5%MH05kuwnMhDXxdyv~?)VqUb)k{XohfziZf}Qu{pli)w4eMQX^H;V>=%x9ik0B>0rs`!vopCeedeA>pP z<9tFHCVI!NbL|!RdE7Wt#h1>t%A9sPy4<w+d)bh##48XMy0mKbY6=AX&!-N>!dW9n z3rF-)HaeH{SdaEbZ|Xb0Ijx_qFXkNL@`Uyp5y&f@8}VH*TXK&^td%5mm%fCTuQxxo zE`yzZhPCtS?_J}zo3K;e`eHS}{e=4uW32B7Gj!kN^jWgeC&#H!Xzx8H|13%Q34{_A zYQ90?P@mvtDKYPP1K&|yO!@;nzuH6?_{7qgkg!2R!h4SfzPjJbZVO?A_TLBg!4OCm zHNkz9P<{OmjcgSa9y70GuEb9@$voHanhqBcuLd6O<82r^>5y+cO+wNCig3Vg=Ln=O$e~0^<@8jw;qIz&IQCZYQi(E*6)c&Wm&DO z%v4mU$X&r491N>(&3y=tR7e%QB|weY7pP$j5b`0LXsqMf0JP zv^SNia_by8{ie-Fk9wgH2gozG=}YeL#SLz>H8O&InbYpyH3ldmRSB(d+_V+fY>Ma0 zQo$B{7Sx9@Gs;7q)JvrWi*jv0aJu8vuN8?Uz;HX;-O4T)<G{P=MMQyea*&_d9LGA^3i=# zg8U?`m+COJs*UB+;*g{F!t@del_hg>sA;X-Mus)TV53LY+QQ5gGHf6fsj8s&QEgpJ zsJrV7nonDM!v3XJQILGKd4;p{p0^##b3gcgJC7`GH6s^=w`f?8T zgb#IKMEp8`;cIeApf# zvc_MM+bHRkr?_;V@vGIkufAm{fyPHl;)dS$Dw-i`wNY9a#0*=IcF_(1{Bf(Nt$jnh z412nJPMf%EY&j)6&6KCV6@rxaYUa?oj_4!J`uU6ejveNtUmHjJ8&HmV({S}d&ID^; zLV^Sw6CHHE#47_%*klU<$rro8)QA>OYRx7~PC?#(TmTFG(uM6kTl8578FYsrAEN;~ zQ`!_n)xHOv$#E6RzI@X8FMQ!X5vanTNlt`gS8~?tSi|k2z{uH{w9Bh7tjC0lK^1Z) zI#gg|Ly&!)CCo!GhZI^j6zF?*ftV4qjV9sboezRq_?_d2@{1VULYFSg)ph#&r-^$7 zpFeCk1<3%3%yD;_b1}G)^jaSTuTO5?2Dskaby@8vKNhJ-B;OQ!G*hju_V2L_QUB zIFj!9mR{XW<56HGCt2 zZlTPcn7b=pJ7=p`VNw5_(4vxI6?HkcWnd>(zXRR!7i|*cCp&SSTYQ>>ZS(m2Ex6pv zCSRr7`!Df|LXGKcA`d-xqwvTLyx9G9AuUpNL&anyK{91Dfe1ICZmq+gRK4o1BpkN4 z7%Qv(-b#rx2fs0G^%AAvW8hTVR3jOZHaL3F96OcK?Ljj z*$uA~+pY;+Yk&_O9MoA|2N!HWII>AqnM28qOjePMk<+xLAfr1`{zfYvRqGJYE=v|0 zeuy@ca2Vvloea?u`oK{~B;zdx5#$o}rEsU@pY!6)Er%I|_~9WaWHScZ2J-kISxo1T ztyDBh*`@WDT;u-R1wVRlCuNGXB9w~#_Cq!aZ4HI_d&tLX%G-yRx|#N^u9sXG>gJ}@ z(4U-Pb#pQJwj2JU75Lq56AlN!F6lfB9L+PLi3&{d*mc;SmduTDvJ%{%{JYvvIEvi- zYd`~0uT*Dk(LiV?UwexPm(2)Wl3e3TZ29J@qHa9-AdN&(=s#^kMd6n(fI2cnx78ok zBuriABlz(#=nwAt9lPO0F9k6+EAUu%_F|RE5nCseiB+j`8vXn$+*H-lgfgXq;|@4% z6}r)_7RmuP3mN|)awy++$p@kE^3pq|qKEx#N`MfQnRcW}mToWfPgC5g{`39D^5@D0 z6CBMRI$(!tseON_F{oc=13Bs^%L3En%o=Zlf!*+$mxA4@azmXq5pRaP-s5vNjdx5` zCj_NV`6`dAR2WV=4f%L~)a{@BXjFwOSb1!Adwxb4$-A$b!CQ&_G`Lhf)e?frPosnj zG#JpGg=7IV{ag*CA}7Sl^dH`%I;gOpTBB*=BEQTs_q+|J(UK_^l9v;wa^bt}`)Boh zP4lha_%U;OD5lG}PhM=;Pi6BC3e+T={Tm#(kdi)qe-dYUKlzYHnP8Qnu`#${8FiNufXfhC+T-hJ!8&vsy@m;^gZPYS=0In3|@ zihiobMy-KH4JnrHj>_K;|JzuS`IK@qTZHsB7+ymOlt>^NajD;!R0C>HGlQ;EX;QQK zO#OGK!J1Z3T$4+_-O(v2S>2N58Cymy&PbAo)A{*BX)<- zuePNi$~t7n^|bnM*(t95=A7`v7T1S1Rh4W0zwYX1TSt8g!K<$*GdyJh1(V~lg(USr z`FWR)-l~@`2Bnp20wNqvNs0V3$6Y`ZeRIRee57vShQFb+-(Ej=%ex(^HMMVX47ER` z#^NHwmOKX>9mWrv%4mHPd2U>?|04nlstE|7^;_AB%ov*8#N@PY7G=&xQ?-TBXp1LJQ)z8;Ji0rW- 
zrp2}ckH@-8j6|oh$7|rIhyJ6OhN**=$+$~yJ{g(KC5MQ)xXLT-L_K=Hitym#@Ra|$0X>m7VGo$(y9 z0=@Z9Zx^A_9%bNU4+l3E(D<_EE;hJJqC}0f%Dc4@Ym(SD5}K0j3fsL{G|#IFb^5Tp zJK(4;K^&nwXSrz=tZhmiRweI<_#=zX6m(Mpu;O!#JoaQC8fysHXAVv*t;MI4{ZtL- zcl%LnCAhqwZZ2=N&w?RiFe+G|avwY)SEAYD^XMQ)2I!|ZtYw)f|G4pG<)@E{|3ifW z@LR7MnNR8esODR>TbczOu!K6(FU-T#60v0;uPG>>(E&+)Je_;sk;!MzLIx*c9%br4 zqs+U0N@0b{=@Go%uc(*{0jP%6N|l z<%{dMybvV6>_<{way$uFgdbJF$SfUgFCPZ4g@Yf*(Qe9%xvtgTw^FTgAml;y20TjUx3edz;9N zfzJ6_Wz{5UbTI46#WA;nuggAr`e{g*&BuK*_wh;GV@o>&Jg@bw-0m4$xhphZonN!1 zeS>Gc?M36?169uW6}>(;Uzz@L;5poD&3oR@8Tzt0wC5mOQmbw-KxoF~z)pQeRW|7T8aRY5mRmWo&J4m63yY5dI&3YrO5zosOecqedSdKGgb>1$hp9+2U-hUb zNH&_Q)MD|ibHJ1HPeU{KrPf18W{?-<=}+H)v8|8Ce&JfRAgu^e5eSuPBrQ0hUEbQr zC=ib2+v*hq=B^3P^$iuT_-gG4jKM(M0sY=`YT)VJbFd4}i&!_i`Xz6|+M)kdWFNVGsc{hrDll77mA$c&J;=PrSY|A{avPcDc7Azsm~tJl+VFbe^d2}Wgey9y8T2X0T2;= zw#Yb-#`vQayfShPgNE-22e0f7;CuY>%6{8dMf8N+Lz7AVeM>(iF`MTgPo=2cP+)VV zcXHQy!%~8HB_=NNeY4pdP#xHb$#b_!_!Np+=m{q6q^#aYbGCVQC5xzUy)^ZWQ0?`; zeH#_0GSCOkK&_87$MQKfJaD&WV>aVFi`g=NU5ZS11G0u z^xHBgNjIOyM*FvoNn#m{mZw5~oPKq0Ww@f?%k@# zcQP$@2qPB{ck3#{mi$QJah8lyl7H22jiAR#EZB|+^~rWblzS<0fEq32t&6ifIaTo|S0_i7oZVZkIPa6Qm@!!ISaj%gwq@A${`?pLX^-p_a$74)G5 zU=FrL?+a_-T9j_F2faS~?)x5VqTg{I1nV}=w|CL*lC0i?VZfW>mydGMyHETpA=VPS z=fIH+2;?K5$b0oB{m8O105UdacU-4xjP7GVtkDFb6|KU<^_4nHhf7q$HHmK7pw zHin?Ssr+gnD#*xpO)G(f<~2#Kci@=Wbg8q6v-1#Ml^pw+f${W}b@jEWsxzubgUc)O zsF;NMW!f~S&F)65SVT9OUuhlf|O21{z^+}Mq1kkNC`Ri65>w}0h zwi8P90&16U%%s-Oa2BM01;Kh3)Yx0_XQ+RAOE>xSeQfTxh+gc^#epY=)7CWS4uwc{ zFoN|@KaZA+jK>aY=WDg`}Tx5sbX(qVe)19T-J z%aNl!P9Zs{tqj~0uuA2KP`^sA=_d1GT1$L*gp7?!`Z42s6yuy~x}Kj3l;lI8k@ZFT zawugJlD5d!vb?kJQZ6zdPyQ--0+MJB!ZUS*Aeul_A|tO#EsOV_>9RA!{o$NO?M6eV zQGe(YDaUu;eX|v3+tZG8cP{x9ikt{A`tL5L|GoFwer>KGp*v*O!T-aRGOTr|xB7&; zbIvYW+T(iw!r$!3wQ*^!cK%ugW z;@<8UUm}D!MP~#lg@XW3eMpmO*O5naVP!GHN01o%fAxG+7g9N~zXsRWIducQ?z6Kx z_5wK+KH~|Fc8@u(uUvxgL@y@@9Z)~J0Ro``?bYYn_lR0ZC^?VS8vZlZ*XeL)z6jjY z5d%fZ(Q_XV!?PViy`zM$r@Be36$q4d*vZjb?X^WAQCQq1eegl`KA?N{Pq!Y7XZmQ$ z{KU9kyaH}P0B^vhuli}X3r-2pqxY(CI} zxxci?-3u;l1)6^jCN%zP$9vbSYrY+@ri#Hl)NP#_QBk2>OVvLr*lt729@AHwn_T$^~2q(3DdeVq>kIfwZwyx;L1z`+oZ7A6%m@ zU3Rs}YF5?zqx@8rWZnbz3U1t@z6Giwmhftqs*wMusqYSJGU@uxuIsv%M^-_xfb6cJ zDH01!5O77sP&KFsAw;Q43m`4Dpgt^01T={RLX#5Qh=70)YCse)gd#0;BGQBap-M~0 zJ9m7(>-z40Gjry&IrBSbGBf`n=WxcJo~i<8*$Q^1TuIrDUD4-P;XKF~l>2^E zXNVKG2Q$SE;hjNgM~{%sYoy%CpDy?{Vn0YpAU0svGnZYd1-0y%Q1-M%3bKgGY%!f}D)-yt{Cp*qmIqjM`)SsRC zO{s$qXy#m$T-@Pn6pD*uXI zK0<){0GD%#0kxE!N||g!hXfU1CGK|_v#goA^8IyA(Mpa!a!j40ROpR9@SP`WW=ps& zjLk|n3uH>aGM<^dLXa2DfVhf8RL?Rz{sp0Hb)>p!^Ia-GHtSD;RLJ*ypG0+-%7_04 zoB^H3<_nSJwoS!_Y&|uXP2u%UZ~_u*R2_;tk&aw?Ee840f)<^np)DiqHQUW=3K^;eh^=MEx%haX0>iIpzgr{~okpO@PB z{dP2tuUYk%Ol?n8*eC7DS=`=V9;SaZ`K!M$nK}8>!}BL)jD4M4$2>-rUr|%H5q`oA zp7ZNABPahE^Aw#o@>XZM?zwq3_!`b0LzvqLo8!d2!rA=Q{*Cd594*C}guIC!--#sHfQjDH zFJ4IMpCGF5yA1S3xF3KtPwdL`G8p`LOhFgHAYw(6chpk_wYX5Bn0txrB(pM-w?Hai z?Rv1^T>a^{;MkMcUnBZ;4hH|sd?yzSnZQv4_!C>86j&DLPs> z)gTTvSSmVA!G%}4C6PMP1t0z>pgcm*C@9xQ*oNoEkcc_~k7O-aYxA!?X?eNDO)LxZ zauQ|vE5_X14K?;?f^PP*nENa%N)(|E0J5!%B6G!&t{NSv9bR9C?Xarny5ql?8-C8kI!u%N1u9dTi!j z_xb3vSN_+ExlQF9JgN7&0h5g1tu?4Lb0v>|ccj96gu2bLY(H9dM)@qv)Dl_wxpX zn6DilQnd&6l->Io3W*1DnCi&`A(d_&^jt+{#yxF>x-uQKZ`6$o&05PqE{y$mKbyy` zYRDL%0n1R96!RJ`ZH@W;US zNFC2cYnT@a7mSBa@=7KXbcZH%JEk;{;Mi(P*S#4xy=cDPLXRIli~q})n3VC8MCI4x zfV=L}YYptG<+D_(`)@3VfH+!-T>Aj%i_X4vw9q`^M zt%L8E^&gk6SmnbGJQ@2TVi7f-y`IY?WnnMoiEf9@8{hxUr0~k#tz^EwwZ_>C{0#O| zYMgD{N~$DsFc)Wmv|32CyM;+R*Q@BhDOa@7Jv;3XDjZP+?@ek{)y-ilR)RG1F)o=E zgh0(9v&FN$1Kk00{E+h_|C>v9PAx%eR{gXC!^7%4yY6oRlH2dwHKP!)b;R)yqTJNW 
zqbkNqU%o?B!Jj92`S#yeltufc><$Z)@~qUrj}9D-(7qX!T602$x&;QStgX9=K@*)h zGYa0R)@N2{ts6fB#+atsq)(r1PQZ!Dr%tSH^oSqt1g*oRxYe)wic7v~c* zpp8y?$2e<4NbK`+_^2Jtp(K0eO|)z2G6MzkuQ7oZF9Lp9@h{aGF2ZqBE;?&P^Fv?G za}3w&?KkaU3IF1K+g^nI7LPpgAAJB-A_^)Nw-)ARn6%??SG8SVdk_MWWsu)T9o+LA zwbwG9ga}JyEMP;Q?(OCa^5PtF)Ipc-Lo+}320Wl6`_z%hvy9?w3d!69`>8>WN~d&o z7Af)NeYCDUaB4LOs-y*K7Ck?=?NME}(|otIIt>Kh19^+-4XTF9un_Mnkz{8-Q1yom ze7d_+nK^$!+jQZz_*oc>GuCVu5NYR{crMUj*Osysy&NOk&pm1P0%<^S`QRhuQ^T%X zZ%gf-6dWu!2mQV7q9Ucgh0Ot{Q7!Q_L7|lU|MmvTIH4M?r1zZ=zQo`3fR!fdf+&3V z`7&M&8uI*^rC)A$ujiJLXtwSs?Ni+!RpTrx?~-U8*RbL(^3}itc^m)vP7M zU_+v1_1*R~%$Zoeew}a&Mj2-H_HhQ#d(GjHz;D}Y`b)(i8q$-qmFRGW^PusKq9?`) z+Wzz11dYKAHcm8a{i--yzIi#-ZD^@ZCSYcBTL^b&~UmHe~!lJ8_ANtMC=izn&olJQi5eU2IUY zR+~p)M>XqfpGZ`Rg#HRfEJAYvCCj)Re<(p5d3+ zW^&W7tK||pf#-++d#&_Va^=*G$szxIlmchnPzzjPjj;j42b9wl`e-8q7EE_$lQms)|`Hw}xyghN2!V}`< z`=6UGhlpaTh4_2%s#wdskm1c&t>c>hTC2Vx5q}DMAga`UG<+Hu-^Y4GZz#0%r0W18Zn(X~*mQYT6 zrGZBKc4bjM<#Q^K2)E!PN@N6UH4~d8n0{Ysw37Cw&(A!G)cZJZVsKoQgV>x)Gw2g{ zK{XDYT|1Nqo=h-D`)rxg*gdaAN~($BYKHw`aDM9tiSo9E=_nSGp?^K$v_xa29vfUUkUy`kbu}?}}m|+7E5w4RLtIDJ1 zJ#jAQ-G|2X$^^kbdkk%v+UyQ)m&D6u7YcdO;r#w0pQnw5y9dG+a+O=hkzac&THaL) zpdJJN)NaSkY?8aqAyLFgcK@-F5Iq8wi!Q+%#11T|x1OJG8hqvjp_2$Im`d)5D)t~w z)K2E9AQXw!v~PGNNN*@!qGGW;XE|OfMUcO9tW1E+Tw9U9p9j4{2K#bshjL}Xa=fX( zR%}jo6R0^z%_1-UeZ6^2>a5lyoZ~yoRO$I&eG)YoWBf~`O12wm zt?3$7Z_{s1II2q96_#@8T4d9r+I5>^#pjy_})6B zn38Xh5lYM2#UCCC43YryHW3Awd^2|p&Y1af{J!xa&PP2n(u}%vYof{# z2eo4S5>bK0+yxDnf1`xF4cNC?uZYs8&a&QQ!Sj)U40E04dk>%XcRCRX8 zn}xfsbuve%^jBw#@YZJzuLpC+7SQ9)>x149rHbo$>)wN5BpY?!EsPS9;rv|q=tAR> z<*g)mk_8EltSFXI+ASBC{sCULuo0mW9VP482A$)d`=`-=ODnEoWPPjG3wSM&04kdO#+b=p!^nf_w8pQOROWuj(CGNysO45m!@oHpvajGJc=8_1n zUVT45le76{5}yR0EdT!X1Ihx`VZSDQfKh&h>tcF2B_%^k%U+9E;LyX!NS5%ZWv67== zaV}nsFrOxY6R^3G#{SKpA;NU7ja%OY*C)6;ckzIBxaoU!r?J3Ps+xPQYeSz+I~vD+0kGk} zvvGFnJpAlQutvio230GBb?W7oqyTEQ`an%vVEmFn=($yDErwYY&hrBm|mkPgXqV`%Hds+f)8NTnIzek>`*#dj`b}IUBUYtfNGC5Ei-iQ z_6N4*Eh1Z=tvWS~`1maYA?CO0>|*g*fff;v*+ zo5>cE5-`Ea_^^qBfqt5@zt)my?-)gpezm&AIe6T5Mo$`SlUXj>fNq=~;& z8RxII0oh^GpTU*A?5;s&qkKm;*~32$p zK}^Jh)YmIt?nwxd(>|((;l6mTzT;@f_c<^Jvj{CwEOCZPtoD?|A_vZcpxcdy94+1@ zzPy-p-DEdFzCx*?kaspJyl;J3NOOIBT`P_^$3KCULrk``Yajz{_8@7_B$cNF61!;J zb7%U0+L&v7Wv8xyV+q$3Bv===D}9 )`uVzINSB%Zt*!PWV^^4oG9=@B9P(zQzd zzmnaGlP|%7; z;ySjj0HeKGF7!7fG>mX)St?m_|DCzOzLKgiQXl6Q>cp$&1%+FWD-zR04J+f2&))Xl zW&;9?aJsU&JCUAM^?lp&@#|3<_FD3)pj8-lkMmtALp@vv-JFe_+S%H7APta;QRplE z;ewCl{D!lbgAW!Kz6IT^A94sSdog9wn@QX&E-~?sEy4_DR6UA-stGRsfVx0M8zm_#A^!4t`c!C zJAd0g{eifZVqbmRUuJFJ))%8X#AB`du8^FH#<>5^Qti|Yv+byo5c1f(@W(i4Hx~xI zY9WP(BCSUUK74h~eyXt5wAqH_x6S@G9x&4H$M-C8V*{MgW>0`b8w*12C*=LGof{Z( zATp<9ZVJPkb`Cu=6qpCU=hjjV@0VAr3X`!t5*{$5gzTWtke1|+(nm)f!qF*5Gb_XE zV-9PFAM}?PR4!s+UE#F6th`E?@&)nbxw^)iY1Ic}c){oV@%U{e@xQx!8lN%*BOk7Q zG~YMc_GiEXWEiyD2hb2Be z3>Ng#`Qw!G)kVSaAA`~|5y?vsdn~Ine7C-E_ZQ)>_LbSGQ#m*q^)*KV%bvYRS3ar5 z+nlZhB|SHfk*zg{+npsKYfi=@&&TFGV`l7Ck~MM67j)#cn97&|4q!?bhDSA7qFy|@ zX!g9S4XzH=7+hus2#+ls!bn1_73!bVnm^a9(^7z@k;O5xHEqX1&{qpz7p=q_wL(j9 z#NaiZvPjlW#T_azV@8&G_z#wDlW3s8_gKAKFf@_fLe>A(sQ}6x+-Mlf-Jh=xQFioXi>XUHb#iAf|1y9!k9Nl$FlG zE7fo>N{V&CR4|ZRvi4=`ol`4mqFyKwC6oR zkxGno#Z)n_1YurrChIQ1BntudU%? 
zg;r>Vf@#0>_N`%w*6?VSdJ*LvqRMTsVrriU#H``Nd6JtcDLvq6cdG`OG)E+dv~;PgBIS}N632n!zy$8g?swtqh!r>W`eu80C2Aq8j3OSXqX z(!G7N)-5P-24kkC;B%XJ-j71RLU)!XEYimL3`@Sy=AI~^S zhnIZcTQ?GOsX*c;TfmHiHQWg^Xskd^d5Dmn#swM2qrbAgp7!yAJP;OT4DxRe1&wNZ zm#Tq2fDVjeoNZ(zBz3$&b)oh?i1(gu<|fD~Zo3B*pAKsqd@1#mh?cNFQu;DOaD~mi zvuM=3HRy?hgeIBpUHg9Z+hR42oJHA8ro0nv%;}fevC=9j<=@I(*2n@;rX=;bT`Oau zbC=&j$u@69ns{0D*Z`SQiTVvnTs(IqdPw@4nK1T|3uTszy&$WD~?IEyo}Qg88!Dcrvq9%nDl> z9OdWp)yzrio~p<}TkCkvt9WJq=PDFP9W^;B$u;%CN%@2;9UACC)nfU-T_aS6l%`Z9 zRM1y-9(1pEivz;EOmh9XimJ;C1C;PYTXh8kY#Lpk9*6Cj!ghMUfD(_H!>lhYgg1wF;m3Ps9#$`~{+%d99KQi=l&8BK{ zjePK8Mk6ul%)zv&(w`;Defp?&`_qp%F1;8ne2XfycM&Y3wK1dhQVx}fXBy@tm1mN^ zJwS{HJ?d}Z>>G9a>hDy9z}xanc*j*2#Vf;$MRu0>g< z?Qr&Kq>N-|=gS-ze^0sE+c*B0ZY}e6e?(xRhv%smwrTyoZf;K7cJHHz{peKYtasUT%{P?$eP8p2%N3lKxEuonT`$DP0PT~g@(^kav)&N5 W9Nu-N2|Nx2eaK4|7xOOMe)xYmUaqhJ literal 0 HcmV?d00001 diff --git a/website/static/img/logo/svg/colored_logo.svg b/website/static/img/logo/svg/colored_logo.svg new file mode 100644 index 00000000..3b136ac4 --- /dev/null +++ b/website/static/img/logo/svg/colored_logo.svg @@ -0,0 +1,19 @@ + + + \ No newline at end of file diff --git a/website/tsconfig.json b/website/tsconfig.json new file mode 100644 index 00000000..d250afae --- /dev/null +++ b/website/tsconfig.json @@ -0,0 +1,6 @@ +{ + "extends": "@docusaurus/tsconfig", + "compilerOptions": { + "baseUrl": "." + } +} From d71d6d6f1c8ced2811d0f16b9b43e32372943ada Mon Sep 17 00:00:00 2001 From: Keith Lee Date: Thu, 12 Feb 2026 21:29:06 +0000 Subject: [PATCH 2/4] Update draft doc --- website/docs/user-guide/cpp/api-reference.md | 10 ++--- website/docs/user-guide/cpp/error-handling.md | 6 +-- .../cpp/example/admin-operations.md | 2 +- .../user-guide/cpp/example/configuration.md | 10 ++--- website/docs/user-guide/cpp/example/index.md | 2 +- .../docs/user-guide/cpp/example/log-tables.md | 4 +- website/docs/user-guide/python/data-types.md | 2 +- .../python/example/admin-operations.md | 2 +- .../python/example/configuration.md | 4 +- .../docs/user-guide/python/example/index.md | 8 ++-- .../user-guide/python/example/log-tables.md | 16 +++---- .../python/example/partitioned-tables.md | 8 ++-- .../python/example/primary-key-tables.md | 6 +-- website/docs/user-guide/rust/api-reference.md | 12 +++--- website/docs/user-guide/rust/data-types.md | 11 +++++ .../docs/user-guide/rust/error-handling.md | 43 +++++++++++++------ .../rust/example/admin-operations.md | 4 +- .../user-guide/rust/example/configuration.md | 6 +-- website/docs/user-guide/rust/example/index.md | 2 +- 19 files changed, 94 insertions(+), 64 deletions(-) diff --git a/website/docs/user-guide/cpp/api-reference.md b/website/docs/user-guide/cpp/api-reference.md index 4bbabfb5..15f2a470 100644 --- a/website/docs/user-guide/cpp/api-reference.md +++ b/website/docs/user-guide/cpp/api-reference.md @@ -17,13 +17,13 @@ Complete API reference for the Fluss C++ client. 
| Field | Type | Default | Description | |---|---|---|---| -| `bootstrap_server` | `std::string` | `"127.0.0.1:9123"` | Coordinator server address | -| `request_max_size` | `int32_t` | `10485760` (10 MB) | Maximum request size in bytes | +| `bootstrap_servers` | `std::string` | `"127.0.0.1:9123"` | Coordinator server address | +| `writer_request_max_size` | `int32_t` | `10485760` (10 MB) | Maximum request size in bytes | | `writer_acks` | `std::string` | `"all"` | Acknowledgment setting (`"all"`, `"0"`, `"1"`, or `"-1"`) | | `writer_retries` | `int32_t` | `INT32_MAX` | Number of retries on failure | | `writer_batch_size` | `int32_t` | `2097152` (2 MB) | Batch size for writes in bytes | | `scanner_remote_log_prefetch_num` | `size_t` | `4` | Number of remote log segments to prefetch | -| `scanner_remote_log_download_threads` | `size_t` | `3` | Number of threads for remote log downloads | +| `remote_file_download_thread_num` | `size_t` | `3` | Number of threads for remote log downloads | ## `Connection` @@ -52,7 +52,7 @@ Complete API reference for the Fluss C++ client. |---|---| | `CreateTable(const TablePath& path, const TableDescriptor& descriptor, bool ignore_if_exists) -> Result` | Create a table | | `DropTable(const TablePath& path, bool ignore_if_not_exists) -> Result` | Drop a table | -| `GetTable(const TablePath& path, TableInfo& out) -> Result` | Get table metadata | +| `GetTableInfo(const TablePath& path, TableInfo& out) -> Result` | Get table metadata | | `ListTables(const std::string& database_name, std::vector& out) -> Result` | List tables in a database | | `TableExists(const TablePath& path, bool& out) -> Result` | Check if a table exists | @@ -117,7 +117,7 @@ Complete API reference for the Fluss C++ client. | `ProjectByIndex(std::vector column_indices) -> TableScan&` | Project columns by index | | `ProjectByName(std::vector column_names) -> TableScan&` | Project columns by name | | `CreateLogScanner(LogScanner& out) -> Result` | Create a record-based log scanner | -| `CreateRecordBatchScanner(LogScanner& out) -> Result` | Create an Arrow RecordBatch-based log scanner | +| `CreateRecordBatchLogScanner(LogScanner& out) -> Result` | Create an Arrow RecordBatch-based log scanner | ## `AppendWriter` diff --git a/website/docs/user-guide/cpp/error-handling.md b/website/docs/user-guide/cpp/error-handling.md index bad82291..0c4fe7ec 100644 --- a/website/docs/user-guide/cpp/error-handling.md +++ b/website/docs/user-guide/cpp/error-handling.md @@ -38,7 +38,7 @@ static void check(const char* step, const fluss::Result& r) { // Usage fluss::Configuration config; -config.bootstrap_server = "127.0.0.1:9123"; +config.bootstrap_servers = "127.0.0.1:9123"; check("create", fluss::Connection::Create(config, conn)); check("create_table", admin.CreateTable(table_path, descriptor, true)); check("flush", writer.Flush()); @@ -55,7 +55,7 @@ if (!conn.Available()) { } fluss::Configuration config; -config.bootstrap_server = "127.0.0.1:9123"; +config.bootstrap_servers = "127.0.0.1:9123"; fluss::Result result = fluss::Connection::Create(config, conn); if (result.Ok() && conn.Available()) { // Connection is ready to use @@ -70,7 +70,7 @@ The cluster is not running or the address is incorrect: ```cpp fluss::Configuration config; -config.bootstrap_server = "127.0.0.1:9123"; +config.bootstrap_servers = "127.0.0.1:9123"; fluss::Connection conn; fluss::Result result = fluss::Connection::Create(config, conn); if (!result.Ok()) { diff --git a/website/docs/user-guide/cpp/example/admin-operations.md 
b/website/docs/user-guide/cpp/example/admin-operations.md index 2016cb0a..bff55e80 100644 --- a/website/docs/user-guide/cpp/example/admin-operations.md +++ b/website/docs/user-guide/cpp/example/admin-operations.md @@ -33,7 +33,7 @@ check("create_table", admin.CreateTable(table_path, descriptor, true)); // Get table information fluss::TableInfo table_info; -check("get_table", admin.GetTable(table_path, table_info)); +check("get_table", admin.GetTableInfo(table_path, table_info)); std::cout << "Table ID: " << table_info.table_id << std::endl; std::cout << "Number of buckets: " << table_info.num_buckets << std::endl; std::cout << "Has primary key: " << table_info.has_primary_key << std::endl; diff --git a/website/docs/user-guide/cpp/example/configuration.md b/website/docs/user-guide/cpp/example/configuration.md index f576eb3c..076710ee 100644 --- a/website/docs/user-guide/cpp/example/configuration.md +++ b/website/docs/user-guide/cpp/example/configuration.md @@ -9,7 +9,7 @@ sidebar_position: 2 #include "fluss.hpp" fluss::Configuration config; -config.bootstrap_server = "127.0.0.1:9123"; +config.bootstrap_servers = "127.0.0.1:9123"; fluss::Connection conn; fluss::Result result = fluss::Connection::Create(config, conn); @@ -21,17 +21,17 @@ if (!result.Ok()) { ## Configuration Options -All fields have sensible defaults. Only `bootstrap_server` typically needs to be set. +All fields have sensible defaults. Only `bootstrap_servers` typically needs to be set. ```cpp fluss::Configuration config; -config.bootstrap_server = "127.0.0.1:9123"; // Coordinator address -config.request_max_size = 10 * 1024 * 1024; // Max request size (10 MB) +config.bootstrap_servers = "127.0.0.1:9123"; // Coordinator address +config.writer_request_max_size = 10 * 1024 * 1024; // Max request size (10 MB) config.writer_acks = "all"; // Wait for all replicas config.writer_retries = std::numeric_limits::max(); // Retry on failure config.writer_batch_size = 2 * 1024 * 1024; // Batch size (2 MB) config.scanner_remote_log_prefetch_num = 4; // Remote log prefetch count -config.scanner_remote_log_download_threads = 3; // Download threads +config.remote_file_download_thread_num = 3; // Download threads ``` ## Error Handling diff --git a/website/docs/user-guide/cpp/example/index.md b/website/docs/user-guide/cpp/example/index.md index e6b59f72..5afdb5b5 100644 --- a/website/docs/user-guide/cpp/example/index.md +++ b/website/docs/user-guide/cpp/example/index.md @@ -19,7 +19,7 @@ static void check(const char* step, const fluss::Result& r) { int main() { // Connect fluss::Configuration config; - config.bootstrap_server = "127.0.0.1:9123"; + config.bootstrap_servers = "127.0.0.1:9123"; fluss::Connection conn; check("create", fluss::Connection::Create(config, conn)); diff --git a/website/docs/user-guide/cpp/example/log-tables.md b/website/docs/user-guide/cpp/example/log-tables.md index 8b4a840e..84bcbb0f 100644 --- a/website/docs/user-guide/cpp/example/log-tables.md +++ b/website/docs/user-guide/cpp/example/log-tables.md @@ -77,7 +77,7 @@ check("subscribe_batch", scanner.Subscribe(subscriptions)); #include fluss::LogScanner arrow_scanner; -check("new_scanner", table.NewScan().CreateRecordBatchScanner(arrow_scanner)); +check("new_scanner", table.NewScan().CreateRecordBatchLogScanner(arrow_scanner)); for (int b = 0; b < info.num_buckets; ++b) { check("subscribe", arrow_scanner.Subscribe(b, 0)); @@ -113,5 +113,5 @@ check("new_scanner", // Arrow RecordBatch with projection fluss::LogScanner projected_arrow_scanner; check("new_scanner", - 
table.NewScan().ProjectByIndex({0, 2}).CreateRecordBatchScanner(projected_arrow_scanner)); + table.NewScan().ProjectByIndex({0, 2}).CreateRecordBatchLogScanner(projected_arrow_scanner)); ``` diff --git a/website/docs/user-guide/python/data-types.md b/website/docs/user-guide/python/data-types.md index 2976ad0c..fed5b748 100644 --- a/website/docs/user-guide/python/data-types.md +++ b/website/docs/user-guide/python/data-types.md @@ -7,7 +7,7 @@ The Python client uses PyArrow types for schema definitions: | PyArrow Type | Fluss Type | Python Type | |---|---|---| -| `pa.boolean()` | Boolean | `bool` | +| `pa.bool_()` | Boolean | `bool` | | `pa.int8()` / `int16()` / `int32()` / `int64()` | TinyInt / SmallInt / Int / BigInt | `int` | | `pa.float32()` / `float64()` | Float / Double | `float` | | `pa.string()` | String | `str` | diff --git a/website/docs/user-guide/python/example/admin-operations.md b/website/docs/user-guide/python/example/admin-operations.md index 226284e7..e905bfd5 100644 --- a/website/docs/user-guide/python/example/admin-operations.md +++ b/website/docs/user-guide/python/example/admin-operations.md @@ -32,7 +32,7 @@ schema = fluss.Schema(pa.schema([ table_path = fluss.TablePath("my_database", "my_table") await admin.create_table(table_path, fluss.TableDescriptor(schema), ignore_if_exists=True) -table_info = await admin.get_table(table_path) +table_info = await admin.get_table_info(table_path) tables = await admin.list_tables("my_database") await admin.drop_table(table_path, ignore_if_not_exists=True) ``` diff --git a/website/docs/user-guide/python/example/configuration.md b/website/docs/user-guide/python/example/configuration.md index 1de82d3a..b7db70d6 100644 --- a/website/docs/user-guide/python/example/configuration.md +++ b/website/docs/user-guide/python/example/configuration.md @@ -7,13 +7,13 @@ sidebar_position: 2 import fluss config = fluss.Config({"bootstrap.servers": "127.0.0.1:9123"}) -conn = await fluss.FlussConnection.connect(config) +conn = await fluss.FlussConnection.create(config) ``` The connection also supports context managers: ```python -with await fluss.FlussConnection.connect(config) as conn: +with await fluss.FlussConnection.create(config) as conn: ... 
``` diff --git a/website/docs/user-guide/python/example/index.md b/website/docs/user-guide/python/example/index.md index 67ee9202..389b6486 100644 --- a/website/docs/user-guide/python/example/index.md +++ b/website/docs/user-guide/python/example/index.md @@ -13,7 +13,7 @@ import fluss async def main(): # Connect config = fluss.Config({"bootstrap.servers": "127.0.0.1:9123"}) - conn = await fluss.FlussConnection.connect(config) + conn = await fluss.FlussConnection.create(config) admin = await conn.get_admin() # Create a log table @@ -27,14 +27,14 @@ async def main(): # Write table = await conn.get_table(table_path) - writer = await table.new_append_writer() + writer = table.new_append().create_writer() writer.append({"id": 1, "name": "Alice", "score": 95.5}) writer.append({"id": 2, "name": "Bob", "score": 87.0}) await writer.flush() # Read - num_buckets = (await admin.get_table(table_path)).num_buckets - scanner = await table.new_scan().create_batch_scanner() + num_buckets = (await admin.get_table_info(table_path)).num_buckets + scanner = await table.new_scan().create_record_batch_log_scanner() scanner.subscribe_buckets({i: fluss.EARLIEST_OFFSET for i in range(num_buckets)}) print(scanner.to_pandas()) diff --git a/website/docs/user-guide/python/example/log-tables.md b/website/docs/user-guide/python/example/log-tables.md index 40e232de..25ba326b 100644 --- a/website/docs/user-guide/python/example/log-tables.md +++ b/website/docs/user-guide/python/example/log-tables.md @@ -28,7 +28,7 @@ Write methods like `append()` and `write_arrow_batch()` return a `WriteResultHan ```python table = await conn.get_table(table_path) -writer = await table.new_append_writer() +writer = table.new_append().create_writer() # Fire-and-forget: queue writes, flush at the end writer.append({"id": 1, "name": "Alice", "score": 95.5}) @@ -49,7 +49,7 @@ await writer.flush() ## Reading There are two scanner types: -- **Batch scanner** (`create_batch_scanner()`) — returns Arrow Tables or DataFrames, best for analytics +- **Batch scanner** (`create_record_batch_log_scanner()`) — returns Arrow Tables or DataFrames, best for analytics - **Record scanner** (`create_log_scanner()`) — returns individual records with metadata (offset, timestamp, change type), best for streaming And two reading modes: @@ -59,9 +59,9 @@ And two reading modes: ### Batch Read (One-Shot) ```python -num_buckets = (await admin.get_table(table_path)).num_buckets +num_buckets = (await admin.get_table_info(table_path)).num_buckets -scanner = await table.new_scan().create_batch_scanner() +scanner = await table.new_scan().create_record_batch_log_scanner() scanner.subscribe_buckets({i: fluss.EARLIEST_OFFSET for i in range(num_buckets)}) # Reads everything up to current latest offset, then returns @@ -75,7 +75,7 @@ Use `poll_arrow()` or `poll()` in a loop for streaming consumption: ```python # Batch scanner: poll as Arrow Tables -scanner = await table.new_scan().create_batch_scanner() +scanner = await table.new_scan().create_record_batch_log_scanner() scanner.subscribe(bucket_id=0, start_offset=fluss.EARLIEST_OFFSET) while True: @@ -97,14 +97,14 @@ while True: To only consume new records (skip existing data), use `LATEST_OFFSET`: ```python -scanner = await table.new_scan().create_batch_scanner() +scanner = await table.new_scan().create_record_batch_log_scanner() scanner.subscribe(bucket_id=0, start_offset=fluss.LATEST_OFFSET) ``` ## Column Projection ```python -scanner = await table.new_scan().project([0, 2]).create_batch_scanner() +scanner = await 
table.new_scan().project([0, 2]).create_record_batch_log_scanner() # or by name -scanner = await table.new_scan().project_by_name(["id", "score"]).create_batch_scanner() +scanner = await table.new_scan().project_by_name(["id", "score"]).create_record_batch_log_scanner() ``` diff --git a/website/docs/user-guide/python/example/partitioned-tables.md b/website/docs/user-guide/python/example/partitioned-tables.md index 8b3eb1f3..41ee8bb3 100644 --- a/website/docs/user-guide/python/example/partitioned-tables.md +++ b/website/docs/user-guide/python/example/partitioned-tables.md @@ -37,7 +37,7 @@ Same as non-partitioned tables - include partition column values in each row. ** ```python table = await conn.get_table(table_path) -writer = await table.new_append_writer() +writer = table.new_append().create_writer() writer.append({"id": 1, "region": "US", "value": 100}) writer.append({"id": 2, "region": "EU", "value": 200}) await writer.flush() @@ -48,7 +48,7 @@ await writer.flush() Use `subscribe_partition()` or `subscribe_partition_buckets()` instead of `subscribe()`: ```python -scanner = await table.new_scan().create_batch_scanner() +scanner = await table.new_scan().create_record_batch_log_scanner() # Subscribe to individual partitions for p in partition_infos: @@ -86,11 +86,11 @@ await admin.create_table( await admin.create_partition(table_path, {"region": "US"}, ignore_if_exists=True) table = await conn.get_table(table_path) -writer = table.new_upsert() +writer = table.new_upsert().create_writer() writer.upsert({"user_id": 1, "region": "US", "score": 1234}) await writer.flush() # Lookup includes partition columns -lookuper = table.new_lookup() +lookuper = table.new_lookup().create_lookuper() result = await lookuper.lookup({"user_id": 1, "region": "US"}) ``` diff --git a/website/docs/user-guide/python/example/primary-key-tables.md b/website/docs/user-guide/python/example/primary-key-tables.md index 13fc05e8..cd61e508 100644 --- a/website/docs/user-guide/python/example/primary-key-tables.md +++ b/website/docs/user-guide/python/example/primary-key-tables.md @@ -30,7 +30,7 @@ await admin.create_table(table_path, fluss.TableDescriptor(schema, bucket_count= table = await conn.get_table(table_path) # Upsert (fire-and-forget, flush at the end) -writer = table.new_upsert() +writer = table.new_upsert().create_writer() writer.upsert({"id": 1, "name": "Alice", "age": 25}) writer.upsert({"id": 2, "name": "Bob", "age": 30}) await writer.flush() @@ -44,7 +44,7 @@ handle = writer.delete({"id": 2}) await handle.wait() # Lookup -lookuper = table.new_lookup() +lookuper = table.new_lookup().create_lookuper() result = await lookuper.lookup({"id": 1}) if result: print(f"Found: name={result['name']}, age={result['age']}") @@ -55,7 +55,7 @@ if result: Update specific columns while preserving others: ```python -partial_writer = table.new_upsert(columns=["id", "age"]) +partial_writer = table.new_upsert().partial_update_by_name(["id", "age"]).create_writer() partial_writer.upsert({"id": 1, "age": 27}) # only updates age await partial_writer.flush() ``` diff --git a/website/docs/user-guide/rust/api-reference.md b/website/docs/user-guide/rust/api-reference.md index 55841cb1..b5301262 100644 --- a/website/docs/user-guide/rust/api-reference.md +++ b/website/docs/user-guide/rust/api-reference.md @@ -9,13 +9,13 @@ Complete API reference for the Fluss Rust client. 
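As a quick orientation for the configuration table that follows, here is a minimal sketch of setting these fields before connecting. This is an editor's illustration pieced together from the `Config` and `FlussConnection` usage shown in the example pages elsewhere in this patch; it is not text from the patched file.

```rust
use fluss::client::FlussConnection;
use fluss::config::Config;

// Start from defaults and override only what you need;
// field names follow the table below.
let mut config = Config::default();
config.bootstrap_servers = "127.0.0.1:9123".to_string();
config.writer_acks = "all".to_string();       // wait for all replicas
config.writer_batch_size = 2 * 1024 * 1024;   // 2 MB write batches

let conn = FlussConnection::new(config).await?;
```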
| Field | Type | Default | Description | |---|---|---|---| -| `bootstrap_server` | `String` | `"127.0.0.1:9123"` | Coordinator server address | -| `request_max_size` | `i32` | `10485760` (10 MB) | Maximum request size in bytes | +| `bootstrap_servers` | `String` | `"127.0.0.1:9123"` | Coordinator server address | +| `writer_request_max_size` | `i32` | `10485760` (10 MB) | Maximum request size in bytes | | `writer_acks` | `String` | `"all"` | Acknowledgment setting (`"all"` waits for all replicas) | | `writer_retries` | `i32` | `i32::MAX` | Number of retries on failure | | `writer_batch_size` | `i32` | `2097152` (2 MB) | Batch size for writes in bytes | | `scanner_remote_log_prefetch_num` | `usize` | `4` | Number of remote log segments to prefetch | -| `scanner_remote_log_download_threads` | `usize` | `3` | Number of threads for remote log downloads | +| `remote_file_download_thread_num` | `usize` | `3` | Number of threads for remote log downloads | ## `FlussConnection` @@ -32,7 +32,7 @@ Complete API reference for the Fluss Rust client. | Method | Description | |---|---| -| `async fn create_database(&self, name: &str, ignore_if_exists: bool, descriptor: Option<&DatabaseDescriptor>) -> Result<()>` | Create a database | +| `async fn create_database(&self, name: &str, descriptor: Option<&DatabaseDescriptor>, ignore_if_exists: bool) -> Result<()>` | Create a database | | `async fn drop_database(&self, name: &str, ignore_if_not_exists: bool, cascade: bool) -> Result<()>` | Drop a database | | `async fn list_databases(&self) -> Result>` | List all databases | | `async fn database_exists(&self, name: &str) -> Result` | Check if a database exists | @@ -44,7 +44,7 @@ Complete API reference for the Fluss Rust client. |---|---| | `async fn create_table(&self, table_path: &TablePath, descriptor: &TableDescriptor, ignore_if_exists: bool) -> Result<()>` | Create a table | | `async fn drop_table(&self, table_path: &TablePath, ignore_if_not_exists: bool) -> Result<()>` | Drop a table | -| `async fn get_table(&self, table_path: &TablePath) -> Result` | Get table metadata | +| `async fn get_table_info(&self, table_path: &TablePath) -> Result` | Get table metadata | | `async fn list_tables(&self, database_name: &str) -> Result>` | List tables in a database | | `async fn table_exists(&self, table_path: &TablePath) -> Result` | Check if a table exists | @@ -357,7 +357,7 @@ Implements the `InternalRow` trait (see below). 
| `fn get_float(&self, idx: usize) -> f32` | Get float value | | `fn get_double(&self, idx: usize) -> f64` | Get double value | | `fn get_string(&self, idx: usize) -> &str` | Get string value | -| `fn get_decimal(&self, idx: usize, precision: u32, scale: u32) -> Decimal` | Get decimal value | +| `fn get_decimal(&self, idx: usize, precision: usize, scale: usize) -> Decimal` | Get decimal value | | `fn get_date(&self, idx: usize) -> Date` | Get date value | | `fn get_time(&self, idx: usize) -> Time` | Get time value | | `fn get_timestamp_ntz(&self, idx: usize, precision: u32) -> TimestampNtz` | Get timestamp value | diff --git a/website/docs/user-guide/rust/data-types.md b/website/docs/user-guide/rust/data-types.md index 8b374ca5..c4f46cb2 100644 --- a/website/docs/user-guide/rust/data-types.md +++ b/website/docs/user-guide/rust/data-types.md @@ -44,3 +44,14 @@ let ts_ltz = TimestampLtz::new(1704067200000); // Decimal: from an unscaled long value with precision and scale let decimal = Decimal::from_unscaled_long(12345, 10, 2)?; // represents 123.45 ``` + +## Creating Rows from Data + +`GenericRow::from_data` accepts a `Vec`. Because multiple crates implement `From<&str>`, Rust cannot infer the target type from `.into()` alone. Annotate the vector type explicitly: + +```rust +use fluss::row::{Datum, GenericRow}; + +let data: Vec = vec![1i32.into(), "hello".into()]; +let row = GenericRow::from_data(data); +``` diff --git a/website/docs/user-guide/rust/error-handling.md b/website/docs/user-guide/rust/error-handling.md index eb7d8e22..edb0c734 100644 --- a/website/docs/user-guide/rust/error-handling.md +++ b/website/docs/user-guide/rust/error-handling.md @@ -27,12 +27,12 @@ match result { Ok(val) => { // handle success } - Err(Error::InvalidTableError { message }) => { - eprintln!("Invalid table: {}", message); - } - Err(Error::RpcError { message, source }) => { + Err(Error::RpcError { message, .. }) => { eprintln!("RPC failure: {}", message); } + Err(Error::UnsupportedOperation { message }) => { + eprintln!("Unsupported: {}", message); + } Err(Error::FlussAPIError { api_error }) => { eprintln!("Server error: {}", api_error); } @@ -49,17 +49,32 @@ match result { | `UnexpectedError` | General unexpected errors with a message and optional source | | `IoUnexpectedError` | I/O errors (network, file system) | | `RemoteStorageUnexpectedError` | Remote storage errors (OpenDAL backend failures) | -| `InvalidTableError` | Invalid table configuration or table not found | | `RpcError` | RPC communication failures (connection refused, timeout) | | `RowConvertError` | Row conversion failures (type mismatch, invalid data) | | `ArrowError` | Arrow data handling errors (schema mismatch, encoding) | | `IllegalArgument` | Invalid arguments passed to an API method | -| `InvalidPartition` | Invalid partition configuration | -| `PartitionNotExist` | Partition does not exist | | `UnsupportedOperation` | Operation not supported on the table type | -| `LeaderNotAvailable` | Leader not available for the requested bucket | | `FlussAPIError` | Server-side API errors returned by the Fluss cluster | +Server side errors are represented as `FlussAPIError` with a specific error code. 
Use the `api_error()` helper to match them ergonomically: + +```rust +use fluss::error::FlussError; + +match result { + Err(ref e) if e.api_error() == Some(FlussError::InvalidTableException) => { + eprintln!("Invalid table: {}", e); + } + Err(ref e) if e.api_error() == Some(FlussError::PartitionNotExists) => { + eprintln!("Partition does not exist: {}", e); + } + Err(ref e) if e.api_error() == Some(FlussError::LeaderNotAvailableException) => { + eprintln!("Leader not available: {}", e); + } + _ => {} +} +``` + ## Common Error Scenarios ### Connection Refused @@ -81,10 +96,12 @@ match result { The table does not exist or has been dropped. ```rust +use fluss::error::FlussError; + let result = conn.get_table(&table_path).await; match result { - Err(Error::InvalidTableError { message }) => { - eprintln!("Table not found: {}", message); + Err(ref e) if e.api_error() == Some(FlussError::TableNotExist) => { + eprintln!("Table not found: {}", e); } _ => {} } @@ -95,10 +112,12 @@ match result { The partition does not exist on a partitioned table. ```rust +use fluss::error::FlussError; + let result = admin.drop_partition(&table_path, &spec, false).await; match result { - Err(Error::PartitionNotExist { .. }) => { - eprintln!("Partition does not exist"); + Err(ref e) if e.api_error() == Some(FlussError::PartitionNotExists) => { + eprintln!("Partition does not exist: {}", e); } _ => {} } diff --git a/website/docs/user-guide/rust/example/admin-operations.md b/website/docs/user-guide/rust/example/admin-operations.md index 631d9196..7fcc4017 100644 --- a/website/docs/user-guide/rust/example/admin-operations.md +++ b/website/docs/user-guide/rust/example/admin-operations.md @@ -13,7 +13,7 @@ let admin = conn.get_admin().await?; ```rust // Create database -admin.create_database("my_database", true, None).await?; +admin.create_database("my_database", None, true).await?; // List all databases let databases = admin.list_databases().await?; @@ -50,7 +50,7 @@ let table_path = TablePath::new("my_database", "my_table"); admin.create_table(&table_path, &table_descriptor, true).await?; // Get table information -let table_info = admin.get_table(&table_path).await?; +let table_info = admin.get_table_info(&table_path).await?; println!("Table: {}", table_info); // List tables in database diff --git a/website/docs/user-guide/rust/example/configuration.md b/website/docs/user-guide/rust/example/configuration.md index 2df736f3..4ab04adb 100644 --- a/website/docs/user-guide/rust/example/configuration.md +++ b/website/docs/user-guide/rust/example/configuration.md @@ -8,7 +8,7 @@ use fluss::client::FlussConnection; use fluss::config::Config; let mut config = Config::default(); -config.bootstrap_server = "127.0.0.1:9123".to_string(); +config.bootstrap_servers = "127.0.0.1:9123".to_string(); let conn = FlussConnection::new(config).await?; ``` @@ -17,8 +17,8 @@ let conn = FlussConnection::new(config).await?; | Option | Description | Default | |--------|-------------|---------| -| `bootstrap_server` | Coordinator server address | `127.0.0.1:9123` | -| `request_max_size` | Maximum request size in bytes | 10 MB | +| `bootstrap_servers` | Coordinator server address | `127.0.0.1:9123` | +| `writer_request_max_size` | Maximum request size in bytes | 10 MB | | `writer_acks` | Acknowledgment setting (`all` waits for all replicas) | `all` | | `writer_retries` | Number of retries on failure | `i32::MAX` | | `writer_batch_size` | Batch size for writes | 2 MB | diff --git a/website/docs/user-guide/rust/example/index.md 
b/website/docs/user-guide/rust/example/index.md
index b1c99746..dcee87b0 100644
--- a/website/docs/user-guide/rust/example/index.md
+++ b/website/docs/user-guide/rust/example/index.md
@@ -17,7 +17,7 @@ use std::time::Duration;
 async fn main() -> Result<()> {
     // Connect
     let mut config = Config::default();
-    config.bootstrap_server = "127.0.0.1:9123".to_string();
+    config.bootstrap_servers = "127.0.0.1:9123".to_string();
     let conn = FlussConnection::new(config).await?;
     let admin = conn.get_admin().await?;
From d3df8065e5edea8460ebff6caba5e6220627222e Mon Sep 17 00:00:00 2001
From: Keith Lee
Date: Thu, 12 Feb 2026 22:30:00 +0000
Subject: [PATCH 3/4] Update draft doc
---
 website/docs/user-guide/cpp/api-reference.md | 2 +-
 .../docs/user-guide/python/example/log-tables.md | 2 +-
 website/docs/user-guide/rust/error-handling.md | 14 ++++++++++++--
 3 files changed, 14 insertions(+), 4 deletions(-)
diff --git a/website/docs/user-guide/cpp/api-reference.md b/website/docs/user-guide/cpp/api-reference.md
index 15f2a470..1fb50e15 100644
--- a/website/docs/user-guide/cpp/api-reference.md
+++ b/website/docs/user-guide/cpp/api-reference.md
@@ -366,7 +366,7 @@ When using `table.NewRow()`, the `Set()` method auto-routes to the correct type
 | Method | Description |
 |---|---|
-| `Time::FromMillis(int64_t millis)` | Create from milliseconds since midnight |
+| `Time::FromMillis(int32_t millis)` | Create from milliseconds since midnight |
 | `Time::FromHMS(int hour, int minute, int second)` | Create from hour, minute, second |
 | `Hour() -> int` | Get hour |
 | `Minute() -> int` | Get minute |
diff --git a/website/docs/user-guide/python/example/log-tables.md b/website/docs/user-guide/python/example/log-tables.md
index 25ba326b..88e7152c 100644
--- a/website/docs/user-guide/python/example/log-tables.md
+++ b/website/docs/user-guide/python/example/log-tables.md
@@ -54,7 +54,7 @@ There are two scanner types:
 And two reading modes:
 - **`to_arrow()` / `to_pandas()`** — reads all data from subscribed buckets up to the current latest offset, then returns. Best for one-shot batch reads.
-- **`poll_arrow()` / `poll()` / `poll_batches()`** — returns whatever data is available within the timeout, then returns. Call in a loop for continuous streaming.
+- **`poll_arrow()` / `poll()` / `poll_record_batch()`** — returns whatever data is available within the timeout. Call in a loop for continuous streaming.
 ### Batch Read (One-Shot)
diff --git a/website/docs/user-guide/rust/error-handling.md b/website/docs/user-guide/rust/error-handling.md
index edb0c734..8198f65f 100644
--- a/website/docs/user-guide/rust/error-handling.md
+++ b/website/docs/user-guide/rust/error-handling.md
@@ -96,15 +96,25 @@ match result {
 The table does not exist or has been dropped.
 ```rust
-use fluss::error::FlussError;
+use fluss::error::{Error, FlussError};
-let result = conn.get_table(&table_path).await;
+// Admin operations return FlussError::TableNotExist (code 7)
+let result = admin.drop_table(&table_path, false).await;
 match result {
     Err(ref e) if e.api_error() == Some(FlussError::TableNotExist) => {
         eprintln!("Table not found: {}", e);
     }
     _ => {}
 }
+
+// conn.get_table() wraps the error differently — match on FlussAPIError directly
+let result = conn.get_table(&table_path).await;
+match result {
+    Err(Error::FlussAPIError { ref api_error }) => {
+        eprintln!("Server error (code {}): {}", api_error.code, api_error.message);
+    }
+    _ => {}
+}
 ```
 ### Partition Not Found
From abe93279efb8ed6747a0d225864c7bc30c168059 Mon Sep 17 00:00:00 2001
From: Keith Lee
Date: Fri, 13 Feb 2026 13:18:56 +0000
Subject: [PATCH 4/4] Update following unsubscribe API changes. Remove LATEST_OFFSET which isn't handled by the server. Formatting.
---
 website/docs/developer-guide/contributing.md | 10 +-
 website/docs/developer-guide/release.md | 2 +-
 website/docs/index.md | 16 +-
 website/docs/user-guide/cpp/api-reference.md | 630 +++++++++---------
 website/docs/user-guide/cpp/data-types.md | 71 +-
 website/docs/user-guide/cpp/error-handling.md | 44 +-
 .../cpp/example/admin-operations.md | 34 +-
 .../user-guide/cpp/example/configuration.md | 14 -
 website/docs/user-guide/cpp/example/index.md | 29 +-
 .../docs/user-guide/cpp/example/log-tables.md | 40 +-
 .../cpp/example/partitioned-tables.md | 61 +-
 .../cpp/example/primary-key-tables.md | 46 +-
 website/docs/user-guide/cpp/installation.md | 4 +-
 .../docs/user-guide/python/api-reference.md | 330 ++++-----
 website/docs/user-guide/python/data-types.md | 24 +-
 .../docs/user-guide/python/error-handling.md | 8 +-
 .../python/example/admin-operations.md | 20 +-
 .../python/example/configuration.md | 14 +-
 .../user-guide/python/example/log-tables.md | 24 +-
 .../python/example/partitioned-tables.md | 8 +
 website/docs/user-guide/rust/api-reference.md | 512 +++++++-------
 website/docs/user-guide/rust/data-types.md | 36 +-
 .../docs/user-guide/rust/error-handling.md | 24 +-
 .../user-guide/rust/example/configuration.md | 14 +-
 .../user-guide/rust/example/log-tables.md | 22 +-
 .../rust/example/partitioned-tables.md | 2 +-
 website/docs/user-guide/rust/installation.md | 10 +-
 27 files changed, 1048 insertions(+), 1001 deletions(-)
diff --git a/website/docs/developer-guide/contributing.md b/website/docs/developer-guide/contributing.md
index b0111c35..eced106a 100644
--- a/website/docs/developer-guide/contributing.md
+++ b/website/docs/developer-guide/contributing.md
@@ -36,7 +36,7 @@ We recommend [RustRover](https://www.jetbrains.com/rust/) IDE.
 ### Copyright Profile
-Fluss is an Apache project — every file needs an Apache licence header. To automate this in RustRover:
 1. Go to `Settings` > `Editor` > `Copyright` > `Copyright Profiles`.
 2. Add a new profile named `Apache` with this text:
@@ -65,10 +65,10 @@ Fluss is an Apache project — every file needs an Apache licence header.
To aut ## Project Structure ``` -crates/fluss — Fluss Rust client crate -crates/examples — Rust client examples -bindings/cpp — C++ bindings -bindings/python — Python bindings (PyO3) +crates/fluss (Fluss Rust client crate) +crates/examples (Rust client examples) +bindings/cpp (C++ bindings) +bindings/python (Python bindings - PyO3) ``` ## Building and Testing diff --git a/website/docs/developer-guide/release.md b/website/docs/developer-guide/release.md index 70448698..0b6f3506 100644 --- a/website/docs/developer-guide/release.md +++ b/website/docs/developer-guide/release.md @@ -155,7 +155,7 @@ svn mv -m "Release fluss-rust ${RELEASE_VERSION}" \ ### Verify Published Packages - **Rust:** [crates.io/crates/fluss-rs](https://crates.io/crates/fluss-rs) -- **Python:** [PyPI — pyfluss](https://pypi.org/project/pyfluss/) +- **Python:** [PyPI pyfluss](https://pypi.org/project/pyfluss/) - **C++:** Distributed via the source archive ### Create GitHub Release diff --git a/website/docs/index.md b/website/docs/index.md index 3f8dd5aa..7117bcfb 100644 --- a/website/docs/index.md +++ b/website/docs/index.md @@ -17,14 +17,14 @@ This documentation covers the **Fluss client libraries** for Rust, Python, and C ## Client Overview -| | Rust | Python | C++ | -|---|---|---|---| -| **Package** | [fluss-rs](https://crates.io/crates/fluss-rs) on crates.io | Build from source (PyO3) | Build from source (CMake) | -| **Async runtime** | Tokio | asyncio | Synchronous (Tokio runtime managed internally) | -| **Data format** | Arrow RecordBatch / GenericRow | PyArrow / Pandas / dict | Arrow RecordBatch / GenericRow | -| **Log tables** | Read + Write | Read + Write | Read + Write | -| **Primary key tables** | Upsert + Delete + Lookup | Upsert + Delete + Lookup | Upsert + Delete + Lookup | -| **Partitioned tables** | Full support | Write support | Full support | +| | Rust | Python | C++ | +|------------------------|------------------------------------------------------------|--------------------------|------------------------------------------------| +| **Package** | [fluss-rs](https://crates.io/crates/fluss-rs) on crates.io | Build from source (PyO3) | Build from source (CMake) | +| **Async runtime** | Tokio | asyncio | Synchronous (Tokio runtime managed internally) | +| **Data format** | Arrow RecordBatch / GenericRow | PyArrow / Pandas / dict | Arrow RecordBatch / GenericRow | +| **Log tables** | Read + Write | Read + Write | Read + Write | +| **Primary key tables** | Upsert + Delete + Lookup | Upsert + Delete + Lookup | Upsert + Delete + Lookup | +| **Partitioned tables** | Full support | Write support | Full support | ## How This Guide Is Organised diff --git a/website/docs/user-guide/cpp/api-reference.md b/website/docs/user-guide/cpp/api-reference.md index 1fb50e15..07a8b071 100644 --- a/website/docs/user-guide/cpp/api-reference.md +++ b/website/docs/user-guide/cpp/api-reference.md @@ -7,480 +7,488 @@ Complete API reference for the Fluss C++ client. 
## `Result` -| Field / Method | Type | Description | -|---|---|---| -| `error_code` | `int32_t` | 0 for success, non-zero for errors | -| `error_message` | `std::string` | Human-readable error description | -| `Ok()` | `bool` | Returns `true` if operation succeeded (`error_code == 0`) | +| Field / Method | Type | Description | +|-----------------|---------------|----------------------------------------------------------------| +| `error_code` | `int32_t` | 0 for success, non-zero for errors | +| `error_message` | `std::string` | Human-readable error description | +| `Ok()` | `bool` | Returns `true` if operation succeeded (`error_code == 0`) | ## `Configuration` -| Field | Type | Default | Description | -|---|---|---|---| -| `bootstrap_servers` | `std::string` | `"127.0.0.1:9123"` | Coordinator server address | -| `writer_request_max_size` | `int32_t` | `10485760` (10 MB) | Maximum request size in bytes | -| `writer_acks` | `std::string` | `"all"` | Acknowledgment setting (`"all"`, `"0"`, `"1"`, or `"-1"`) | -| `writer_retries` | `int32_t` | `INT32_MAX` | Number of retries on failure | -| `writer_batch_size` | `int32_t` | `2097152` (2 MB) | Batch size for writes in bytes | -| `scanner_remote_log_prefetch_num` | `size_t` | `4` | Number of remote log segments to prefetch | -| `remote_file_download_thread_num` | `size_t` | `3` | Number of threads for remote log downloads | +| Field | Type | Default | Description | +|-----------------------------------|---------------|----------------------|-----------------------------------------------------------------| +| `bootstrap_servers` | `std::string` | `"127.0.0.1:9123"` | Coordinator server address | +| `writer_request_max_size` | `int32_t` | `10485760` (10 MB) | Maximum request size in bytes | +| `writer_acks` | `std::string` | `"all"` | Acknowledgment setting (`"all"`, `"0"`, `"1"`, or `"-1"`) | +| `writer_retries` | `int32_t` | `INT32_MAX` | Number of retries on failure | +| `writer_batch_size` | `int32_t` | `2097152` (2 MB) | Batch size for writes in bytes | +| `scanner_remote_log_prefetch_num` | `size_t` | `4` | Number of remote log segments to prefetch | +| `remote_file_download_thread_num` | `size_t` | `3` | Number of threads for remote log downloads | ## `Connection` -| Method | Description | -|---|---| -| `static Create(const Configuration& config, Connection& out) -> Result` | Create a connection to a Fluss cluster | -| `GetAdmin(Admin& out) -> Result` | Get the admin interface | -| `GetTable(const TablePath& table_path, Table& out) -> Result` | Get a table for read/write operations | -| `Available() -> bool` | Check if the connection is valid and initialized | +| Method | Description | +|-------------------------------------------------------------------------|---------------------------------------------------| +| `static Create(const Configuration& config, Connection& out) -> Result` | Create a connection to a Fluss cluster | +| `GetAdmin(Admin& out) -> Result` | Get the admin interface | +| `GetTable(const TablePath& table_path, Table& out) -> Result` | Get a table for read/write operations | +| `Available() -> bool` | Check if the connection is valid and initialized | ## `Admin` ### Database Operations -| Method | Description | -|---|---| -| `CreateDatabase(const std::string& database_name, const DatabaseDescriptor& descriptor, bool ignore_if_exists) -> Result` | Create a database | -| `DropDatabase(const std::string& name, bool ignore_if_not_exists, bool cascade) -> Result` | Drop a database | -| `ListDatabases(std::vector& out) -> 
Result` | List all databases | -| `DatabaseExists(const std::string& name, bool& out) -> Result` | Check if a database exists | -| `GetDatabaseInfo(const std::string& name, DatabaseInfo& out) -> Result` | Get database metadata | +| Method | Description | +|---------------------------------------------------------------------------------------------------------------------------|--------------------------| +| `CreateDatabase(const std::string& database_name, const DatabaseDescriptor& descriptor, bool ignore_if_exists) -> Result` | Create a database | +| `DropDatabase(const std::string& name, bool ignore_if_not_exists, bool cascade) -> Result` | Drop a database | +| `ListDatabases(std::vector& out) -> Result` | List all databases | +| `DatabaseExists(const std::string& name, bool& out) -> Result` | Check if a database exists | +| `GetDatabaseInfo(const std::string& name, DatabaseInfo& out) -> Result` | Get database metadata | ### Table Operations -| Method | Description | -|---|---| -| `CreateTable(const TablePath& path, const TableDescriptor& descriptor, bool ignore_if_exists) -> Result` | Create a table | -| `DropTable(const TablePath& path, bool ignore_if_not_exists) -> Result` | Drop a table | -| `GetTableInfo(const TablePath& path, TableInfo& out) -> Result` | Get table metadata | -| `ListTables(const std::string& database_name, std::vector& out) -> Result` | List tables in a database | -| `TableExists(const TablePath& path, bool& out) -> Result` | Check if a table exists | +| Method | Description | +|------------------------------------------------------------------------------------------------------------|-----------------------------| +| `CreateTable(const TablePath& path, const TableDescriptor& descriptor, bool ignore_if_exists) -> Result` | Create a table | +| `DropTable(const TablePath& path, bool ignore_if_not_exists) -> Result` | Drop a table | +| `GetTableInfo(const TablePath& path, TableInfo& out) -> Result` | Get table metadata | +| `ListTables(const std::string& database_name, std::vector& out) -> Result` | List tables in a database | +| `TableExists(const TablePath& path, bool& out) -> Result` | Check if a table exists | ### Partition Operations -| Method | Description | -|---|---| -| `CreatePartition(const TablePath& path, const std::unordered_map& partition_spec, bool ignore_if_exists) -> Result` | Create a partition | -| `DropPartition(const TablePath& path, const std::unordered_map& partition_spec, bool ignore_if_not_exists) -> Result` | Drop a partition | -| `ListPartitionInfos(const TablePath& path, std::vector& out) -> Result` | List partition metadata | +| Method | Description | +|-------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------| +| `CreatePartition(const TablePath& path, const std::unordered_map& partition_spec, bool ignore_if_exists) -> Result` | Create a partition | +| `DropPartition(const TablePath& path, const std::unordered_map& partition_spec, bool ignore_if_not_exists) -> Result` | Drop a partition | +| `ListPartitionInfos(const TablePath& path, std::vector& out) -> Result` | List partition metadata | ### Offset Operations -| Method | Description | -|---|---| -| `ListOffsets(const TablePath& path, const std::vector& bucket_ids, const OffsetQuery& query, std::unordered_map& out) -> Result` | Get offsets for buckets | -| `ListPartitionOffsets(const TablePath& path, const std::string& partition_name, const std::vector& bucket_ids, const 
OffsetQuery& query, std::unordered_map& out) -> Result` | Get offsets for a partition's buckets | +| Method | Description | +|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------| +| `ListOffsets(const TablePath& path, const std::vector& bucket_ids, const OffsetQuery& query, std::unordered_map& out) -> Result` | Get offsets for buckets | +| `ListPartitionOffsets(const TablePath& path, const std::string& partition_name, const std::vector& bucket_ids, const OffsetQuery& query, std::unordered_map& out) -> Result` | Get offsets for a partition's buckets | ### Lake Operations -| Method | Description | -|---|---| +| Method | Description | +|-----------------------------------------------------------------------------|------------------------------| | `GetLatestLakeSnapshot(const TablePath& path, LakeSnapshot& out) -> Result` | Get the latest lake snapshot | ## `Table` -| Method | Description | -|---|---| -| `NewRow() -> GenericRow` | Create a schema-aware row for this table | -| `NewAppend() -> TableAppend` | Create an append builder for log tables | -| `NewUpsert() -> TableUpsert` | Create an upsert builder for PK tables | -| `NewLookup() -> TableLookup` | Create a lookup builder for PK tables | -| `NewScan() -> TableScan` | Create a scan builder | -| `GetTableInfo() -> TableInfo` | Get table metadata | -| `GetTablePath() -> TablePath` | Get the table path | -| `HasPrimaryKey() -> bool` | Check if the table has a primary key | +| Method | Description | +|-------------------------------|------------------------------------------| +| `NewRow() -> GenericRow` | Create a schema-aware row for this table | +| `NewAppend() -> TableAppend` | Create an append builder for log tables | +| `NewUpsert() -> TableUpsert` | Create an upsert builder for PK tables | +| `NewLookup() -> TableLookup` | Create a lookup builder for PK tables | +| `NewScan() -> TableScan` | Create a scan builder | +| `GetTableInfo() -> TableInfo` | Get table metadata | +| `GetTablePath() -> TablePath` | Get the table path | +| `HasPrimaryKey() -> bool` | Check if the table has a primary key | ## `TableAppend` -| Method | Description | -|---|---| -| `CreateWriter(AppendWriter& out) -> Result` | Create an append writer | +| Method | Description | +|----------------------------------------------|-------------------------| +| `CreateWriter(AppendWriter& out) -> Result` | Create an append writer | ## `TableUpsert` -| Method | Description | -|---|---| -| `PartialUpdateByIndex(std::vector column_indices) -> TableUpsert&` | Configure partial update by column indices | -| `PartialUpdateByName(std::vector column_names) -> TableUpsert&` | Configure partial update by column names | -| `CreateWriter(UpsertWriter& out) -> Result` | Create an upsert writer | +| Method | Description | +|------------------------------------------------------------------------------|--------------------------------------------| +| `PartialUpdateByIndex(std::vector column_indices) -> TableUpsert&` | Configure partial update by column indices | +| `PartialUpdateByName(std::vector column_names) -> TableUpsert&` | Configure partial update by column names | +| `CreateWriter(UpsertWriter& out) -> Result` | Create an upsert writer | ## `TableLookup` -| Method | Description | -|---|---| +| Method | Description | 
+|-------------------------------------------|-------------------------------------| | `CreateLookuper(Lookuper& out) -> Result` | Create a lookuper for point lookups | ## `TableScan` -| Method | Description | -|---|---| -| `ProjectByIndex(std::vector column_indices) -> TableScan&` | Project columns by index | -| `ProjectByName(std::vector column_names) -> TableScan&` | Project columns by name | -| `CreateLogScanner(LogScanner& out) -> Result` | Create a record-based log scanner | -| `CreateRecordBatchLogScanner(LogScanner& out) -> Result` | Create an Arrow RecordBatch-based log scanner | +| Method | Description | +|----------------------------------------------------------------------|-----------------------------------------------| +| `ProjectByIndex(std::vector column_indices) -> TableScan&` | Project columns by index | +| `ProjectByName(std::vector column_names) -> TableScan&` | Project columns by name | +| `CreateLogScanner(LogScanner& out) -> Result` | Create a record-based log scanner | +| `CreateRecordBatchLogScanner(LogScanner& out) -> Result` | Create an Arrow RecordBatch-based log scanner | ## `AppendWriter` -| Method | Description | -|---|---| -| `Append(const GenericRow& row) -> Result` | Append a row (fire-and-forget) | +| Method | Description | +|-------------------------------------------------------------|----------------------------------------| +| `Append(const GenericRow& row) -> Result` | Append a row (fire-and-forget) | | `Append(const GenericRow& row, WriteResult& out) -> Result` | Append a row with write acknowledgment | -| `Flush() -> Result` | Flush all pending writes | +| `Flush() -> Result` | Flush all pending writes | ## `UpsertWriter` -| Method | Description | -|---|---| -| `Upsert(const GenericRow& row) -> Result` | Upsert a row (fire-and-forget) | -| `Upsert(const GenericRow& row, WriteResult& out) -> Result` | Upsert a row with write acknowledgment | -| `Delete(const GenericRow& row) -> Result` | Delete a row by primary key (fire-and-forget) | -| `Delete(const GenericRow& row, WriteResult& out) -> Result` | Delete a row with write acknowledgment | -| `Flush() -> Result` | Flush all pending operations | +| Method | Description | +|-------------------------------------------------------------|-----------------------------------------------| +| `Upsert(const GenericRow& row) -> Result` | Upsert a row (fire-and-forget) | +| `Upsert(const GenericRow& row, WriteResult& out) -> Result` | Upsert a row with write acknowledgment | +| `Delete(const GenericRow& row) -> Result` | Delete a row by primary key (fire-and-forget) | +| `Delete(const GenericRow& row, WriteResult& out) -> Result` | Delete a row with write acknowledgment | +| `Flush() -> Result` | Flush all pending operations | ## `WriteResult` -| Method | Description | -|---|---| +| Method | Description | +|--------------------|---------------------------------------------| | `Wait() -> Result` | Wait for server acknowledgment of the write | ## `Lookuper` -| Method | Description | -|---|---| +| Method | Description | +|----------------------------------------------------------------------------|-----------------------------| | `Lookup(const GenericRow& pk_row, bool& found, GenericRow& out) -> Result` | Lookup a row by primary key | ## `LogScanner` -| Method | Description | -|---|---| -| `Subscribe(int32_t bucket_id, int64_t offset) -> Result` | Subscribe to a single bucket at an offset | -| `Subscribe(const std::vector& bucket_offsets) -> Result` | Subscribe to multiple buckets | -| 
`SubscribePartitionBuckets(int64_t partition_id, int32_t bucket_id, int64_t start_offset) -> Result` | Subscribe to a single partition bucket | -| `SubscribePartitionBuckets(const std::vector& subscriptions) -> Result` | Subscribe to multiple partition buckets | -| `UnsubscribePartition(int64_t partition_id, int32_t bucket_id) -> Result` | Unsubscribe from a partition bucket | -| `Poll(int64_t timeout_ms, ScanRecords& out) -> Result` | Poll individual records | -| `PollRecordBatch(int64_t timeout_ms, ArrowRecordBatches& out) -> Result` | Poll Arrow RecordBatches | +| Method | Description | +|------------------------------------------------------------------------------------------------------|-------------------------------------------| +| `Subscribe(int32_t bucket_id, int64_t offset) -> Result` | Subscribe to a single bucket at an offset | +| `Subscribe(const std::vector& bucket_offsets) -> Result` | Subscribe to multiple buckets | +| `SubscribePartitionBuckets(int64_t partition_id, int32_t bucket_id, int64_t start_offset) -> Result` | Subscribe to a single partition bucket | +| `SubscribePartitionBuckets(const std::vector& subscriptions) -> Result` | Subscribe to multiple partition buckets | +| `Unsubscribe(int32_t bucket_id) -> Result` | Unsubscribe from a non-partitioned bucket | +| `UnsubscribePartition(int64_t partition_id, int32_t bucket_id) -> Result` | Unsubscribe from a partition bucket | +| `Poll(int64_t timeout_ms, ScanRecords& out) -> Result` | Poll individual records | +| `PollRecordBatch(int64_t timeout_ms, ArrowRecordBatches& out) -> Result` | Poll Arrow RecordBatches | ## `GenericRow` ### Index-Based Getters -| Method | Description | -|---|---| -| `GetBool(size_t idx) -> bool` | Get boolean value at index | -| `GetInt32(size_t idx) -> int32_t` | Get 32-bit integer at index | -| `GetInt64(size_t idx) -> int64_t` | Get 64-bit integer at index | -| `GetFloat32(size_t idx) -> float` | Get 32-bit float at index | -| `GetFloat64(size_t idx) -> double` | Get 64-bit float at index | -| `GetString(size_t idx) -> std::string` | Get string at index | -| `GetBytes(size_t idx) -> std::vector` | Get binary data at index | -| `GetDate(size_t idx) -> Date` | Get date at index | -| `GetTime(size_t idx) -> Time` | Get time at index | -| `GetTimestamp(size_t idx) -> Timestamp` | Get timestamp at index | -| `DecimalToString(size_t idx) -> std::string` | Get decimal as string at index | +| Method | Description | +|------------------------------------------------|--------------------------------| +| `GetBool(size_t idx) -> bool` | Get boolean value at index | +| `GetInt32(size_t idx) -> int32_t` | Get 32-bit integer at index | +| `GetInt64(size_t idx) -> int64_t` | Get 64-bit integer at index | +| `GetFloat32(size_t idx) -> float` | Get 32-bit float at index | +| `GetFloat64(size_t idx) -> double` | Get 64-bit float at index | +| `GetString(size_t idx) -> std::string` | Get string at index | +| `GetBytes(size_t idx) -> std::vector` | Get binary data at index | +| `GetDate(size_t idx) -> Date` | Get date at index | +| `GetTime(size_t idx) -> Time` | Get time at index | +| `GetTimestamp(size_t idx) -> Timestamp` | Get timestamp at index | +| `DecimalToString(size_t idx) -> std::string` | Get decimal as string at index | ### Index-Based Setters -| Method | Description | -|---|---| -| `SetNull(size_t idx)` | Set field to null | -| `SetBool(size_t idx, bool value)` | Set boolean value | -| `SetInt32(size_t idx, int32_t value)` | Set 32-bit integer | -| `SetInt64(size_t idx, int64_t value)` | Set 
64-bit integer | -| `SetFloat32(size_t idx, float value)` | Set 32-bit float | -| `SetFloat64(size_t idx, double value)` | Set 64-bit float | -| `SetString(size_t idx, const std::string& value)` | Set string value | -| `SetBytes(size_t idx, const std::vector& value)` | Set binary data | -| `SetDate(size_t idx, const Date& value)` | Set date value | -| `SetTime(size_t idx, const Time& value)` | Set time value | -| `SetTimestampNtz(size_t idx, const Timestamp& value)` | Set timestamp without timezone | -| `SetTimestampLtz(size_t idx, const Timestamp& value)` | Set timestamp with timezone | -| `SetDecimal(size_t idx, const std::string& value)` | Set decimal from string | +| Method | Description | +|-----------------------------------------------------------|--------------------------------| +| `SetNull(size_t idx)` | Set field to null | +| `SetBool(size_t idx, bool value)` | Set boolean value | +| `SetInt32(size_t idx, int32_t value)` | Set 32-bit integer | +| `SetInt64(size_t idx, int64_t value)` | Set 64-bit integer | +| `SetFloat32(size_t idx, float value)` | Set 32-bit float | +| `SetFloat64(size_t idx, double value)` | Set 64-bit float | +| `SetString(size_t idx, const std::string& value)` | Set string value | +| `SetBytes(size_t idx, const std::vector& value)` | Set binary data | +| `SetDate(size_t idx, const Date& value)` | Set date value | +| `SetTime(size_t idx, const Time& value)` | Set time value | +| `SetTimestampNtz(size_t idx, const Timestamp& value)` | Set timestamp without timezone | +| `SetTimestampLtz(size_t idx, const Timestamp& value)` | Set timestamp with timezone | +| `SetDecimal(size_t idx, const std::string& value)` | Set decimal from string | ### Name-Based Setters When using `table.NewRow()`, the `Set()` method auto-routes to the correct type based on the schema: -| Method | Description | -|---|---| -| `Set(const std::string& name, bool value)` | Set boolean by column name | -| `Set(const std::string& name, int32_t value)` | Set integer by column name | -| `Set(const std::string& name, int64_t value)` | Set big integer by column name | -| `Set(const std::string& name, float value)` | Set float by column name | -| `Set(const std::string& name, double value)` | Set double by column name | +| Method | Description | +|----------------------------------------------------------|-----------------------------------| +| `Set(const std::string& name, bool value)` | Set boolean by column name | +| `Set(const std::string& name, int32_t value)` | Set integer by column name | +| `Set(const std::string& name, int64_t value)` | Set big integer by column name | +| `Set(const std::string& name, float value)` | Set float by column name | +| `Set(const std::string& name, double value)` | Set double by column name | | `Set(const std::string& name, const std::string& value)` | Set string/decimal by column name | -| `Set(const std::string& name, const Date& value)` | Set date by column name | -| `Set(const std::string& name, const Time& value)` | Set time by column name | -| `Set(const std::string& name, const Timestamp& value)` | Set timestamp by column name | +| `Set(const std::string& name, const Date& value)` | Set date by column name | +| `Set(const std::string& name, const Time& value)` | Set time by column name | +| `Set(const std::string& name, const Timestamp& value)` | Set timestamp by column name | ### Row Inspection -| Method | Description | -|---|---| -| `FieldCount() -> size_t` | Get the number of fields | -| `GetType(size_t idx) -> DatumType` | Get the datum type at index | 
-| `IsNull(size_t idx) -> bool` | Check if field is null | -| `IsDecimal(size_t idx) -> bool` | Check if field is a decimal type | +| Method | Description | +|------------------------------------|----------------------------------| +| `FieldCount() -> size_t` | Get the number of fields | +| `GetType(size_t idx) -> DatumType` | Get the datum type at index | +| `IsNull(size_t idx) -> bool` | Check if field is null | +| `IsDecimal(size_t idx) -> bool` | Check if field is a decimal type | ## `ScanRecord` -| Field | Type | Description | -|---|---|---| -| `bucket_id` | `int32_t` | Bucket this record belongs to | -| `offset` | `int64_t` | Record offset in the log | -| `timestamp` | `int64_t` | Record timestamp | -| `row` | `GenericRow` | Row data | +| Field | Type | Description | +|-------------|--------------|-------------------------------| +| `bucket_id` | `int32_t` | Bucket this record belongs to | +| `offset` | `int64_t` | Record offset in the log | +| `timestamp` | `int64_t` | Record timestamp | +| `row` | `GenericRow` | Row data | ## `ScanRecords` -| Method | Description | -|---|---| -| `Size() -> size_t` | Number of records | -| `Empty() -> bool` | Check if empty | -| `operator[](size_t idx) -> const ScanRecord&` | Access record by index | -| `begin() / end()` | Iterator support for range-based for loops | +| Method | Description | +|-----------------------------------------------|--------------------------------------------| +| `Size() -> size_t` | Number of records | +| `Empty() -> bool` | Check if empty | +| `operator[](size_t idx) -> const ScanRecord&` | Access record by index | +| `begin() / end()` | Iterator support for range-based for loops | ## `ArrowRecordBatch` -| Method | Description | -|---|---| +| Method | Description | +|----------------------------------------------------------------|--------------------------------------| | `GetArrowRecordBatch() -> std::shared_ptr` | Get the underlying Arrow RecordBatch | -| `NumRows() -> int64_t` | Number of rows in the batch | -| `GetTableId() -> int64_t` | Table ID | -| `GetPartitionId() -> int64_t` | Partition ID | -| `GetBucketId() -> int32_t` | Bucket ID | -| `GetBaseOffset() -> int64_t` | First record offset | -| `GetLastOffset() -> int64_t` | Last record offset | +| `NumRows() -> int64_t` | Number of rows in the batch | +| `GetTableId() -> int64_t` | Table ID | +| `GetPartitionId() -> int64_t` | Partition ID | +| `GetBucketId() -> int32_t` | Bucket ID | +| `GetBaseOffset() -> int64_t` | First record offset | +| `GetLastOffset() -> int64_t` | Last record offset | ## `ArrowRecordBatches` -| Method | Description | -|---|---| -| `Size() -> size_t` | Number of batches | -| `Empty() -> bool` | Check if empty | -| `operator[](size_t idx)` | Access batch by index | -| `begin() / end()` | Iterator support for range-based for loops | +| Method | Description | +|--------------------------|--------------------------------------------| +| `Size() -> size_t` | Number of batches | +| `Empty() -> bool` | Check if empty | +| `operator[](size_t idx)` | Access batch by index | +| `begin() / end()` | Iterator support for range-based for loops | ## `Schema` -| Method | Description | -|---|---| +| Method | Description | +|-----------------------------------|-----------------------------| | `NewBuilder() -> Schema::Builder` | Create a new schema builder | ## `Schema::Builder` -| Method | Description | -|---|---| -| `AddColumn(const std::string& name, const DataType& type) -> Builder&` | Add a column | -| `SetPrimaryKeys(const std::vector& keys) -> 
Builder&` | Set primary key columns | -| `Build() -> Schema` | Build the schema | +| Method | Description | +|------------------------------------------------------------------------|-------------------------| +| `AddColumn(const std::string& name, const DataType& type) -> Builder&` | Add a column | +| `SetPrimaryKeys(const std::vector& keys) -> Builder&` | Set primary key columns | +| `Build() -> Schema` | Build the schema | ## `TableDescriptor` -| Method | Description | -|---|---| +| Method | Description | +|--------------------------------------------|---------------------------------------| | `NewBuilder() -> TableDescriptor::Builder` | Create a new table descriptor builder | ## `TableDescriptor::Builder` -| Method | Description | -|---|---| -| `SetSchema(const Schema& schema) -> Builder&` | Set the table schema | -| `SetPartitionKeys(const std::vector& keys) -> Builder&` | Set partition key columns | -| `SetBucketCount(int32_t count) -> Builder&` | Set the number of buckets | -| `SetBucketKeys(const std::vector& keys) -> Builder&` | Set bucket key columns | -| `SetProperty(const std::string& key, const std::string& value) -> Builder&` | Set a table property | -| `SetComment(const std::string& comment) -> Builder&` | Set a table comment | -| `Build() -> TableDescriptor` | Build the table descriptor | +| Method | Description | +|-----------------------------------------------------------------------------|----------------------------| +| `SetSchema(const Schema& schema) -> Builder&` | Set the table schema | +| `SetPartitionKeys(const std::vector& keys) -> Builder&` | Set partition key columns | +| `SetBucketCount(int32_t count) -> Builder&` | Set the number of buckets | +| `SetBucketKeys(const std::vector& keys) -> Builder&` | Set bucket key columns | +| `SetProperty(const std::string& key, const std::string& value) -> Builder&` | Set a table property | +| `SetComment(const std::string& comment) -> Builder&` | Set a table comment | +| `Build() -> TableDescriptor` | Build the table descriptor | ## `DataType` ### Factory Methods -| Method | Description | -|---|---| -| `DataType::Boolean()` | Boolean type | -| `DataType::TinyInt()` | 8-bit signed integer | -| `DataType::SmallInt()` | 16-bit signed integer | -| `DataType::Int()` | 32-bit signed integer | -| `DataType::BigInt()` | 64-bit signed integer | -| `DataType::Float()` | 32-bit floating point | -| `DataType::Double()` | 64-bit floating point | -| `DataType::String()` | UTF-8 string | -| `DataType::Bytes()` | Binary data | -| `DataType::Date()` | Date (days since epoch) | -| `DataType::Time()` | Time (milliseconds since midnight) | -| `DataType::Timestamp(int precision)` | Timestamp without timezone | -| `DataType::TimestampLtz(int precision)` | Timestamp with timezone | -| `DataType::Decimal(int precision, int scale)` | Decimal with precision and scale | +| Method | Description | +|-----------------------------------------------|------------------------------------| +| `DataType::Boolean()` | Boolean type | +| `DataType::TinyInt()` | 8-bit signed integer | +| `DataType::SmallInt()` | 16-bit signed integer | +| `DataType::Int()` | 32-bit signed integer | +| `DataType::BigInt()` | 64-bit signed integer | +| `DataType::Float()` | 32-bit floating point | +| `DataType::Double()` | 64-bit floating point | +| `DataType::String()` | UTF-8 string | +| `DataType::Bytes()` | Binary data | +| `DataType::Date()` | Date (days since epoch) | +| `DataType::Time()` | Time (milliseconds since midnight) | +| `DataType::Timestamp(int precision)` | 
Timestamp without timezone | +| `DataType::TimestampLtz(int precision)` | Timestamp with timezone | +| `DataType::Decimal(int precision, int scale)` | Decimal with precision and scale | ### Accessors -| Method | Description | -|---|---| -| `id() -> TypeId` | Get the type ID | +| Method | Description | +|----------------------|---------------------------------------------| +| `id() -> TypeId` | Get the type ID | | `precision() -> int` | Get precision (for Decimal/Timestamp types) | -| `scale() -> int` | Get scale (for Decimal type) | +| `scale() -> int` | Get scale (for Decimal type) | ## `TablePath` -| Method / Field | Description | -|---|---| -| `TablePath(const std::string& database, const std::string& table)` | Create a table path | -| `database_name -> std::string` | Database name | -| `table_name -> std::string` | Table name | -| `ToString() -> std::string` | String representation | +| Method / Field | Description | +|--------------------------------------------------------------------|-----------------------| +| `TablePath(const std::string& database, const std::string& table)` | Create a table path | +| `database_name -> std::string` | Database name | +| `table_name -> std::string` | Table name | +| `ToString() -> std::string` | String representation | ## `TableInfo` -| Field | Type | Description | -|---|---|---| -| `table_id` | `int64_t` | Table ID | -| `schema_id` | `int32_t` | Schema ID | -| `table_path` | `TablePath` | Table path | -| `created_time` | `int64_t` | Creation timestamp | -| `modified_time` | `int64_t` | Last modification timestamp | -| `primary_keys` | `std::vector` | Primary key columns | -| `bucket_keys` | `std::vector` | Bucket key columns | -| `partition_keys` | `std::vector` | Partition key columns | -| `num_buckets` | `int32_t` | Number of buckets | -| `has_primary_key` | `bool` | Whether the table has a primary key | -| `is_partitioned` | `bool` | Whether the table is partitioned | -| `properties` | `std::unordered_map` | Table properties | -| `comment` | `std::string` | Table comment | -| `schema` | `Schema` | Table schema | +| Field | Type | Description | +|-------------------|------------------------------------------------|-------------------------------------| +| `table_id` | `int64_t` | Table ID | +| `schema_id` | `int32_t` | Schema ID | +| `table_path` | `TablePath` | Table path | +| `created_time` | `int64_t` | Creation timestamp | +| `modified_time` | `int64_t` | Last modification timestamp | +| `primary_keys` | `std::vector` | Primary key columns | +| `bucket_keys` | `std::vector` | Bucket key columns | +| `partition_keys` | `std::vector` | Partition key columns | +| `num_buckets` | `int32_t` | Number of buckets | +| `has_primary_key` | `bool` | Whether the table has a primary key | +| `is_partitioned` | `bool` | Whether the table is partitioned | +| `properties` | `std::unordered_map` | Table properties | +| `comment` | `std::string` | Table comment | +| `schema` | `Schema` | Table schema | ## Temporal Types ### `Date` -| Method | Description | -|---|---| -| `Date::FromDays(int32_t days)` | Create from days since epoch | +| Method | Description | +|-----------------------------------------------|------------------------------| +| `Date::FromDays(int32_t days)` | Create from days since epoch | | `Date::FromYMD(int year, int month, int day)` | Create from year, month, day | -| `Year() -> int` | Get year | -| `Month() -> int` | Get month | -| `Day() -> int` | Get day | +| `Year() -> int` | Get year | +| `Month() -> int` | Get month | +| `Day() -> int` 
| Get day | ### `Time` -| Method | Description | -|---|---| -| `Time::FromMillis(int32_t millis)` | Create from milliseconds since midnight | -| `Time::FromHMS(int hour, int minute, int second)` | Create from hour, minute, second | -| `Hour() -> int` | Get hour | -| `Minute() -> int` | Get minute | -| `Second() -> int` | Get second | -| `Millis() -> int64_t` | Get milliseconds | +| Method | Description | +|---------------------------------------------------|----------------------------------------------| +| `Time::FromMillis(int32_t millis)` | Create from milliseconds since midnight | +| `Time::FromHMS(int hour, int minute, int second)` | Create from hour, minute, second | +| `Hour() -> int` | Get hour | +| `Minute() -> int` | Get minute | +| `Second() -> int` | Get second | +| `Millis() -> int64_t` | Get sub-second millisecond component (0-999) | ### `Timestamp` -| Method | Description | -|---|---| -| `Timestamp::FromMillis(int64_t millis)` | Create from milliseconds since epoch | -| `Timestamp::FromMillisNanos(int64_t millis, int32_t nanos)` | Create from milliseconds and nanoseconds | -| `Timestamp::FromTimePoint(std::chrono::system_clock::time_point tp)` | Create from a time point | +| Method | Description | +|----------------------------------------------------------------------|------------------------------------------| +| `Timestamp::FromMillis(int64_t millis)` | Create from milliseconds since epoch | +| `Timestamp::FromMillisNanos(int64_t millis, int32_t nanos)` | Create from milliseconds and nanoseconds | +| `Timestamp::FromTimePoint(std::chrono::system_clock::time_point tp)` | Create from a time point | ## `PartitionInfo` -| Field | Type | Description | -|---|---|---| -| `partition_id` | `int64_t` | Partition ID | +| Field | Type | Description | +|------------------|---------------|----------------| +| `partition_id` | `int64_t` | Partition ID | | `partition_name` | `std::string` | Partition name | ## `DatabaseDescriptor` -| Field | Type | Description | -|---|---|---| -| `comment` | `std::string` | Database comment | +| Field | Type | Description | +|--------------|------------------------------------------------|-------------------| +| `comment` | `std::string` | Database comment | | `properties` | `std::unordered_map` | Custom properties | ## `DatabaseInfo` -| Field | Type | Description | -|---|---|---| -| `database_name` | `std::string` | Database name | -| `comment` | `std::string` | Database comment | -| `properties` | `std::unordered_map` | Custom properties | -| `created_time` | `int64_t` | Creation timestamp | -| `modified_time` | `int64_t` | Last modification timestamp | +| Field | Type | Description | +|-----------------|------------------------------------------------|-----------------------------| +| `database_name` | `std::string` | Database name | +| `comment` | `std::string` | Database comment | +| `properties` | `std::unordered_map` | Custom properties | +| `created_time` | `int64_t` | Creation timestamp | +| `modified_time` | `int64_t` | Last modification timestamp | ## `LakeSnapshot` -| Field | Type | Description | -|---|---|---| -| `snapshot_id` | `int64_t` | Snapshot ID | +| Field | Type | Description | +|------------------|-----------------------------|--------------------| +| `snapshot_id` | `int64_t` | Snapshot ID | | `bucket_offsets` | `std::vector` | All bucket offsets | ## `BucketOffset` -| Field | Type | Description | -|---|---|---| -| `table_id` | `int64_t` | Table ID | +| Field | Type | Description | +|----------------|-----------|--------------| +| 
`table_id` | `int64_t` | Table ID | | `partition_id` | `int64_t` | Partition ID | -| `bucket_id` | `int32_t` | Bucket ID | -| `offset` | `int64_t` | Offset value | +| `bucket_id` | `int32_t` | Bucket ID | +| `offset` | `int64_t` | Offset value | ## `OffsetQuery` -| Method | Description | -|---|---| -| `OffsetQuery::Earliest()` | Query for the earliest available offset | -| `OffsetQuery::Latest()` | Query for the latest offset | -| `OffsetQuery::FromTimestamp(int64_t timestamp_ms)` | Query offset at a specific timestamp | +| Method | Description | +|----------------------------------------------------|-----------------------------------------| +| `OffsetQuery::Earliest()` | Query for the earliest available offset | +| `OffsetQuery::Latest()` | Query for the latest offset | +| `OffsetQuery::FromTimestamp(int64_t timestamp_ms)` | Query offset at a specific timestamp | ## Constants -| Constant | Value | Description | -|---|---|---| -| `fluss::EARLIEST_OFFSET` | `-2` | Start reading from the earliest available offset | -| `fluss::LATEST_OFFSET` | `-1` | Start reading from the latest offset (only new records) | +| Constant | Value | Description | +|--------------------------|--------|---------------------------------------------------------| +| `fluss::EARLIEST_OFFSET` | `-2` | Start reading from the earliest available offset | + +To start reading from the latest offset (only new records), resolve the current offset via `ListOffsets` before subscribing: + +```cpp +std::unordered_map offsets; +admin.ListOffsets(table_path, {0}, fluss::OffsetQuery::Latest(), offsets); +scanner.Subscribe(0, offsets[0]); +``` ## Enums ### `TypeId` -| Value | Description | -|---|---| -| `Boolean` | Boolean type | -| `TinyInt` | 8-bit signed integer | -| `SmallInt` | 16-bit signed integer | -| `Int` | 32-bit signed integer | -| `BigInt` | 64-bit signed integer | -| `Float` | 32-bit floating point | -| `Double` | 64-bit floating point | -| `String` | UTF-8 string | -| `Bytes` | Binary data | -| `Date` | Date | -| `Time` | Time | -| `Timestamp` | Timestamp without timezone | -| `TimestampLtz` | Timestamp with timezone | -| `Decimal` | Decimal | +| Value | Description | +|----------------|----------------------------| +| `Boolean` | Boolean type | +| `TinyInt` | 8-bit signed integer | +| `SmallInt` | 16-bit signed integer | +| `Int` | 32-bit signed integer | +| `BigInt` | 64-bit signed integer | +| `Float` | 32-bit floating point | +| `Double` | 64-bit floating point | +| `String` | UTF-8 string | +| `Bytes` | Binary data | +| `Date` | Date | +| `Time` | Time | +| `Timestamp` | Timestamp without timezone | +| `TimestampLtz` | Timestamp with timezone | +| `Decimal` | Decimal | ### `DatumType` -| Value | C++ Type | Description | -|---|---|---| -| `Null` | -- | Null value | -| `Bool` | `bool` | Boolean | -| `Int32` | `int32_t` | 32-bit integer | -| `Int64` | `int64_t` | 64-bit integer | -| `Float32` | `float` | 32-bit float | -| `Float64` | `double` | 64-bit float | -| `String` | `std::string` | String | -| `Bytes` | `std::vector` | Binary data | -| `DecimalI64` | `int64_t` | Decimal (64-bit internal) | -| `DecimalI128` | `__int128` | Decimal (128-bit internal) | -| `DecimalString` | `std::string` | Decimal (string representation) | -| `Date` | `Date` | Date | -| `Time` | `Time` | Time | -| `TimestampNtz` | `Timestamp` | Timestamp without timezone | -| `TimestampLtz` | `Timestamp` | Timestamp with timezone | +| Value | C++ Type | Description | +|-----------------|------------------------|---------------------------------| 
+| `Null` | -- | Null value | +| `Bool` | `bool` | Boolean | +| `Int32` | `int32_t` | 32-bit integer | +| `Int64` | `int64_t` | 64-bit integer | +| `Float32` | `float` | 32-bit float | +| `Float64` | `double` | 64-bit float | +| `String` | `std::string` | String | +| `Bytes` | `std::vector` | Binary data | +| `DecimalI64` | `int64_t` | Decimal (64-bit internal) | +| `DecimalI128` | `__int128` | Decimal (128-bit internal) | +| `DecimalString` | `std::string` | Decimal (string representation) | +| `Date` | `Date` | Date | +| `Time` | `Time` | Time | +| `TimestampNtz` | `Timestamp` | Timestamp without timezone | +| `TimestampLtz` | `Timestamp` | Timestamp with timezone | ### `OffsetSpec` -| Value | Description | -|---|---| -| `Earliest` | Earliest available offset | -| `Latest` | Latest offset | +| Value | Description | +|-------------|--------------------------------| +| `Earliest` | Earliest available offset | +| `Latest` | Latest offset | | `Timestamp` | Offset at a specific timestamp | diff --git a/website/docs/user-guide/cpp/data-types.md b/website/docs/user-guide/cpp/data-types.md index 765b2f12..65e6e4a4 100644 --- a/website/docs/user-guide/cpp/data-types.md +++ b/website/docs/user-guide/cpp/data-types.md @@ -5,22 +5,22 @@ sidebar_position: 3 ## Schema DataTypes -| DataType | Description | -|---|---| -| `DataType::Boolean()` | Boolean value | -| `DataType::TinyInt()` | 8-bit signed integer | -| `DataType::SmallInt()` | 16-bit signed integer | -| `DataType::Int()` | 32-bit signed integer | -| `DataType::BigInt()` | 64-bit signed integer | -| `DataType::Float()` | 32-bit floating point | -| `DataType::Double()` | 64-bit floating point | -| `DataType::String()` | UTF-8 string | -| `DataType::Bytes()` | Binary data | -| `DataType::Date()` | Date (days since epoch) | -| `DataType::Time()` | Time (milliseconds since midnight) | -| `DataType::Timestamp()` | Timestamp without timezone | -| `DataType::TimestampLtz()` | Timestamp with timezone | -| `DataType::Decimal(p, s)` | Decimal with precision and scale | +| DataType | Description | +|----------------------------|------------------------------------| +| `DataType::Boolean()` | Boolean value | +| `DataType::TinyInt()` | 8-bit signed integer | +| `DataType::SmallInt()` | 16-bit signed integer | +| `DataType::Int()` | 32-bit signed integer | +| `DataType::BigInt()` | 64-bit signed integer | +| `DataType::Float()` | 32-bit floating point | +| `DataType::Double()` | 64-bit floating point | +| `DataType::String()` | UTF-8 string | +| `DataType::Bytes()` | Binary data | +| `DataType::Date()` | Date (days since epoch) | +| `DataType::Time()` | Time (milliseconds since midnight) | +| `DataType::Timestamp()` | Timestamp without timezone | +| `DataType::TimestampLtz()` | Timestamp with timezone | +| `DataType::Decimal(p, s)` | Decimal with precision and scale | ## GenericRow Setters @@ -64,21 +64,21 @@ fluss::Timestamp ts = result_row.GetTimestamp(7); ## DatumType Enum -| DatumType | C++ Type | Getter | -|---|---|---| -| `Null` | — | `IsNull(idx)` | -| `Bool` | `bool` | `GetBool(idx)` | -| `Int32` | `int32_t` | `GetInt32(idx)` | -| `Int64` | `int64_t` | `GetInt64(idx)` | -| `Float32` | `float` | `GetFloat32(idx)` | -| `Float64` | `double` | `GetFloat64(idx)` | -| `String` | `std::string` | `GetString(idx)` | -| `Bytes` | `std::vector` | `GetBytes(idx)` | -| `Date` | `Date` | `GetDate(idx)` | -| `Time` | `Time` | `GetTime(idx)` | -| `TimestampNtz` | `Timestamp` | `GetTimestamp(idx)` | -| `TimestampLtz` | `Timestamp` | `GetTimestamp(idx)` | -| 
`DecimalString` | `std::string` | `DecimalToString(idx)` | +| DatumType | C++ Type | Getter | +|-----------------|------------------------|------------------------| +| `Null` | -- | `IsNull(idx)` | +| `Bool` | `bool` | `GetBool(idx)` | +| `Int32` | `int32_t` | `GetInt32(idx)` | +| `Int64` | `int64_t` | `GetInt64(idx)` | +| `Float32` | `float` | `GetFloat32(idx)` | +| `Float64` | `double` | `GetFloat64(idx)` | +| `String` | `std::string` | `GetString(idx)` | +| `Bytes` | `std::vector` | `GetBytes(idx)` | +| `Date` | `Date` | `GetDate(idx)` | +| `Time` | `Time` | `GetTime(idx)` | +| `TimestampNtz` | `Timestamp` | `GetTimestamp(idx)` | +| `TimestampLtz` | `Timestamp` | `GetTimestamp(idx)` | +| `DecimalString` | `std::string` | `DecimalToString(idx)` | ## Type Checking @@ -98,5 +98,12 @@ if (rec.row.IsDecimal(2)) { ```cpp constexpr int64_t fluss::EARLIEST_OFFSET = -2; // Start from earliest -constexpr int64_t fluss::LATEST_OFFSET = -1; // Start from latest +``` + +To start reading from the latest offset, resolve the current offset via `ListOffsets` before subscribing: + +```cpp +std::unordered_map offsets; +admin.ListOffsets(table_path, {0}, fluss::OffsetQuery::Latest(), offsets); +scanner.Subscribe(0, offsets[0]); ``` diff --git a/website/docs/user-guide/cpp/error-handling.md b/website/docs/user-guide/cpp/error-handling.md index 0c4fe7ec..e1ec058e 100644 --- a/website/docs/user-guide/cpp/error-handling.md +++ b/website/docs/user-guide/cpp/error-handling.md @@ -18,30 +18,25 @@ if (!result.Ok()) { } ``` -| Field / Method | Type | Description | -|---|---|---| -| `error_code` | `int32_t` | 0 for success, non-zero for errors | -| `error_message` | `std::string` | Human-readable error description | -| `Ok()` | `bool` | Returns `true` if the operation succeeded | +| Field / Method | Type | Description | +|------------------|---------------|-------------------------------------------| +| `error_code` | `int32_t` | 0 for success, non-zero for errors | +| `error_message` | `std::string` | Human-readable error description | +| `Ok()` | `bool` | Returns `true` if the operation succeeded | -## Common Pattern: Helper Function +## Handling Errors -A common pattern is to define a `check` helper that exits on failure: +Check the `Result` after each operation and decide how to respond, e.g. log and continue, retry, or abort: ```cpp -static void check(const char* step, const fluss::Result& r) { - if (!r.Ok()) { - std::cerr << step << " failed: " << r.error_message << std::endl; - std::exit(1); - } +fluss::Connection conn; +fluss::Result result = fluss::Connection::Create(config, conn); +if (!result.Ok()) { + // Log, retry, or propagate the error as appropriate + std::cerr << "Connection failed (code " << result.error_code + << "): " << result.error_message << std::endl; + return 1; } - -// Usage -fluss::Configuration config; -config.bootstrap_servers = "127.0.0.1:9123"; -check("create", fluss::Connection::Create(config, conn)); -check("create_table", admin.CreateTable(table_path, descriptor, true)); -check("flush", writer.Flush()); ``` ## Connection State Checking @@ -105,7 +100,7 @@ row.Set("score", static_cast(100)); fluss::WriteResult wr; fluss::Result result = writer.Upsert(row, wr); if (!result.Ok()) { - // Partition not found — create partitions before writing + // Partition not found, create partitions before writing std::cerr << "Write error: " << result.error_message << std::endl; } ``` @@ -127,8 +122,7 @@ if (!result.Ok()) { ## Best Practices -1. 
**Always check `Result`** -- Never ignore the return value of operations that return `Result`. -2. **Use a helper function** -- Define a `check()` helper to reduce boilerplate for fatal errors. -3. **Handle errors gracefully** -- For production code, log errors and retry or fail gracefully instead of calling `std::exit()`. -4. **Verify connection state** -- Use `Available()` to check connection validity before operations. -5. **Create partitions before writing** -- For partitioned primary key tables, always create partitions before attempting upserts. +1. **Always check `Result`**: Never ignore the return value of operations that return `Result`. +2. **Handle errors gracefully**: Log errors and retry or fail gracefully rather than crashing. +3. **Verify connection state**: Use `Available()` to check connection validity before operations. +4. **Create partitions before writing**: For partitioned primary key tables, always create partitions before attempting upserts. diff --git a/website/docs/user-guide/cpp/example/admin-operations.md b/website/docs/user-guide/cpp/example/admin-operations.md index bff55e80..c27dc209 100644 --- a/website/docs/user-guide/cpp/example/admin-operations.md +++ b/website/docs/user-guide/cpp/example/admin-operations.md @@ -7,7 +7,7 @@ sidebar_position: 3 ```cpp fluss::Admin admin; -check("get_admin", conn.GetAdmin(admin)); +conn.GetAdmin(admin); ``` ## Table Operations @@ -29,18 +29,18 @@ auto descriptor = fluss::TableDescriptor::NewBuilder() .Build(); // Create table -check("create_table", admin.CreateTable(table_path, descriptor, true)); +admin.CreateTable(table_path, descriptor, true); // Get table information fluss::TableInfo table_info; -check("get_table", admin.GetTableInfo(table_path, table_info)); +admin.GetTableInfo(table_path, table_info); std::cout << "Table ID: " << table_info.table_id << std::endl; std::cout << "Number of buckets: " << table_info.num_buckets << std::endl; std::cout << "Has primary key: " << table_info.has_primary_key << std::endl; std::cout << "Is partitioned: " << table_info.is_partitioned << std::endl; // Drop table -check("drop_table", admin.DropTable(table_path, true)); +admin.DropTable(table_path, true); ``` ## Schema Builder Options @@ -72,36 +72,32 @@ std::vector bucket_ids = {0, 1, 2}; // Query earliest offsets std::unordered_map earliest_offsets; -check("list_offsets", - admin.ListOffsets(table_path, bucket_ids, - fluss::OffsetQuery::Earliest(), earliest_offsets)); +admin.ListOffsets(table_path, bucket_ids, + fluss::OffsetQuery::Earliest(), earliest_offsets); // Query latest offsets std::unordered_map latest_offsets; -check("list_offsets", - admin.ListOffsets(table_path, bucket_ids, - fluss::OffsetQuery::Latest(), latest_offsets)); +admin.ListOffsets(table_path, bucket_ids, + fluss::OffsetQuery::Latest(), latest_offsets); // Query offsets for a specific timestamp std::unordered_map timestamp_offsets; -check("list_offsets", - admin.ListOffsets(table_path, bucket_ids, - fluss::OffsetQuery::FromTimestamp(timestamp_ms), - timestamp_offsets)); +admin.ListOffsets(table_path, bucket_ids, + fluss::OffsetQuery::FromTimestamp(timestamp_ms), + timestamp_offsets); // Query partition offsets std::unordered_map partition_offsets; -check("list_partition_offsets", - admin.ListPartitionOffsets(table_path, "partition_name", - bucket_ids, fluss::OffsetQuery::Latest(), - partition_offsets)); +admin.ListPartitionOffsets(table_path, "partition_name", + bucket_ids, fluss::OffsetQuery::Latest(), + partition_offsets); ``` ## Lake Snapshot ```cpp 
fluss::LakeSnapshot snapshot; -check("get_snapshot", admin.GetLatestLakeSnapshot(table_path, snapshot)); +admin.GetLatestLakeSnapshot(table_path, snapshot); std::cout << "Snapshot ID: " << snapshot.snapshot_id << std::endl; for (const auto& bucket_offset : snapshot.bucket_offsets) { std::cout << " Table " << bucket_offset.table_id diff --git a/website/docs/user-guide/cpp/example/configuration.md b/website/docs/user-guide/cpp/example/configuration.md index 076710ee..c4fc6678 100644 --- a/website/docs/user-guide/cpp/example/configuration.md +++ b/website/docs/user-guide/cpp/example/configuration.md @@ -33,17 +33,3 @@ config.writer_batch_size = 2 * 1024 * 1024; // Batch size (2 MB) config.scanner_remote_log_prefetch_num = 4; // Remote log prefetch count config.remote_file_download_thread_num = 3; // Download threads ``` - -## Error Handling - -All C++ operations return a `fluss::Result`. Check with `Ok()` before continuing: - -```cpp -static void check(const char* step, const fluss::Result& r) { - if (!r.Ok()) { - std::cerr << step << " failed: code=" << r.error_code - << " msg=" << r.error_message << std::endl; - std::exit(1); - } -} -``` diff --git a/website/docs/user-guide/cpp/example/index.md b/website/docs/user-guide/cpp/example/index.md index 5afdb5b5..51f60e41 100644 --- a/website/docs/user-guide/cpp/example/index.md +++ b/website/docs/user-guide/cpp/example/index.md @@ -3,29 +3,22 @@ sidebar_position: 1 --- # Example -Minimal working examples: connect to Fluss, create a table, write data, and read it back. +Minimal working example: connect to Fluss, create a table, write data, and read it back. ```cpp #include #include "fluss.hpp" -static void check(const char* step, const fluss::Result& r) { - if (!r.Ok()) { - std::cerr << step << " failed: " << r.error_message << std::endl; - std::exit(1); - } -} - int main() { // Connect fluss::Configuration config; config.bootstrap_servers = "127.0.0.1:9123"; fluss::Connection conn; - check("create", fluss::Connection::Create(config, conn)); + fluss::Connection::Create(config, conn); fluss::Admin admin; - check("get_admin", conn.GetAdmin(admin)); + conn.GetAdmin(admin); // Create a log table fluss::TablePath table_path("fluss", "quickstart_cpp"); @@ -36,30 +29,30 @@ int main() { auto descriptor = fluss::TableDescriptor::NewBuilder() .SetSchema(schema) .Build(); - check("create_table", admin.CreateTable(table_path, descriptor, true)); + admin.CreateTable(table_path, descriptor, true); // Write fluss::Table table; - check("get_table", conn.GetTable(table_path, table)); + conn.GetTable(table_path, table); fluss::AppendWriter writer; - check("new_writer", table.NewAppend().CreateWriter(writer)); + table.NewAppend().CreateWriter(writer); fluss::GenericRow row; row.SetInt32(0, 1); row.SetString(1, "hello"); - check("append", writer.Append(row)); - check("flush", writer.Flush()); + writer.Append(row); + writer.Flush(); // Read fluss::LogScanner scanner; - check("new_scanner", table.NewScan().CreateLogScanner(scanner)); + table.NewScan().CreateLogScanner(scanner); auto info = table.GetTableInfo(); for (int b = 0; b < info.num_buckets; ++b) { - check("subscribe", scanner.Subscribe(b, 0)); + scanner.Subscribe(b, 0); } fluss::ScanRecords records; - check("poll", scanner.Poll(5000, records)); + scanner.Poll(5000, records); for (const auto& rec : records) { std::cout << "id=" << rec.row.GetInt32(0) << ", name=" << rec.row.GetString(1) << std::endl; diff --git a/website/docs/user-guide/cpp/example/log-tables.md 
b/website/docs/user-guide/cpp/example/log-tables.md index 84bcbb0f..c94bb845 100644 --- a/website/docs/user-guide/cpp/example/log-tables.md +++ b/website/docs/user-guide/cpp/example/log-tables.md @@ -19,40 +19,40 @@ auto descriptor = fluss::TableDescriptor::NewBuilder() .Build(); fluss::TablePath table_path("fluss", "events"); -check("create_table", admin.CreateTable(table_path, descriptor, true)); +admin.CreateTable(table_path, descriptor, true); ``` ## Writing to Log Tables ```cpp fluss::Table table; -check("get_table", conn.GetTable(table_path, table)); +conn.GetTable(table_path, table); fluss::AppendWriter writer; -check("new_writer", table.NewAppend().CreateWriter(writer)); +table.NewAppend().CreateWriter(writer); fluss::GenericRow row; row.SetInt32(0, 1); // event_id row.SetString(1, "user_login"); // event_type row.SetInt64(2, 1704067200000L); // timestamp -check("append", writer.Append(row)); +writer.Append(row); -check("flush", writer.Flush()); +writer.Flush(); ``` ## Reading from Log Tables ```cpp fluss::LogScanner scanner; -check("new_scanner", table.NewScan().CreateLogScanner(scanner)); +table.NewScan().CreateLogScanner(scanner); auto info = table.GetTableInfo(); for (int b = 0; b < info.num_buckets; ++b) { - check("subscribe", scanner.Subscribe(b, 0)); + scanner.Subscribe(b, 0); } fluss::ScanRecords records; -check("poll", scanner.Poll(5000, records)); // timeout in ms +scanner.Poll(5000, records); // timeout in ms for (const auto& rec : records) { std::cout << "event_id=" << rec.row.GetInt32(0) @@ -68,7 +68,14 @@ for (const auto& rec : records) { std::vector subscriptions; subscriptions.push_back({0, 0}); // bucket 0, offset 0 subscriptions.push_back({1, 100}); // bucket 1, offset 100 -check("subscribe_batch", scanner.Subscribe(subscriptions)); +scanner.Subscribe(subscriptions); +``` + +**Unsubscribe from a bucket:** + +```cpp +// Stop receiving records from bucket 1 +scanner.Unsubscribe(1); ``` **Arrow RecordBatch polling (high performance):** @@ -77,14 +84,14 @@ check("subscribe_batch", scanner.Subscribe(subscriptions)); #include fluss::LogScanner arrow_scanner; -check("new_scanner", table.NewScan().CreateRecordBatchLogScanner(arrow_scanner)); +table.NewScan().CreateRecordBatchLogScanner(arrow_scanner); for (int b = 0; b < info.num_buckets; ++b) { - check("subscribe", arrow_scanner.Subscribe(b, 0)); + arrow_scanner.Subscribe(b, 0); } fluss::ArrowRecordBatches batches; -check("poll", arrow_scanner.PollRecordBatch(5000, batches)); +arrow_scanner.PollRecordBatch(5000, batches); for (size_t i = 0; i < batches.Size(); ++i) { const auto& batch = batches[i]; @@ -102,16 +109,13 @@ for (size_t i = 0; i < batches.Size(); ++i) { ```cpp // Project by column index fluss::LogScanner projected_scanner; -check("new_scanner", - table.NewScan().ProjectByIndex({0, 2}).CreateLogScanner(projected_scanner)); +table.NewScan().ProjectByIndex({0, 2}).CreateLogScanner(projected_scanner); // Project by column name fluss::LogScanner name_projected_scanner; -check("new_scanner", - table.NewScan().ProjectByName({"event_id", "timestamp"}).CreateLogScanner(name_projected_scanner)); +table.NewScan().ProjectByName({"event_id", "timestamp"}).CreateLogScanner(name_projected_scanner); // Arrow RecordBatch with projection fluss::LogScanner projected_arrow_scanner; -check("new_scanner", - table.NewScan().ProjectByIndex({0, 2}).CreateRecordBatchLogScanner(projected_arrow_scanner)); +table.NewScan().ProjectByIndex({0, 2}).CreateRecordBatchLogScanner(projected_arrow_scanner); ``` diff --git 
a/website/docs/user-guide/cpp/example/partitioned-tables.md b/website/docs/user-guide/cpp/example/partitioned-tables.md index 61a6dacd..6a6927f5 100644 --- a/website/docs/user-guide/cpp/example/partitioned-tables.md +++ b/website/docs/user-guide/cpp/example/partitioned-tables.md @@ -24,27 +24,27 @@ auto descriptor = fluss::TableDescriptor::NewBuilder() .Build(); fluss::TablePath table_path("fluss", "partitioned_events"); -check("create_table", admin.CreateTable(table_path, descriptor, true)); +admin.CreateTable(table_path, descriptor, true); ``` ### Writing to Partitioned Log Tables -**Partitions must exist before writing data, otherwise the client will by default retry indefinitely.** Include partition column values in each row — the client routes records to the correct partition automatically. +**Partitions must exist before writing data, otherwise the client will by default retry indefinitely.** Include partition column values in each row, the client routes records to the correct partition automatically. ```cpp fluss::Table table; -check("get_table", conn.GetTable(table_path, table)); +conn.GetTable(table_path, table); fluss::AppendWriter writer; -check("new_writer", table.NewAppend().CreateWriter(writer)); +table.NewAppend().CreateWriter(writer); fluss::GenericRow row; row.SetInt32(0, 1); row.SetString(1, "user_login"); row.SetString(2, "2024-01-15"); row.SetString(3, "US"); -check("append", writer.Append(row)); -check("flush", writer.Flush()); +writer.Append(row); +writer.Flush(); ``` ### Reading from Partitioned Log Tables @@ -53,18 +53,18 @@ For partitioned tables, use partition-aware subscribe methods. ```cpp fluss::Table table; -check("get_table", conn.GetTable(table_path, table)); +conn.GetTable(table_path, table); fluss::LogScanner scanner; -check("new_scanner", table.NewScan().CreateLogScanner(scanner)); +table.NewScan().CreateLogScanner(scanner); // Subscribe to individual partitions for (const auto& pi : partition_infos) { - check("subscribe", scanner.SubscribePartitionBuckets(pi.partition_id, 0, 0)); + scanner.SubscribePartitionBuckets(pi.partition_id, 0, 0); } fluss::ScanRecords records; -check("poll", scanner.Poll(5000, records)); +scanner.Poll(5000, records); for (const auto& rec : records) { std::cout << "bucket_id=" << rec.bucket_id @@ -73,32 +73,37 @@ for (const auto& rec : records) { // Or batch-subscribe to all partitions at once fluss::LogScanner batch_scanner; -check("new_scanner", table.NewScan().CreateLogScanner(batch_scanner)); +table.NewScan().CreateLogScanner(batch_scanner); std::vector subs; for (const auto& pi : partition_infos) { subs.push_back({pi.partition_id, 0, 0}); } -check("subscribe", batch_scanner.SubscribePartitionBuckets(subs)); +batch_scanner.SubscribePartitionBuckets(subs); +``` + +**Unsubscribe from a partition bucket:** + +```cpp +// Stop receiving records from a specific partition bucket +scanner.UnsubscribePartition(partition_infos[0].partition_id, 0); ``` ### Managing Partitions ```cpp // Create a partition -check("create_partition", - admin.CreatePartition(table_path, {{"dt", "2024-01-15"}, {"region", "EMEA"}}, true)); +admin.CreatePartition(table_path, {{"dt", "2024-01-15"}, {"region", "EMEA"}}, true); // List partitions std::vector partition_infos; -check("list_partitions", admin.ListPartitionInfos(table_path, partition_infos)); +admin.ListPartitionInfos(table_path, partition_infos); // Query partition offsets std::vector bucket_ids = {0, 1, 2}; std::unordered_map offsets; -check("list_partition_offsets", - 
admin.ListPartitionOffsets(table_path, "2024-01-15$US", - bucket_ids, fluss::OffsetQuery::Latest(), offsets)); +admin.ListPartitionOffsets(table_path, "2024-01-15$US", + bucket_ids, fluss::OffsetQuery::Latest(), offsets); ``` ## Partitioned Primary Key Tables @@ -123,7 +128,7 @@ auto descriptor = fluss::TableDescriptor::NewBuilder() .Build(); fluss::TablePath table_path("fluss", "partitioned_users"); -check("create_table", admin.CreateTable(table_path, descriptor, true)); +admin.CreateTable(table_path, descriptor, true); ``` ### Writing to Partitioned Primary Key Tables @@ -132,23 +137,23 @@ check("create_table", admin.CreateTable(table_path, descriptor, true)); ```cpp fluss::Table table; -check("get_table", conn.GetTable(table_path, table)); +conn.GetTable(table_path, table); // Create partitions first -check("create_APAC", admin.CreatePartition(table_path, {{"region", "APAC"}, {"zone", "1"}}, true)); -check("create_EMEA", admin.CreatePartition(table_path, {{"region", "EMEA"}, {"zone", "2"}}, true)); -check("create_US", admin.CreatePartition(table_path, {{"region", "US"}, {"zone", "3"}}, true)); +admin.CreatePartition(table_path, {{"region", "APAC"}, {"zone", "1"}}, true); +admin.CreatePartition(table_path, {{"region", "EMEA"}, {"zone", "2"}}, true); +admin.CreatePartition(table_path, {{"region", "US"}, {"zone", "3"}}, true); fluss::UpsertWriter writer; -check("new_writer", table.NewUpsert().CreateWriter(writer)); +table.NewUpsert().CreateWriter(writer); auto row = table.NewRow(); row.Set("user_id", 1001); row.Set("region", "APAC"); row.Set("zone", static_cast(1)); row.Set("score", static_cast(1234)); -check("upsert", writer.Upsert(row)); -check("flush", writer.Flush()); +writer.Upsert(row); +writer.Flush(); ``` ### Looking Up Records in Partitioned Tables @@ -159,7 +164,7 @@ Lookup requires all primary key columns including partition columns. 
```cpp fluss::Lookuper lookuper; -check("new_lookuper", table.NewLookup().CreateLookuper(lookuper)); +table.NewLookup().CreateLookuper(lookuper); auto pk = table.NewRow(); pk.Set("user_id", 1001); @@ -168,7 +173,7 @@ pk.Set("zone", static_cast(1)); bool found = false; fluss::GenericRow result; -check("lookup", lookuper.Lookup(pk, found, result)); +lookuper.Lookup(pk, found, result); if (found) { std::cout << "score=" << result.GetInt64(3) << std::endl; } diff --git a/website/docs/user-guide/cpp/example/primary-key-tables.md b/website/docs/user-guide/cpp/example/primary-key-tables.md index 196c4a4d..7aa87e31 100644 --- a/website/docs/user-guide/cpp/example/primary-key-tables.md +++ b/website/docs/user-guide/cpp/example/primary-key-tables.md @@ -21,17 +21,17 @@ auto descriptor = fluss::TableDescriptor::NewBuilder() .Build(); fluss::TablePath table_path("fluss", "users"); -check("create_table", admin.CreateTable(table_path, descriptor, true)); +admin.CreateTable(table_path, descriptor, true); ``` ## Upserting Records ```cpp fluss::Table table; -check("get_table", conn.GetTable(table_path, table)); +conn.GetTable(table_path, table); fluss::UpsertWriter upsert_writer; -check("new_upsert_writer", table.NewUpsert().CreateWriter(upsert_writer)); +table.NewUpsert().CreateWriter(upsert_writer); // Fire-and-forget upserts { @@ -39,16 +39,16 @@ check("new_upsert_writer", table.NewUpsert().CreateWriter(upsert_writer)); row.Set("id", 1); row.Set("name", "Alice"); row.Set("age", static_cast(25)); - check("upsert", upsert_writer.Upsert(row)); + upsert_writer.Upsert(row); } { auto row = table.NewRow(); row.Set("id", 2); row.Set("name", "Bob"); row.Set("age", static_cast(30)); - check("upsert", upsert_writer.Upsert(row)); + upsert_writer.Upsert(row); } -check("flush", upsert_writer.Flush()); +upsert_writer.Flush(); // Per-record acknowledgment { @@ -57,8 +57,8 @@ check("flush", upsert_writer.Flush()); row.Set("name", "Charlie"); row.Set("age", static_cast(35)); fluss::WriteResult wr; - check("upsert", upsert_writer.Upsert(row, wr)); - check("wait", wr.Wait()); + upsert_writer.Upsert(row, wr); + wr.Wait(); } ``` @@ -72,8 +72,8 @@ row.Set("id", 1); row.Set("name", "Alice Updated"); row.Set("age", static_cast(26)); fluss::WriteResult wr; -check("upsert", upsert_writer.Upsert(row, wr)); -check("wait", wr.Wait()); +upsert_writer.Upsert(row, wr); +wr.Wait(); ``` ## Deleting Records @@ -82,8 +82,8 @@ check("wait", wr.Wait()); auto pk_row = table.NewRow(); pk_row.Set("id", 2); fluss::WriteResult wr; -check("delete", upsert_writer.Delete(pk_row, wr)); -check("wait", wr.Wait()); +upsert_writer.Delete(pk_row, wr); +wr.Wait(); ``` ## Partial Updates @@ -93,38 +93,36 @@ Update only specific columns while preserving others. 
```cpp // By column names fluss::UpsertWriter partial_writer; -check("new_partial_writer", - table.NewUpsert() - .PartialUpdateByName({"id", "age"}) - .CreateWriter(partial_writer)); +table.NewUpsert() + .PartialUpdateByName({"id", "age"}) + .CreateWriter(partial_writer); auto row = table.NewRow(); row.Set("id", 1); row.Set("age", static_cast(27)); fluss::WriteResult wr; -check("partial_upsert", partial_writer.Upsert(row, wr)); -check("wait", wr.Wait()); +partial_writer.Upsert(row, wr); +wr.Wait(); // By column indices fluss::UpsertWriter partial_writer_idx; -check("new_partial_writer", - table.NewUpsert() - .PartialUpdateByIndex({0, 2}) - .CreateWriter(partial_writer_idx)); +table.NewUpsert() + .PartialUpdateByIndex({0, 2}) + .CreateWriter(partial_writer_idx); ``` ## Looking Up Records ```cpp fluss::Lookuper lookuper; -check("new_lookuper", table.NewLookup().CreateLookuper(lookuper)); +table.NewLookup().CreateLookuper(lookuper); auto pk_row = table.NewRow(); pk_row.Set("id", 1); bool found = false; fluss::GenericRow result_row; -check("lookup", lookuper.Lookup(pk_row, found, result_row)); +lookuper.Lookup(pk_row, found, result_row); if (found) { std::cout << "Found: name=" << result_row.GetString(1) diff --git a/website/docs/user-guide/cpp/installation.md b/website/docs/user-guide/cpp/installation.md index e28093e6..6360da43 100644 --- a/website/docs/user-guide/cpp/installation.md +++ b/website/docs/user-guide/cpp/installation.md @@ -49,8 +49,8 @@ cmake --build . ``` This produces: -- `libfluss_cpp.a` — Static library -- `fluss_cpp_example` — Example executable +- `libfluss_cpp.a` (Static library) +- `fluss_cpp_example` (Example executable) - Header files in `include/` ## Integrating into Your Project diff --git a/website/docs/user-guide/python/api-reference.md b/website/docs/user-guide/python/api-reference.md index 9f6ce766..99437630 100644 --- a/website/docs/user-guide/python/api-reference.md +++ b/website/docs/user-guide/python/api-reference.md @@ -7,269 +7,275 @@ Complete API reference for the Fluss Python client. ## `Config` -| Method / Property | Description | -|---|---| +| Method / Property | Description | +|-----------------------------------|----------------------------------------------| | `Config(properties: dict = None)` | Create config from a dict of key-value pairs | -| `.bootstrap_servers` | Get/set coordinator server address | -| `.writer_request_max_size` | Get/set max request size in bytes | -| `.writer_batch_size` | Get/set write batch size in bytes | +| `.bootstrap_servers` | Get/set coordinator server address | +| `.writer_request_max_size` | Get/set max request size in bytes | +| `.writer_batch_size` | Get/set write batch size in bytes | ## `FlussConnection` -| Method | Description | -|---|---| -| `await FlussConnection.create(config) -> FlussConnection` | Connect to a Fluss cluster | -| `await conn.get_admin() -> FlussAdmin` | Get admin interface | -| `await conn.get_table(table_path) -> FlussTable` | Get a table for read/write operations | -| `conn.close()` | Close the connection | +| Method | Description | +|-----------------------------------------------------------|---------------------------------------| +| `await FlussConnection.create(config) -> FlussConnection` | Connect to a Fluss cluster | +| `await conn.get_admin() -> FlussAdmin` | Get admin interface | +| `await conn.get_table(table_path) -> FlussTable` | Get a table for read/write operations | +| `conn.close()` | Close the connection | Supports `with` statement (context manager). 
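+
+A minimal connect-and-use sketch (run inside an `async` function; the address below is the default local coordinator from the configuration guide):
+
+```python
+import fluss
+
+config = fluss.Config({"bootstrap.servers": "127.0.0.1:9123"})
+
+# The connection is a context manager, so it is closed automatically on exit.
+with await fluss.FlussConnection.create(config) as conn:
+    admin = await conn.get_admin()
+    databases = await admin.list_databases()
+    print(databases)
+```
+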
## `FlussAdmin` -| Method | Description | -|---|---| -| `await create_database(name, database_descriptor=None, ignore_if_exists=False)` | Create a database | -| `await drop_database(name, ignore_if_not_exists=False, cascade=True)` | Drop a database | -| `await list_databases() -> list[str]` | List all databases | -| `await database_exists(name) -> bool` | Check if a database exists | -| `await get_database_info(name) -> DatabaseInfo` | Get database metadata | -| `await create_table(table_path, table_descriptor, ignore_if_exists=False)` | Create a table | -| `await drop_table(table_path, ignore_if_not_exists=False)` | Drop a table | -| `await get_table_info(table_path) -> TableInfo` | Get table metadata | -| `await list_tables(database_name) -> list[str]` | List tables in a database | -| `await table_exists(table_path) -> bool` | Check if a table exists | -| `await list_offsets(table_path, bucket_ids, offset_type, timestamp=None) -> dict[int, int]` | Get offsets for buckets | +| Method | Description | +|-----------------------------------------------------------------------------------------------------------------------|---------------------------------------| +| `await create_database(name, database_descriptor=None, ignore_if_exists=False)` | Create a database | +| `await drop_database(name, ignore_if_not_exists=False, cascade=True)` | Drop a database | +| `await list_databases() -> list[str]` | List all databases | +| `await database_exists(name) -> bool` | Check if a database exists | +| `await get_database_info(name) -> DatabaseInfo` | Get database metadata | +| `await create_table(table_path, table_descriptor, ignore_if_exists=False)` | Create a table | +| `await drop_table(table_path, ignore_if_not_exists=False)` | Drop a table | +| `await get_table_info(table_path) -> TableInfo` | Get table metadata | +| `await list_tables(database_name) -> list[str]` | List tables in a database | +| `await table_exists(table_path) -> bool` | Check if a table exists | +| `await list_offsets(table_path, bucket_ids, offset_type, timestamp=None) -> dict[int, int]` | Get offsets for buckets | | `await list_partition_offsets(table_path, partition_name, bucket_ids, offset_type, timestamp=None) -> dict[int, int]` | Get offsets for a partition's buckets | -| `await create_partition(table_path, partition_spec, ignore_if_exists=False)` | Create a partition | -| `await drop_partition(table_path, partition_spec, ignore_if_not_exists=False)` | Drop a partition | -| `await list_partition_infos(table_path) -> list[PartitionInfo]` | List partitions | -| `await get_latest_lake_snapshot(table_path) -> LakeSnapshot` | Get latest lake snapshot | +| `await create_partition(table_path, partition_spec, ignore_if_exists=False)` | Create a partition | +| `await drop_partition(table_path, partition_spec, ignore_if_not_exists=False)` | Drop a partition | +| `await list_partition_infos(table_path) -> list[PartitionInfo]` | List partitions | +| `await get_latest_lake_snapshot(table_path) -> LakeSnapshot` | Get latest lake snapshot | ## `FlussTable` -| Method | Description | -|---|---| -| `new_scan() -> TableScan` | Create a scan builder | -| `new_append() -> TableAppend` | Create an append builder for log tables | -| `new_upsert() -> TableUpsert` | Create an upsert builder for PK tables | -| `new_lookup() -> TableLookup` | Create a lookup builder for PK tables | -| `get_table_info() -> TableInfo` | Get table metadata | -| `get_table_path() -> TablePath` | Get table path | -| `has_primary_key() -> bool` | Check if table has a 
primary key | +| Method | Description | +|---------------------------------|-----------------------------------------| +| `new_scan() -> TableScan` | Create a scan builder | +| `new_append() -> TableAppend` | Create an append builder for log tables | +| `new_upsert() -> TableUpsert` | Create an upsert builder for PK tables | +| `new_lookup() -> TableLookup` | Create a lookup builder for PK tables | +| `get_table_info() -> TableInfo` | Get table metadata | +| `get_table_path() -> TablePath` | Get table path | +| `has_primary_key() -> bool` | Check if table has a primary key | ## `TableScan` -| Method | Description | -|---|---| -| `.project(indices) -> TableScan` | Project columns by index | -| `.project_by_name(names) -> TableScan` | Project columns by name | -| `await .create_log_scanner() -> LogScanner` | Create record-based scanner (for `poll()`) | +| Method | Description | +|----------------------------------------------------------|---------------------------------------------------------------------| +| `.project(indices) -> TableScan` | Project columns by index | +| `.project_by_name(names) -> TableScan` | Project columns by name | +| `await .create_log_scanner() -> LogScanner` | Create record-based scanner (for `poll()`) | | `await .create_record_batch_log_scanner() -> LogScanner` | Create batch-based scanner (for `poll_arrow()`, `to_arrow()`, etc.) | ## `TableAppend` Builder for creating an `AppendWriter`. Obtain via `FlussTable.new_append()`. -| Method | Description | -|---|---| +| Method | Description | +|------------------------------------|--------------------------| | `.create_writer() -> AppendWriter` | Create the append writer | ## `TableUpsert` Builder for creating an `UpsertWriter`. Obtain via `FlussTable.new_upsert()`. -| Method | Description | -|---|---| -| `.partial_update_by_name(columns) -> TableUpsert` | Configure partial update by column names | +| Method | Description | +|----------------------------------------------------|--------------------------------------------| +| `.partial_update_by_name(columns) -> TableUpsert` | Configure partial update by column names | | `.partial_update_by_index(indices) -> TableUpsert` | Configure partial update by column indices | -| `.create_writer() -> UpsertWriter` | Create the upsert writer | +| `.create_writer() -> UpsertWriter` | Create the upsert writer | ## `TableLookup` Builder for creating a `Lookuper`. Obtain via `FlussTable.new_lookup()`. 
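+
+A short sketch of the lookup flow, given a `FlussTable` `table` for a primary key table (the key dict below assumes an integer `id` primary key column):
+
+```python
+lookuper = table.new_lookup().create_lookuper()
+
+# Returns the matching row as a dict, or None if no row has this key.
+row = await lookuper.lookup({"id": 1})
+if row is not None:
+    print(row)
+```
+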
-| Method | Description | -|---|---| +| Method | Description | +|----------------------------------|---------------------| | `.create_lookuper() -> Lookuper` | Create the lookuper | ## `AppendWriter` -| Method | Description | -|---|---| -| `.append(row) -> WriteResultHandle` | Append a row (dict, list, or tuple) | -| `.write_arrow(table)` | Write a PyArrow Table | -| `.write_arrow_batch(batch) -> WriteResultHandle` | Write a PyArrow RecordBatch | -| `.write_pandas(df)` | Write a Pandas DataFrame | -| `await .flush()` | Flush all pending writes | +| Method | Description | +|--------------------------------------------------|-------------------------------------| +| `.append(row) -> WriteResultHandle` | Append a row (dict, list, or tuple) | +| `.write_arrow(table)` | Write a PyArrow Table | +| `.write_arrow_batch(batch) -> WriteResultHandle` | Write a PyArrow RecordBatch | +| `.write_pandas(df)` | Write a Pandas DataFrame | +| `await .flush()` | Flush all pending writes | ## `UpsertWriter` -| Method | Description | -|---|---| +| Method | Description | +|-------------------------------------|---------------------------------------| | `.upsert(row) -> WriteResultHandle` | Upsert a row (insert or update by PK) | -| `.delete(pk) -> WriteResultHandle` | Delete a row by primary key | -| `await .flush()` | Flush all pending operations | +| `.delete(pk) -> WriteResultHandle` | Delete a row by primary key | +| `await .flush()` | Flush all pending operations | ## `WriteResultHandle` -| Method | Description | -|---|---| +| Method | Description | +|-----------------|----------------------------------------------| | `await .wait()` | Wait for server acknowledgment of this write | ## `Lookuper` -| Method | Description | -|---|---| +| Method | Description | +|-------------------------------------|-----------------------------| | `await .lookup(pk) -> dict \| None` | Lookup a row by primary key | ## `LogScanner` -| Method | Description | -|---|---| -| `.subscribe(bucket_id, start_offset)` | Subscribe to a bucket | -| `.subscribe_buckets(bucket_offsets)` | Subscribe to multiple buckets (`{bucket_id: offset}`) | -| `.subscribe_partition(partition_id, bucket_id, start_offset)` | Subscribe to a partition bucket | -| `.subscribe_partition_buckets(partition_bucket_offsets)` | Subscribe to multiple partition+bucket combos (`{(part_id, bucket_id): offset}`) | -| `.unsubscribe(bucket_id)` | Unsubscribe from a bucket (non-partitioned tables) | -| `.unsubscribe_partition(partition_id, bucket_id)` | Unsubscribe from a partition bucket | -| `.poll(timeout_ms) -> list[ScanRecord]` | Poll individual records (record scanner only) | -| `.poll_arrow(timeout_ms) -> pa.Table` | Poll as Arrow Table (batch scanner only) | -| `.poll_record_batch(timeout_ms) -> list[RecordBatch]` | Poll batches with metadata (batch scanner only) | -| `.to_arrow() -> pa.Table` | Read all subscribed data as Arrow Table (batch scanner only) | -| `.to_pandas() -> pd.DataFrame` | Read all subscribed data as DataFrame (batch scanner only) | +| Method | Description | +|---------------------------------------------------------------|----------------------------------------------------------------------------------| +| `.subscribe(bucket_id, start_offset)` | Subscribe to a bucket | +| `.subscribe_buckets(bucket_offsets)` | Subscribe to multiple buckets (`{bucket_id: offset}`) | +| `.subscribe_partition(partition_id, bucket_id, start_offset)` | Subscribe to a partition bucket | +| `.subscribe_partition_buckets(partition_bucket_offsets)` | Subscribe to 
multiple partition+bucket combos (`{(part_id, bucket_id): offset}`) | +| `.unsubscribe(bucket_id)` | Unsubscribe from a bucket (non-partitioned tables) | +| `.unsubscribe_partition(partition_id, bucket_id)` | Unsubscribe from a partition bucket | +| `.poll(timeout_ms) -> list[ScanRecord]` | Poll individual records (record scanner only) | +| `.poll_arrow(timeout_ms) -> pa.Table` | Poll as Arrow Table (batch scanner only) | +| `.poll_record_batch(timeout_ms) -> list[RecordBatch]` | Poll batches with metadata (batch scanner only) | +| `.to_arrow() -> pa.Table` | Read all subscribed data as Arrow Table (batch scanner only) | +| `.to_pandas() -> pd.DataFrame` | Read all subscribed data as DataFrame (batch scanner only) | ## `ScanRecord` -| Property | Description | -|---|---| -| `.bucket -> TableBucket` | Bucket this record belongs to | -| `.offset -> int` | Record offset in the log | -| `.timestamp -> int` | Record timestamp | +| Property | Description | +|------------------------------|---------------------------------------------------------------------| +| `.bucket -> TableBucket` | Bucket this record belongs to | +| `.offset -> int` | Record offset in the log | +| `.timestamp -> int` | Record timestamp | | `.change_type -> ChangeType` | Change type (AppendOnly, Insert, UpdateBefore, UpdateAfter, Delete) | -| `.row -> dict` | Row data as `{column_name: value}` | +| `.row -> dict` | Row data as `{column_name: value}` | ## `RecordBatch` -| Property | Description | -|---|---| -| `.batch -> pa.RecordBatch` | Arrow RecordBatch data | -| `.bucket -> TableBucket` | Bucket this batch belongs to | -| `.base_offset -> int` | First record offset | -| `.last_offset -> int` | Last record offset | +| Property | Description | +|----------------------------|------------------------------| +| `.batch -> pa.RecordBatch` | Arrow RecordBatch data | +| `.bucket -> TableBucket` | Bucket this batch belongs to | +| `.base_offset -> int` | First record offset | +| `.last_offset -> int` | Last record offset | ## `Schema` -| Method | Description | -|---|---| +| Method | Description | +|------------------------------------------------|----------------------------| | `Schema(schema: pa.Schema, primary_keys=None)` | Create from PyArrow schema | -| `.get_column_names() -> list[str]` | Get column names | -| `.get_column_types() -> list[str]` | Get column type names | +| `.get_column_names() -> list[str]` | Get column names | +| `.get_column_types() -> list[str]` | Get column type names | ## `TableDescriptor` -| Method | Description | -|---|---| +| Method | Description | +|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------| | `TableDescriptor(schema, *, partition_keys=None, bucket_count=None, bucket_keys=None, comment=None, log_format=None, kv_format=None, properties=None, custom_properties=None)` | Create table descriptor | -| `.get_schema() -> Schema` | Get the schema | +| `.get_schema() -> Schema` | Get the schema | ## `TablePath` -| Method / Property | Description | -|---|---| +| Method / Property | Description | +|------------------------------|---------------------| | `TablePath(database, table)` | Create a table path | -| `.database_name -> str` | Database name | -| `.table_name -> str` | Table name | +| `.database_name -> str` | Database name | +| `.table_name -> str` | Table name | ## `TableInfo` -| Property / Method | Description | -|---|---| -| `.table_id -> int` 
| Table ID | -| `.table_path -> TablePath` | Table path | -| `.num_buckets -> int` | Number of buckets | -| `.schema_id -> int` | Schema ID | -| `.comment -> str \| None` | Table comment | -| `.created_time -> int` | Creation timestamp | -| `.modified_time -> int` | Last modification timestamp | -| `.get_primary_keys() -> list[str]` | Primary key columns | -| `.get_partition_keys() -> list[str]` | Partition columns | -| `.get_bucket_keys() -> list[str]` | Bucket key columns | -| `.has_primary_key() -> bool` | Has primary key? | -| `.is_partitioned() -> bool` | Is partitioned? | -| `.get_schema() -> Schema` | Get table schema | -| `.get_column_names() -> list[str]` | Column names | -| `.get_column_count() -> int` | Number of columns | -| `.get_properties() -> dict` | All table properties | -| `.get_custom_properties() -> dict` | Custom properties only | +| Property / Method | Description | +|--------------------------------------|-----------------------------| +| `.table_id -> int` | Table ID | +| `.table_path -> TablePath` | Table path | +| `.num_buckets -> int` | Number of buckets | +| `.schema_id -> int` | Schema ID | +| `.comment -> str \| None` | Table comment | +| `.created_time -> int` | Creation timestamp | +| `.modified_time -> int` | Last modification timestamp | +| `.get_primary_keys() -> list[str]` | Primary key columns | +| `.get_partition_keys() -> list[str]` | Partition columns | +| `.get_bucket_keys() -> list[str]` | Bucket key columns | +| `.has_primary_key() -> bool` | Has primary key? | +| `.is_partitioned() -> bool` | Is partitioned? | +| `.get_schema() -> Schema` | Get table schema | +| `.get_column_names() -> list[str]` | Column names | +| `.get_column_count() -> int` | Number of columns | +| `.get_properties() -> dict` | All table properties | +| `.get_custom_properties() -> dict` | Custom properties only | ## `PartitionInfo` -| Property | Description | -|---|---| -| `.partition_id -> int` | Partition ID | +| Property | Description | +|--------------------------|----------------| +| `.partition_id -> int` | Partition ID | | `.partition_name -> str` | Partition name | ## `DatabaseDescriptor` -| Method / Property | Description | -|---|---| +| Method / Property | Description | +|------------------------------------------------------------|-------------------| | `DatabaseDescriptor(comment=None, custom_properties=None)` | Create descriptor | -| `.comment -> str \| None` | Database comment | -| `.get_custom_properties() -> dict` | Custom properties | +| `.comment -> str \| None` | Database comment | +| `.get_custom_properties() -> dict` | Custom properties | ## `DatabaseInfo` -| Property / Method | Description | -|---|---| -| `.database_name -> str` | Database name | -| `.created_time -> int` | Creation timestamp | -| `.modified_time -> int` | Last modification timestamp | -| `.get_database_descriptor() -> DatabaseDescriptor` | Get descriptor | +| Property / Method | Description | +|----------------------------------------------------|-----------------------------| +| `.database_name -> str` | Database name | +| `.created_time -> int` | Creation timestamp | +| `.modified_time -> int` | Last modification timestamp | +| `.get_database_descriptor() -> DatabaseDescriptor` | Get descriptor | ## `LakeSnapshot` -| Property / Method | Description | -|---|---| -| `.snapshot_id -> int` | Snapshot ID | -| `.table_buckets_offset -> dict[TableBucket, int]` | All bucket offsets | -| `.get_bucket_offset(bucket) -> int \| None` | Get offset for a bucket | -| `.get_table_buckets() -> 
list[TableBucket]` | Get all buckets | +| Property / Method | Description | +|---------------------------------------------------|-------------------------| +| `.snapshot_id -> int` | Snapshot ID | +| `.table_buckets_offset -> dict[TableBucket, int]` | All bucket offsets | +| `.get_bucket_offset(bucket) -> int \| None` | Get offset for a bucket | +| `.get_table_buckets() -> list[TableBucket]` | Get all buckets | ## `TableBucket` -| Method / Property | Description | -|---|---| -| `TableBucket(table_id, bucket)` | Create non-partitioned bucket | -| `TableBucket.with_partition(table_id, partition_id, bucket)` | Create partitioned bucket | -| `.table_id -> int` | Table ID | -| `.bucket_id -> int` | Bucket ID | -| `.partition_id -> int \| None` | Partition ID (None if non-partitioned) | +| Method / Property | Description | +|--------------------------------------------------------------|----------------------------------------| +| `TableBucket(table_id, bucket)` | Create non-partitioned bucket | +| `TableBucket.with_partition(table_id, partition_id, bucket)` | Create partitioned bucket | +| `.table_id -> int` | Table ID | +| `.bucket_id -> int` | Bucket ID | +| `.partition_id -> int \| None` | Partition ID (None if non-partitioned) | ## `FlussError` -| Property | Description | -|---|---| +| Property | Description | +|-------------------|---------------| | `.message -> str` | Error message | Raised for all Fluss-specific errors (connection failures, table not found, schema mismatches, etc.). Inherits from `Exception`. ## Constants -| Constant | Value | Description | -|---|---|---| -| `fluss.EARLIEST_OFFSET` | `-2` | Start reading from earliest available offset | -| `fluss.LATEST_OFFSET` | `-1` | Start reading from latest offset (only new records) | -| `fluss.OffsetType.EARLIEST` | `"earliest"` | For `list_offsets()` | -| `fluss.OffsetType.LATEST` | `"latest"` | For `list_offsets()` | -| `fluss.OffsetType.TIMESTAMP` | `"timestamp"` | For `list_offsets()` with timestamp | +| Constant | Value | Description | +|------------------------------|---------------|-----------------------------------------------------| +| `fluss.EARLIEST_OFFSET` | `-2` | Start reading from earliest available offset | +| `fluss.OffsetType.EARLIEST` | `"earliest"` | For `list_offsets()` | +| `fluss.OffsetType.LATEST` | `"latest"` | For `list_offsets()` | +| `fluss.OffsetType.TIMESTAMP` | `"timestamp"` | For `list_offsets()` with timestamp | + +To start reading from the latest offset (only new records), resolve the current offset via `list_offsets` before subscribing: + +```python +offsets = await admin.list_offsets(table_path, [0], fluss.OffsetType.LATEST) +scanner.subscribe(bucket_id=0, start_offset=offsets[0]) +``` ## `ChangeType` -| Value | Short String | Description | -|---|---|---| -| `ChangeType.AppendOnly` (0) | `+A` | Append-only | -| `ChangeType.Insert` (1) | `+I` | Insert | -| `ChangeType.UpdateBefore` (2) | `-U` | Previous value of updated row | -| `ChangeType.UpdateAfter` (3) | `+U` | New value of updated row | -| `ChangeType.Delete` (4) | `-D` | Delete | +| Value | Short String | Description | +|-------------------------------|--------------|-------------------------------| +| `ChangeType.AppendOnly` (0) | `+A` | Append-only | +| `ChangeType.Insert` (1) | `+I` | Insert | +| `ChangeType.UpdateBefore` (2) | `-U` | Previous value of updated row | +| `ChangeType.UpdateAfter` (3) | `+U` | New value of updated row | +| `ChangeType.Delete` (4) | `-D` | Delete | diff --git a/website/docs/user-guide/python/data-types.md 
b/website/docs/user-guide/python/data-types.md index fed5b748..608a49f9 100644 --- a/website/docs/user-guide/python/data-types.md +++ b/website/docs/user-guide/python/data-types.md @@ -5,17 +5,17 @@ sidebar_position: 3 The Python client uses PyArrow types for schema definitions: -| PyArrow Type | Fluss Type | Python Type | -|---|---|---| -| `pa.bool_()` | Boolean | `bool` | -| `pa.int8()` / `int16()` / `int32()` / `int64()` | TinyInt / SmallInt / Int / BigInt | `int` | -| `pa.float32()` / `float64()` | Float / Double | `float` | -| `pa.string()` | String | `str` | -| `pa.binary()` | Bytes | `bytes` | -| `pa.date32()` | Date | `datetime.date` | -| `pa.time32("ms")` | Time | `datetime.time` | -| `pa.timestamp("us")` | Timestamp (NTZ) | `datetime.datetime` | -| `pa.timestamp("us", tz="UTC")` | TimestampLTZ | `datetime.datetime` | -| `pa.decimal128(precision, scale)` | Decimal | `decimal.Decimal` | +| PyArrow Type | Fluss Type | Python Type | +|-------------------------------------------------|-----------------------------------|---------------------| +| `pa.bool_()` | Boolean | `bool` | +| `pa.int8()` / `int16()` / `int32()` / `int64()` | TinyInt / SmallInt / Int / BigInt | `int` | +| `pa.float32()` / `float64()` | Float / Double | `float` | +| `pa.string()` | String | `str` | +| `pa.binary()` | Bytes | `bytes` | +| `pa.date32()` | Date | `datetime.date` | +| `pa.time32("ms")` | Time | `datetime.time` | +| `pa.timestamp("us")` | Timestamp (NTZ) | `datetime.datetime` | +| `pa.timestamp("us", tz="UTC")` | TimestampLTZ | `datetime.datetime` | +| `pa.decimal128(precision, scale)` | Decimal | `decimal.Decimal` | All Python native types (`date`, `time`, `datetime`, `Decimal`) work when appending rows via dicts. diff --git a/website/docs/user-guide/python/error-handling.md b/website/docs/user-guide/python/error-handling.md index 955ea76b..3f679485 100644 --- a/website/docs/user-guide/python/error-handling.md +++ b/website/docs/user-guide/python/error-handling.md @@ -13,7 +13,7 @@ except fluss.FlussError as e: ``` Common error scenarios: -- **Connection refused** — Fluss cluster is not running or wrong address in `bootstrap.servers` -- **Table not found** — table doesn't exist or wrong database/table name -- **Partition not found** — writing to a partitioned table before creating partitions -- **Schema mismatch** — row data doesn't match the table schema +- **Connection refused**: Fluss cluster is not running or wrong address in `bootstrap.servers` +- **Table not found**: table doesn't exist or wrong database/table name +- **Partition not found**: writing to a partitioned table before creating partitions +- **Schema mismatch**: row data doesn't match the table schema diff --git a/website/docs/user-guide/python/example/admin-operations.md b/website/docs/user-guide/python/example/admin-operations.md index e905bfd5..8c62ee78 100644 --- a/website/docs/user-guide/python/example/admin-operations.md +++ b/website/docs/user-guide/python/example/admin-operations.md @@ -41,16 +41,16 @@ await admin.drop_table(table_path, ignore_if_not_exists=True) `TableDescriptor` accepts these optional parameters: -| Parameter | Description | -|---|---| -| `partition_keys` | Column names to partition by (e.g. 
`["region"]`) | -| `bucket_count` | Number of buckets (parallelism units) for the table | -| `bucket_keys` | Columns used to determine bucket assignment | -| `comment` | Table comment / description | -| `log_format` | Log storage format: `"ARROW"` or `"INDEXED"` | -| `kv_format` | KV storage format for primary key tables: `"INDEXED"` or `"COMPACTED"` | -| `properties` | Table configuration properties as a dict (e.g. `{"table.replication.factor": "1"}`) | -| `custom_properties` | User-defined properties as a dict | +| Parameter | Description | +|---------------------|-------------------------------------------------------------------------------------| +| `partition_keys` | Column names to partition by (e.g. `["region"]`) | +| `bucket_count` | Number of buckets (parallelism units) for the table | +| `bucket_keys` | Columns used to determine bucket assignment | +| `comment` | Table comment / description | +| `log_format` | Log storage format: `"ARROW"` or `"INDEXED"` | +| `kv_format` | KV storage format for primary key tables: `"INDEXED"` or `"COMPACTED"` | +| `properties` | Table configuration properties as a dict (e.g. `{"table.replication.factor": "1"}`) | +| `custom_properties` | User-defined properties as a dict | ## Offsets diff --git a/website/docs/user-guide/python/example/configuration.md b/website/docs/user-guide/python/example/configuration.md index b7db70d6..8e88d2a6 100644 --- a/website/docs/user-guide/python/example/configuration.md +++ b/website/docs/user-guide/python/example/configuration.md @@ -19,13 +19,13 @@ with await fluss.FlussConnection.create(config) as conn: ## Configuration Options -| Key | Description | Default | -|-----|-------------|---------| -| `bootstrap.servers` | Coordinator server address | `127.0.0.1:9123` | -| `request.max.size` | Maximum request size in bytes | `10485760` (10 MB) | -| `writer.acks` | Acknowledgment setting (`all` waits for all replicas) | `all` | -| `writer.retries` | Number of retries on failure | `2147483647` | -| `writer.batch.size` | Batch size for writes in bytes | `2097152` (2 MB) | +| Key | Description | Default | +|---------------------|-------------------------------------------------------|--------------------| +| `bootstrap.servers` | Coordinator server address | `127.0.0.1:9123` | +| `request.max.size` | Maximum request size in bytes | `10485760` (10 MB) | +| `writer.acks` | Acknowledgment setting (`all` waits for all replicas) | `all` | +| `writer.retries` | Number of retries on failure | `2147483647` | +| `writer.batch.size` | Batch size for writes in bytes | `2097152` (2 MB) | Remember to close the connection when done: diff --git a/website/docs/user-guide/python/example/log-tables.md b/website/docs/user-guide/python/example/log-tables.md index 88e7152c..63903a4e 100644 --- a/website/docs/user-guide/python/example/log-tables.md +++ b/website/docs/user-guide/python/example/log-tables.md @@ -49,12 +49,12 @@ await writer.flush() ## Reading There are two scanner types: -- **Batch scanner** (`create_record_batch_log_scanner()`) — returns Arrow Tables or DataFrames, best for analytics -- **Record scanner** (`create_log_scanner()`) — returns individual records with metadata (offset, timestamp, change type), best for streaming +- **Batch scanner** (`create_record_batch_log_scanner()`): returns Arrow Tables or DataFrames, best for analytics +- **Record scanner** (`create_log_scanner()`): returns individual records with metadata (offset, timestamp, change type), best for streaming And two reading modes: -- **`to_arrow()` / 
`to_pandas()`** — reads all data from subscribed buckets up to the current latest offset, then returns. Best for one-shot batch reads. -- **`poll_arrow()` / `poll()` / `poll_record_batch()`** — returns whatever data is available within the timeout, then returns. Call in a loop for continuous streaming. +- **`to_arrow()` / `to_pandas()`**: reads all data from subscribed buckets up to the current latest offset, then returns. Best for one-shot batch reads. +- **`poll_arrow()` / `poll()` / `poll_record_batch()`**: returns whatever data is available within the timeout, then returns. Call in a loop for continuous streaming. ### Batch Read (One-Shot) @@ -92,13 +92,25 @@ while True: print(f"offset={record.offset}, change={record.change_type.short_string()}, row={record.row}") ``` +### Unsubscribing + +To stop consuming from a bucket, use `unsubscribe()`: + +```python +scanner.unsubscribe(bucket_id=0) +``` + ### Subscribe from Latest Offset -To only consume new records (skip existing data), use `LATEST_OFFSET`: +To only consume new records (skip existing data), first resolve the current latest offset via `list_offsets`, then subscribe at that offset: ```python +admin = await conn.get_admin() +offsets = await admin.list_offsets(table_path, [0], fluss.OffsetType.LATEST) +latest = offsets[0] + scanner = await table.new_scan().create_record_batch_log_scanner() -scanner.subscribe(bucket_id=0, start_offset=fluss.LATEST_OFFSET) +scanner.subscribe(bucket_id=0, start_offset=latest) ``` ## Column Projection diff --git a/website/docs/user-guide/python/example/partitioned-tables.md b/website/docs/user-guide/python/example/partitioned-tables.md index 41ee8bb3..f8280920 100644 --- a/website/docs/user-guide/python/example/partitioned-tables.md +++ b/website/docs/user-guide/python/example/partitioned-tables.md @@ -62,6 +62,14 @@ scanner.subscribe_partition_buckets({ print(scanner.to_pandas()) ``` +### Unsubscribing + +To stop consuming from a specific partition bucket, use `unsubscribe_partition()`: + +```python +scanner.unsubscribe_partition(partition_id=partition_infos[0].partition_id, bucket_id=0) +``` + ## Partitioned Primary Key Tables Partition columns must be part of the primary key. Partitions must exist before upserting data, otherwise the client will by default retry indefinitely. diff --git a/website/docs/user-guide/rust/api-reference.md b/website/docs/user-guide/rust/api-reference.md index b5301262..4f694444 100644 --- a/website/docs/user-guide/rust/api-reference.md +++ b/website/docs/user-guide/rust/api-reference.md @@ -7,138 +7,143 @@ Complete API reference for the Fluss Rust client. 
## `Config` -| Field | Type | Default | Description | -|---|---|---|---| -| `bootstrap_servers` | `String` | `"127.0.0.1:9123"` | Coordinator server address | -| `writer_request_max_size` | `i32` | `10485760` (10 MB) | Maximum request size in bytes | -| `writer_acks` | `String` | `"all"` | Acknowledgment setting (`"all"` waits for all replicas) | -| `writer_retries` | `i32` | `i32::MAX` | Number of retries on failure | -| `writer_batch_size` | `i32` | `2097152` (2 MB) | Batch size for writes in bytes | -| `scanner_remote_log_prefetch_num` | `usize` | `4` | Number of remote log segments to prefetch | -| `remote_file_download_thread_num` | `usize` | `3` | Number of threads for remote log downloads | +| Field | Type | Default | Description | +|-----------------------------------|----------|--------------------|---------------------------------------------------------| +| `bootstrap_servers` | `String` | `"127.0.0.1:9123"` | Coordinator server address | +| `writer_request_max_size` | `i32` | `10485760` (10 MB) | Maximum request size in bytes | +| `writer_acks` | `String` | `"all"` | Acknowledgment setting (`"all"` waits for all replicas) | +| `writer_retries` | `i32` | `i32::MAX` | Number of retries on failure | +| `writer_batch_size` | `i32` | `2097152` (2 MB) | Batch size for writes in bytes | +| `scanner_remote_log_prefetch_num` | `usize` | `4` | Number of remote log segments to prefetch | +| `remote_file_download_thread_num` | `usize` | `3` | Number of threads for remote log downloads | ## `FlussConnection` -| Method | Description | -|---|---| -| `async fn new(config: Config) -> Result` | Create a new connection to a Fluss cluster | -| `async fn get_admin(&self) -> Result` | Get the admin interface for cluster management | -| `async fn get_table(&self, table_path: &TablePath) -> Result>` | Get a table for read/write operations | -| `fn config(&self) -> &Config` | Get a reference to the connection config | +| Method | Description | +|-------------------------------------------------------------------------------|------------------------------------------------| +| `async fn new(config: Config) -> Result` | Create a new connection to a Fluss cluster | +| `async fn get_admin(&self) -> Result` | Get the admin interface for cluster management | +| `async fn get_table(&self, table_path: &TablePath) -> Result>` | Get a table for read/write operations | +| `fn config(&self) -> &Config` | Get a reference to the connection config | ## `FlussAdmin` ### Database Operations -| Method | Description | -|---|---| -| `async fn create_database(&self, name: &str, descriptor: Option<&DatabaseDescriptor>, ignore_if_exists: bool) -> Result<()>` | Create a database | -| `async fn drop_database(&self, name: &str, ignore_if_not_exists: bool, cascade: bool) -> Result<()>` | Drop a database | -| `async fn list_databases(&self) -> Result>` | List all databases | -| `async fn database_exists(&self, name: &str) -> Result` | Check if a database exists | -| `async fn get_database_info(&self, name: &str) -> Result` | Get database metadata | +| Method | Description | +|------------------------------------------------------------------------------------------------------------------------------|----------------------------| +| `async fn create_database(&self, name: &str, descriptor: Option<&DatabaseDescriptor>, ignore_if_exists: bool) -> Result<()>` | Create a database | +| `async fn drop_database(&self, name: &str, ignore_if_not_exists: bool, cascade: bool) -> Result<()>` | Drop a database | +| `async fn 
list_databases(&self) -> Result>` | List all databases | +| `async fn database_exists(&self, name: &str) -> Result` | Check if a database exists | +| `async fn get_database_info(&self, name: &str) -> Result` | Get database metadata | ### Table Operations -| Method | Description | -|---|---| -| `async fn create_table(&self, table_path: &TablePath, descriptor: &TableDescriptor, ignore_if_exists: bool) -> Result<()>` | Create a table | -| `async fn drop_table(&self, table_path: &TablePath, ignore_if_not_exists: bool) -> Result<()>` | Drop a table | -| `async fn get_table_info(&self, table_path: &TablePath) -> Result` | Get table metadata | -| `async fn list_tables(&self, database_name: &str) -> Result>` | List tables in a database | -| `async fn table_exists(&self, table_path: &TablePath) -> Result` | Check if a table exists | +| Method | Description | +|----------------------------------------------------------------------------------------------------------------------------|---------------------------| +| `async fn create_table(&self, table_path: &TablePath, descriptor: &TableDescriptor, ignore_if_exists: bool) -> Result<()>` | Create a table | +| `async fn drop_table(&self, table_path: &TablePath, ignore_if_not_exists: bool) -> Result<()>` | Drop a table | +| `async fn get_table_info(&self, table_path: &TablePath) -> Result` | Get table metadata | +| `async fn list_tables(&self, database_name: &str) -> Result>` | List tables in a database | +| `async fn table_exists(&self, table_path: &TablePath) -> Result` | Check if a table exists | ### Partition Operations -| Method | Description | -|---|---| -| `async fn list_partition_infos(&self, table_path: &TablePath) -> Result>` | List all partitions | -| `async fn create_partition(&self, table_path: &TablePath, spec: &PartitionSpec, ignore_if_exists: bool) -> Result<()>` | Create a partition | -| `async fn drop_partition(&self, table_path: &TablePath, spec: &PartitionSpec, ignore_if_not_exists: bool) -> Result<()>` | Drop a partition | +| Method | Description | +|--------------------------------------------------------------------------------------------------------------------------|---------------------| +| `async fn list_partition_infos(&self, table_path: &TablePath) -> Result>` | List all partitions | +| `async fn create_partition(&self, table_path: &TablePath, spec: &PartitionSpec, ignore_if_exists: bool) -> Result<()>` | Create a partition | +| `async fn drop_partition(&self, table_path: &TablePath, spec: &PartitionSpec, ignore_if_not_exists: bool) -> Result<()>` | Drop a partition | ### Offset Operations -| Method | Description | -|---|---| -| `async fn list_offsets(&self, table_path: &TablePath, bucket_ids: &[i32], offset_spec: OffsetSpec) -> Result>` | Get offsets for buckets | +| Method | Description | +|------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------| +| `async fn list_offsets(&self, table_path: &TablePath, bucket_ids: &[i32], offset_spec: OffsetSpec) -> Result>` | Get offsets for buckets | | `async fn list_partition_offsets(&self, table_path: &TablePath, partition_name: &str, bucket_ids: &[i32], offset_spec: OffsetSpec) -> Result>` | Get offsets for a partition's buckets | ### Lake Operations -| Method | Description | -|---|---| +| Method | Description | +|--------------------------------------------------------------------------------------------|------------------------------| | 
`async fn get_latest_lake_snapshot(&self, table_path: &TablePath) -> Result` | Get the latest lake snapshot | ## `FlussTable<'a>` -| Method | Description | -|---|---| -| `fn get_table_info(&self) -> &TableInfo` | Get table metadata | +| Method | Description | +|-----------------------------------------------|-----------------------------------------| +| `fn get_table_info(&self) -> &TableInfo` | Get table metadata | | `fn new_append(&self) -> Result` | Create an append builder for log tables | -| `fn new_scan(&self) -> TableScan<'_>` | Create a scan builder | -| `fn new_lookup(&self) -> Result` | Create a lookup builder for PK tables | -| `fn new_upsert(&self) -> Result` | Create an upsert builder for PK tables | -| `fn has_primary_key(&self) -> bool` | Check if the table has a primary key | -| `fn table_path(&self) -> &TablePath` | Get the table path | +| `fn new_scan(&self) -> TableScan<'_>` | Create a scan builder | +| `fn new_lookup(&self) -> Result` | Create a lookup builder for PK tables | +| `fn new_upsert(&self) -> Result` | Create an upsert builder for PK tables | +| `fn has_primary_key(&self) -> bool` | Check if the table has a primary key | +| `fn table_path(&self) -> &TablePath` | Get the table path | ## `TableAppend` -| Method | Description | -|---|---| +| Method | Description | +|---------------------------------------------------|-------------------------| | `fn create_writer(&self) -> Result` | Create an append writer | ## `AppendWriter` -| Method | Description | -|---|---| -| `fn append(&self, row: &impl InternalRow) -> Result` | Append a row; returns a future for acknowledgment | -| `fn append_arrow_batch(&self, batch: RecordBatch) -> Result` | Append an Arrow RecordBatch | -| `async fn flush(&self) -> Result<()>` | Flush all pending writes to the server | +| Method | Description | +|---------------------------------------------------------------------------------|---------------------------------------------------| +| `fn append(&self, row: &impl InternalRow) -> Result` | Append a row; returns a future for acknowledgment | +| `fn append_arrow_batch(&self, batch: RecordBatch) -> Result` | Append an Arrow RecordBatch | +| `async fn flush(&self) -> Result<()>` | Flush all pending writes to the server | ## `TableScan<'a>` -| Method | Description | -|---|---| -| `fn project(self, indices: &[usize]) -> Result` | Project columns by index | -| `fn project_by_name(self, names: &[&str]) -> Result` | Project columns by name | -| `fn create_log_scanner(self) -> Result` | Create a record-based log scanner | +| Method | Description | +|-----------------------------------------------------------------------------|-----------------------------------------| +| `fn project(self, indices: &[usize]) -> Result` | Project columns by index | +| `fn project_by_name(self, names: &[&str]) -> Result` | Project columns by name | +| `fn create_log_scanner(self) -> Result` | Create a record-based log scanner | | `fn create_record_batch_log_scanner(self) -> Result` | Create an Arrow batch-based log scanner | ## `LogScanner` -| Method | Description | -|---|---| -| `async fn subscribe(&self, bucket_id: i32, start_offset: i64) -> Result<()>` | Subscribe to a bucket | -| `async fn subscribe_buckets(&self, bucket_offsets: &HashMap) -> Result<()>` | Subscribe to multiple buckets | -| `async fn subscribe_partition(&self, partition_id: i64, bucket_id: i32, start_offset: i64) -> Result<()>` | Subscribe to a partition bucket | -| `async fn subscribe_partition_buckets(&self, offsets: &HashMap<(i64, i32), i64>) 
-> Result<()>` | Subscribe to multiple partition-bucket pairs | -| `async fn unsubscribe_partition(&self, partition_id: i64, bucket_id: i32) -> Result<()>` | Unsubscribe from a partition bucket | -| `async fn poll(&self, timeout: Duration) -> Result` | Poll for records | +| Method | Description | +|-----------------------------------------------------------------------------------------------------------|----------------------------------------------------------| +| `async fn subscribe(&self, bucket_id: i32, start_offset: i64) -> Result<()>` | Subscribe to a bucket | +| `async fn subscribe_buckets(&self, bucket_offsets: &HashMap) -> Result<()>` | Subscribe to multiple buckets | +| `async fn subscribe_partition(&self, partition_id: i64, bucket_id: i32, start_offset: i64) -> Result<()>` | Subscribe to a partition bucket | +| `async fn subscribe_partition_buckets(&self, offsets: &HashMap<(i64, i32), i64>) -> Result<()>` | Subscribe to multiple partition-bucket pairs | +| `async fn unsubscribe(&self, bucket_id: i32) -> Result<()>` | Unsubscribe from a bucket (non-partitioned tables) | +| `async fn unsubscribe_partition(&self, partition_id: i64, bucket_id: i32) -> Result<()>` | Unsubscribe from a partition bucket (partitioned tables) | +| `async fn poll(&self, timeout: Duration) -> Result` | Poll for records | ## `RecordBatchLogScanner` -| Method | Description | -|---|---| -| `async fn subscribe(&self, bucket_id: i32, start_offset: i64) -> Result<()>` | Subscribe to a bucket | -| `async fn subscribe_partition(&self, partition_id: i64, bucket_id: i32, start_offset: i64) -> Result<()>` | Subscribe to a partition bucket | -| `async fn poll(&self, timeout: Duration) -> Result>` | Poll for Arrow record batches | +| Method | Description | +|-----------------------------------------------------------------------------------------------------------|----------------------------------------------------------| +| `async fn subscribe(&self, bucket_id: i32, start_offset: i64) -> Result<()>` | Subscribe to a bucket | +| `async fn subscribe_buckets(&self, bucket_offsets: &HashMap) -> Result<()>` | Subscribe to multiple buckets | +| `async fn subscribe_partition(&self, partition_id: i64, bucket_id: i32, start_offset: i64) -> Result<()>` | Subscribe to a partition bucket | +| `async fn subscribe_partition_buckets(&self, offsets: &HashMap<(i64, i32), i64>) -> Result<()>` | Subscribe to multiple partition-bucket pairs | +| `async fn unsubscribe(&self, bucket_id: i32) -> Result<()>` | Unsubscribe from a bucket (non-partitioned tables) | +| `async fn unsubscribe_partition(&self, partition_id: i64, bucket_id: i32) -> Result<()>` | Unsubscribe from a partition bucket (partitioned tables) | +| `async fn poll(&self, timeout: Duration) -> Result>` | Poll for Arrow record batches | ## `ScanRecord` -| Method | Description | -|---|---| -| `fn row(&self) -> &dyn InternalRow` | Get the row data | -| `fn offset(&self) -> i64` | Record offset in the log | -| `fn timestamp(&self) -> i64` | Record timestamp | +| Method | Description | +|----------------------------------------|----------------------------------------| +| `fn row(&self) -> &dyn InternalRow` | Get the row data | +| `fn offset(&self) -> i64` | Record offset in the log | +| `fn timestamp(&self) -> i64` | Record timestamp | | `fn change_type(&self) -> &ChangeType` | Change type (AppendOnly, Insert, etc.) 
| ## `ScanRecords` -| Method | Description | -|---|---| -| `fn count(&self) -> usize` | Number of records | -| `fn is_empty(&self) -> bool` | Whether the result set is empty | -| `fn records(&self, bucket: &TableBucket) -> &[ScanRecord]` | Get records for a specific bucket | +| Method | Description | +|--------------------------------------------------------------------------|-----------------------------------| +| `fn count(&self) -> usize` | Number of records | +| `fn is_empty(&self) -> bool` | Whether the result set is empty | +| `fn records(&self, bucket: &TableBucket) -> &[ScanRecord]` | Get records for a specific bucket | | `fn records_by_buckets(&self) -> &HashMap>` | Get all records grouped by bucket | `ScanRecords` also implements `IntoIterator`, so you can iterate over all records directly: @@ -151,53 +156,53 @@ for record in records { ## `ScanBatch` -| Method | Description | -|---|---| -| `fn bucket(&self) -> &TableBucket` | Bucket this batch belongs to | -| `fn batch(&self) -> &RecordBatch` | Arrow RecordBatch data | -| `fn base_offset(&self) -> i64` | First record offset | -| `fn last_offset(&self) -> i64` | Last record offset | -| `fn num_records(&self) -> usize` | Number of records in the batch | +| Method | Description | +|------------------------------------|--------------------------------| +| `fn bucket(&self) -> &TableBucket` | Bucket this batch belongs to | +| `fn batch(&self) -> &RecordBatch` | Arrow RecordBatch data | +| `fn base_offset(&self) -> i64` | First record offset | +| `fn last_offset(&self) -> i64` | Last record offset | +| `fn num_records(&self) -> usize` | Number of records in the batch | ## `TableUpsert` -| Method | Description | -|---|---| -| `fn create_writer(&self) -> Result` | Create an upsert writer | +| Method | Description | +|---------------------------------------------------------------------------------------|---------------------------------------------------| +| `fn create_writer(&self) -> Result` | Create an upsert writer | | `fn partial_update(&self, column_indices: Option>) -> Result` | Create a partial update builder by column indices | -| `fn partial_update_with_column_names(&self, names: &[&str]) -> Result` | Create a partial update builder by column names | +| `fn partial_update_with_column_names(&self, names: &[&str]) -> Result` | Create a partial update builder by column names | ## `UpsertWriter` -| Method | Description | -|---|---| +| Method | Description | +|-------------------------------------------------------------------------|---------------------------------------| | `fn upsert(&self, row: &impl InternalRow) -> Result` | Upsert a row (insert or update by PK) | -| `fn delete(&self, row: &impl InternalRow) -> Result` | Delete a row by primary key | -| `async fn flush(&self) -> Result<()>` | Flush all pending operations | +| `fn delete(&self, row: &impl InternalRow) -> Result` | Delete a row by primary key | +| `async fn flush(&self) -> Result<()>` | Flush all pending operations | ## `TableLookup` -| Method | Description | -|---|---| +| Method | Description | +|-------------------------------------------------|-------------------------------------| | `fn create_lookuper(&self) -> Result` | Create a lookuper for point lookups | ## `Lookuper` -| Method | Description | -|---|---| +| Method | Description | +|------------------------------------------------------------------------------|-----------------------------| | `async fn lookup(&mut self, key: &impl InternalRow) -> Result` | Lookup a row by primary key | ## `LookupResult` -| 
Method | Description | -|---|---| +| Method | Description | +|----------------------------------------------------------------|----------------------------------| | `fn get_single_row(&self) -> Result>` | Get a single row from the result | -| `fn get_rows(&self) -> Vec` | Get all rows from the result | +| `fn get_rows(&self) -> Vec` | Get all rows from the result | ## `WriteResultFuture` -| Description | -|---| +| Description | +|-----------------------------------------------------------------------------------------------------------------------------------------------| | Implements `Future>`. Await to wait for server acknowledgment. Returned by `append()`, `upsert()`, and `delete()`. | Usage: @@ -213,215 +218,224 @@ writer.append(&row)?.await?; ## `Schema` -| Method | Description | -|---|---| -| `fn builder() -> SchemaBuilder` | Create a schema builder | -| `fn columns(&self) -> &[Column]` | Get all columns | +| Method | Description | +|------------------------------------------------|------------------------------------------| +| `fn builder() -> SchemaBuilder` | Create a schema builder | +| `fn columns(&self) -> &[Column]` | Get all columns | | `fn primary_key(&self) -> Option<&PrimaryKey>` | Get primary key (None if no primary key) | -| `fn column_names(&self) -> Vec<&str>` | Get all column names | -| `fn primary_key_indexes(&self) -> Vec` | Get primary key column indices | +| `fn column_names(&self) -> Vec<&str>` | Get all column names | +| `fn primary_key_indexes(&self) -> Vec` | Get primary key column indices | ## `SchemaBuilder` -| Method | Description | -|---|---| -| `fn column(name: &str, data_type: DataType) -> Self` | Add a column | -| `fn primary_key(keys: Vec<&str>) -> Self` | Set primary key columns | -| `fn build() -> Result` | Build the schema | +| Method | Description | +|------------------------------------------------------|-------------------------| +| `fn column(name: &str, data_type: DataType) -> Self` | Add a column | +| `fn primary_key(keys: Vec<&str>) -> Self` | Set primary key columns | +| `fn build() -> Result` | Build the schema | ## `TableDescriptor` -| Method | Description | -|---|---| -| `fn builder() -> TableDescriptorBuilder` | Create a table descriptor builder | -| `fn schema(&self) -> &Schema` | Get the table schema | -| `fn partition_keys(&self) -> &[String]` | Get partition key column names | -| `fn has_primary_key(&self) -> bool` | Check if the table has a primary key | -| `fn properties(&self) -> &HashMap` | Get all table properties | -| `fn comment(&self) -> Option<&str>` | Get table comment | +| Method | Description | +|----------------------------------------------------|--------------------------------------| +| `fn builder() -> TableDescriptorBuilder` | Create a table descriptor builder | +| `fn schema(&self) -> &Schema` | Get the table schema | +| `fn partition_keys(&self) -> &[String]` | Get partition key column names | +| `fn has_primary_key(&self) -> bool` | Check if the table has a primary key | +| `fn properties(&self) -> &HashMap` | Get all table properties | +| `fn comment(&self) -> Option<&str>` | Get table comment | ## `TableDescriptorBuilder` -| Method | Description | -|---|---| -| `fn schema(schema: Schema) -> Self` | Set the schema | -| `fn log_format(format: LogFormat) -> Self` | Set log format (e.g., `LogFormat::ARROW`) | -| `fn kv_format(format: KvFormat) -> Self` | Set KV format (e.g., `KvFormat::COMPACTED`) | -| `fn property(key: &str, value: &str) -> Self` | Set a table property | -| `fn partitioned_by(keys: Vec<&str>) -> 
Self` | Set partition columns | -| `fn distributed_by(bucket_count: Option, bucket_keys: Vec) -> Self` | Set bucket distribution | -| `fn comment(comment: &str) -> Self` | Set table comment | -| `fn build() -> Result` | Build the table descriptor | +| Method | Description | +|----------------------------------------------------------------------------------|---------------------------------------------| +| `fn schema(schema: Schema) -> Self` | Set the schema | +| `fn log_format(format: LogFormat) -> Self` | Set log format (e.g., `LogFormat::ARROW`) | +| `fn kv_format(format: KvFormat) -> Self` | Set KV format (e.g., `KvFormat::COMPACTED`) | +| `fn property(key: &str, value: &str) -> Self` | Set a table property | +| `fn partitioned_by(keys: Vec<&str>) -> Self` | Set partition columns | +| `fn distributed_by(bucket_count: Option, bucket_keys: Vec) -> Self` | Set bucket distribution | +| `fn comment(comment: &str) -> Self` | Set table comment | +| `fn build() -> Result` | Build the table descriptor | ## `TablePath` -| Method | Description | -|---|---| +| Method | Description | +|-------------------------------------------------------|---------------------| | `TablePath::new(database: &str, table: &str) -> Self` | Create a table path | -| `fn database(&self) -> &str` | Get database name | -| `fn table(&self) -> &str` | Get table name | +| `fn database(&self) -> &str` | Get database name | +| `fn table(&self) -> &str` | Get table name | ## `TableInfo` -| Field / Method | Description | -|---|---| -| `.table_path` | `TablePath` -- Table path | -| `.table_id` | `i64` -- Table ID | -| `.schema_id` | `i32` -- Schema ID | -| `.schema` | `Schema` -- Table schema | -| `.primary_keys` | `Vec` -- Primary key column names | -| `.partition_keys` | `Vec` -- Partition key column names | -| `.num_buckets` | `i32` -- Number of buckets | -| `.properties` | `HashMap` -- All table properties | +| Field / Method | Description | +|----------------------|-----------------------------------------------------| +| `.table_path` | `TablePath` -- Table path | +| `.table_id` | `i64` -- Table ID | +| `.schema_id` | `i32` -- Schema ID | +| `.schema` | `Schema` -- Table schema | +| `.primary_keys` | `Vec` -- Primary key column names | +| `.partition_keys` | `Vec` -- Partition key column names | +| `.num_buckets` | `i32` -- Number of buckets | +| `.properties` | `HashMap` -- All table properties | | `.custom_properties` | `HashMap` -- Custom properties only | -| `.comment` | `Option` -- Table comment | -| `.created_time` | `i64` -- Creation timestamp | -| `.modified_time` | `i64` -- Last modification timestamp | +| `.comment` | `Option` -- Table comment | +| `.created_time` | `i64` -- Creation timestamp | +| `.modified_time` | `i64` -- Last modification timestamp | ## `TableBucket` -| Method | Description | -|---|---| -| `TableBucket::new(table_id: i64, bucket_id: i32) -> Self` | Create a non-partitioned bucket | -| `TableBucket::new_with_partition(table_id: i64, partition_id: Option, bucket_id: i32) -> Self` | Create a partitioned bucket | -| `fn table_id(&self) -> i64` | Get table ID | -| `fn partition_id(&self) -> Option` | Get partition ID (None if non-partitioned) | -| `fn bucket_id(&self) -> i32` | Get bucket ID | +| Method | Description | +|-----------------------------------------------------------------------------------------------------|--------------------------------------------| +| `TableBucket::new(table_id: i64, bucket_id: i32) -> Self` | Create a non-partitioned bucket | +| 
`TableBucket::new_with_partition(table_id: i64, partition_id: Option, bucket_id: i32) -> Self` | Create a partitioned bucket | +| `fn table_id(&self) -> i64` | Get table ID | +| `fn partition_id(&self) -> Option` | Get partition ID (None if non-partitioned) | +| `fn bucket_id(&self) -> i32` | Get bucket ID | ## `PartitionSpec` -| Method | Description | -|---|---| +| Method | Description | +|-------------------------------------------------------------|-------------------------------------------------------| | `PartitionSpec::new(spec_map: HashMap<&str, &str>) -> Self` | Create from a map of partition column names to values | -| `fn get_spec_map(&self) -> &HashMap` | Get the partition spec map | +| `fn get_spec_map(&self) -> &HashMap` | Get the partition spec map | ## `PartitionInfo` -| Method | Description | -|---|---| -| `fn get_partition_id(&self) -> i64` | Get partition ID | +| Method | Description | +|------------------------------------------|--------------------| +| `fn get_partition_id(&self) -> i64` | Get partition ID | | `fn get_partition_name(&self) -> String` | Get partition name | ## `DatabaseDescriptor` -| Method | Description | -|---|---| -| `fn builder() -> DatabaseDescriptorBuilder` | Create a database descriptor builder | -| `fn comment(&self) -> Option<&str>` | Get database comment | -| `fn custom_properties(&self) -> &HashMap` | Get custom properties | +| Method | Description | +|-----------------------------------------------------------|--------------------------------------| +| `fn builder() -> DatabaseDescriptorBuilder` | Create a database descriptor builder | +| `fn comment(&self) -> Option<&str>` | Get database comment | +| `fn custom_properties(&self) -> &HashMap` | Get custom properties | ## `DatabaseDescriptorBuilder` -| Method | Description | -|---|---| -| `fn comment(comment: impl Into) -> Self` | Set database comment | -| `fn custom_properties(properties: HashMap, impl Into>) -> Self` | Set custom properties | -| `fn custom_property(key: impl Into, value: impl Into) -> Self` | Set a single custom property | -| `fn build() -> DatabaseDescriptor` | Build the database descriptor | +| Method | Description | +|-------------------------------------------------------------------------------------------|-------------------------------| +| `fn comment(comment: impl Into) -> Self` | Set database comment | +| `fn custom_properties(properties: HashMap, impl Into>) -> Self` | Set custom properties | +| `fn custom_property(key: impl Into, value: impl Into) -> Self` | Set a single custom property | +| `fn build() -> DatabaseDescriptor` | Build the database descriptor | ## `DatabaseInfo` -| Method | Description | -|---|---| -| `fn database_name(&self) -> &str` | Get database name | -| `fn created_time(&self) -> i64` | Get creation timestamp | -| `fn modified_time(&self) -> i64` | Get last modification timestamp | -| `fn database_descriptor(&self) -> &DatabaseDescriptor` | Get the database descriptor | +| Method | Description | +|--------------------------------------------------------|---------------------------------| +| `fn database_name(&self) -> &str` | Get database name | +| `fn created_time(&self) -> i64` | Get creation timestamp | +| `fn modified_time(&self) -> i64` | Get last modification timestamp | +| `fn database_descriptor(&self) -> &DatabaseDescriptor` | Get the database descriptor | ## `LakeSnapshot` -| Field | Description | -|---|---| -| `.snapshot_id` | `i64` -- Snapshot ID | +| Field | Description | 
+|-------------------------|---------------------------------------------------| +| `.snapshot_id` | `i64` -- Snapshot ID | | `.table_buckets_offset` | `HashMap` -- All bucket offsets | ## `GenericRow<'a>` -| Method | Description | -|---|---| -| `GenericRow::new(field_count: usize) -> Self` | Create a new row with the given number of fields | -| `fn set_field(&mut self, pos: usize, value: impl Into>)` | Set a field value by position | -| `GenericRow::from_data(data: Vec>>) -> Self` | Create a row from existing field data | +| Method | Description | +|--------------------------------------------------------------------|--------------------------------------------------| +| `GenericRow::new(field_count: usize) -> Self` | Create a new row with the given number of fields | +| `fn set_field(&mut self, pos: usize, value: impl Into>)` | Set a field value by position | +| `GenericRow::from_data(data: Vec>>) -> Self` | Create a row from existing field data | Implements the `InternalRow` trait (see below). ## `InternalRow` trait -| Method | Description | -|---|---| -| `fn get_boolean(&self, idx: usize) -> bool` | Get boolean value | -| `fn get_byte(&self, idx: usize) -> i8` | Get tinyint value | -| `fn get_short(&self, idx: usize) -> i16` | Get smallint value | -| `fn get_int(&self, idx: usize) -> i32` | Get int value | -| `fn get_long(&self, idx: usize) -> i64` | Get bigint value | -| `fn get_float(&self, idx: usize) -> f32` | Get float value | -| `fn get_double(&self, idx: usize) -> f64` | Get double value | -| `fn get_string(&self, idx: usize) -> &str` | Get string value | -| `fn get_decimal(&self, idx: usize, precision: usize, scale: usize) -> Decimal` | Get decimal value | -| `fn get_date(&self, idx: usize) -> Date` | Get date value | -| `fn get_time(&self, idx: usize) -> Time` | Get time value | -| `fn get_timestamp_ntz(&self, idx: usize, precision: u32) -> TimestampNtz` | Get timestamp value | -| `fn get_timestamp_ltz(&self, idx: usize, precision: u32) -> TimestampLtz` | Get timestamp with local timezone value | -| `fn get_bytes(&self, idx: usize) -> &[u8]` | Get bytes value | -| `fn get_binary(&self, idx: usize, length: usize) -> &[u8]` | Get fixed-length binary value | -| `fn get_char(&self, idx: usize, length: usize) -> &str` | Get fixed-length char value | +| Method | Description | +|--------------------------------------------------------------------------------|-----------------------------------------| +| `fn get_boolean(&self, idx: usize) -> bool` | Get boolean value | +| `fn get_byte(&self, idx: usize) -> i8` | Get tinyint value | +| `fn get_short(&self, idx: usize) -> i16` | Get smallint value | +| `fn get_int(&self, idx: usize) -> i32` | Get int value | +| `fn get_long(&self, idx: usize) -> i64` | Get bigint value | +| `fn get_float(&self, idx: usize) -> f32` | Get float value | +| `fn get_double(&self, idx: usize) -> f64` | Get double value | +| `fn get_string(&self, idx: usize) -> &str` | Get string value | +| `fn get_decimal(&self, idx: usize, precision: usize, scale: usize) -> Decimal` | Get decimal value | +| `fn get_date(&self, idx: usize) -> Date` | Get date value | +| `fn get_time(&self, idx: usize) -> Time` | Get time value | +| `fn get_timestamp_ntz(&self, idx: usize, precision: u32) -> TimestampNtz` | Get timestamp value | +| `fn get_timestamp_ltz(&self, idx: usize, precision: u32) -> TimestampLtz` | Get timestamp with local timezone value | +| `fn get_bytes(&self, idx: usize) -> &[u8]` | Get bytes value | +| `fn get_binary(&self, idx: usize, length: usize) -> &[u8]` | 
Get fixed-length binary value | +| `fn get_char(&self, idx: usize, length: usize) -> &str` | Get fixed-length char value | ## `ChangeType` -| Value | Short String | Description | -|---|---|---| -| `ChangeType::AppendOnly` | `+A` | Append-only record | -| `ChangeType::Insert` | `+I` | Inserted row | -| `ChangeType::UpdateBefore` | `-U` | Previous value of an updated row | -| `ChangeType::UpdateAfter` | `+U` | New value of an updated row | -| `ChangeType::Delete` | `-D` | Deleted row | +| Value | Short String | Description | +|----------------------------|---------------|----------------------------------| +| `ChangeType::AppendOnly` | `+A` | Append-only record | +| `ChangeType::Insert` | `+I` | Inserted row | +| `ChangeType::UpdateBefore` | `-U` | Previous value of an updated row | +| `ChangeType::UpdateAfter` | `+U` | New value of an updated row | +| `ChangeType::Delete` | `-D` | Deleted row | -| Method | Description | -|---|---| +| Method | Description | +|----------------------------------|-------------------------------------| | `fn short_string(&self) -> &str` | Get the short string representation | ## `OffsetSpec` -| Variant | Description | -|---|---| -| `OffsetSpec::Earliest` | Start from the earliest available offset | -| `OffsetSpec::Latest` | Start from the latest offset (only new records) | +| Variant | Description | +|------------------------------|-------------------------------------------------| +| `OffsetSpec::Earliest` | Start from the earliest available offset | +| `OffsetSpec::Latest` | Start from the latest offset (only new records) | | `OffsetSpec::Timestamp(i64)` | Start from a specific timestamp in milliseconds | ## Constants -| Constant | Value | Description | -|---|---|---| -| `fluss::client::EARLIEST_OFFSET` | `-2` | Start reading from the earliest available offset | -| `fluss::client::LATEST_OFFSET` | `-1` | Start reading from the latest offset (only new records) | +| Constant | Value | Description | +|----------------------------------|--------|---------------------------------------------------------| +| `fluss::client::EARLIEST_OFFSET` | `-2` | Start reading from the earliest available offset | + +To start reading from the latest offset (only new records), resolve the current offset via `list_offsets` before subscribing: + +```rust +use fluss::rpc::message::OffsetSpec; + +let offsets = admin.list_offsets(&table_path, &[0], OffsetSpec::Latest).await?; +let latest = offsets[&0]; +log_scanner.subscribe(0, latest).await?; +``` ## `DataTypes` factory -| Method | Returns | Description | -|---|---|---| -| `DataTypes::boolean()` | `DataType` | Boolean type | -| `DataTypes::tinyint()` | `DataType` | 8-bit signed integer | -| `DataTypes::smallint()` | `DataType` | 16-bit signed integer | -| `DataTypes::int()` | `DataType` | 32-bit signed integer | -| `DataTypes::bigint()` | `DataType` | 64-bit signed integer | -| `DataTypes::float()` | `DataType` | 32-bit floating point | -| `DataTypes::double()` | `DataType` | 64-bit floating point | -| `DataTypes::string()` | `DataType` | Variable-length string | -| `DataTypes::bytes()` | `DataType` | Variable-length byte array | -| `DataTypes::date()` | `DataType` | Date (days since epoch) | -| `DataTypes::time()` | `DataType` | Time (milliseconds since midnight) | -| `DataTypes::timestamp()` | `DataType` | Timestamp without timezone | -| `DataTypes::timestamp_ltz()` | `DataType` | Timestamp with local timezone | -| `DataTypes::decimal(precision: u32, scale: u32)` | `DataType` | Fixed-point decimal | -| `DataTypes::char(length: u32)` 
| `DataType` | Fixed-length string | -| `DataTypes::binary(length: usize)` | `DataType` | Fixed-length byte array | -| `DataTypes::array(element: DataType)` | `DataType` | Array of elements | -| `DataTypes::map(key: DataType, value: DataType)` | `DataType` | Map of key-value pairs | -| `DataTypes::row(fields: Vec)` | `DataType` | Nested row type | +| Method | Returns | Description | +|--------------------------------------------------|------------|------------------------------------| +| `DataTypes::boolean()` | `DataType` | Boolean type | +| `DataTypes::tinyint()` | `DataType` | 8-bit signed integer | +| `DataTypes::smallint()` | `DataType` | 16-bit signed integer | +| `DataTypes::int()` | `DataType` | 32-bit signed integer | +| `DataTypes::bigint()` | `DataType` | 64-bit signed integer | +| `DataTypes::float()` | `DataType` | 32-bit floating point | +| `DataTypes::double()` | `DataType` | 64-bit floating point | +| `DataTypes::string()` | `DataType` | Variable-length string | +| `DataTypes::bytes()` | `DataType` | Variable-length byte array | +| `DataTypes::date()` | `DataType` | Date (days since epoch) | +| `DataTypes::time()` | `DataType` | Time (milliseconds since midnight) | +| `DataTypes::timestamp()` | `DataType` | Timestamp without timezone | +| `DataTypes::timestamp_ltz()` | `DataType` | Timestamp with local timezone | +| `DataTypes::decimal(precision: u32, scale: u32)` | `DataType` | Fixed-point decimal | +| `DataTypes::char(length: u32)` | `DataType` | Fixed-length string | +| `DataTypes::binary(length: usize)` | `DataType` | Fixed-length byte array | +| `DataTypes::array(element: DataType)` | `DataType` | Array of elements | +| `DataTypes::map(key: DataType, value: DataType)` | `DataType` | Map of key-value pairs | +| `DataTypes::row(fields: Vec)` | `DataType` | Nested row type | ## `DataField` -| Method | Description | -|---|---| +| Method | Description | +|----------------------------------------------------------------------------------------------------------|---------------------| | `DataField::new(name: impl Into, data_type: DataType, description: Option) -> DataField` | Create a data field | -| `fn name(&self) -> &str` | Get the field name | +| `fn name(&self) -> &str` | Get the field name | diff --git a/website/docs/user-guide/rust/data-types.md b/website/docs/user-guide/rust/data-types.md index c4f46cb2..f5b55345 100644 --- a/website/docs/user-guide/rust/data-types.md +++ b/website/docs/user-guide/rust/data-types.md @@ -3,24 +3,24 @@ sidebar_position: 3 --- # Data Types -| Fluss Type | Rust Type | Getter | Setter | -|---|---|---|---| -| `BOOLEAN` | `bool` | `get_boolean()` | `set_field(idx, bool)` | -| `TINYINT` | `i8` | `get_byte()` | `set_field(idx, i8)` | -| `SMALLINT` | `i16` | `get_short()` | `set_field(idx, i16)` | -| `INT` | `i32` | `get_int()` | `set_field(idx, i32)` | -| `BIGINT` | `i64` | `get_long()` | `set_field(idx, i64)` | -| `FLOAT` | `f32` | `get_float()` | `set_field(idx, f32)` | -| `DOUBLE` | `f64` | `get_double()` | `set_field(idx, f64)` | -| `CHAR` | `&str` | `get_char(idx, length)` | `set_field(idx, &str)` | -| `STRING` | `&str` | `get_string()` | `set_field(idx, &str)` | -| `DECIMAL` | `Decimal` | `get_decimal(idx, precision, scale)` | `set_field(idx, Decimal)` | -| `DATE` | `Date` | `get_date()` | `set_field(idx, Date)` | -| `TIME` | `Time` | `get_time()` | `set_field(idx, Time)` | -| `TIMESTAMP` | `TimestampNtz` | `get_timestamp_ntz(idx, precision)` | `set_field(idx, TimestampNtz)` | -| `TIMESTAMP_LTZ` | `TimestampLtz` | `get_timestamp_ltz(idx, 
precision)` | `set_field(idx, TimestampLtz)` | -| `BYTES` | `&[u8]` | `get_bytes()` | `set_field(idx, &[u8])` | -| `BINARY(n)` | `&[u8]` | `get_binary(idx, length)` | `set_field(idx, &[u8])` | +| Fluss Type | Rust Type | Getter | Setter | +|-----------------|----------------|--------------------------------------|--------------------------------| +| `BOOLEAN` | `bool` | `get_boolean()` | `set_field(idx, bool)` | +| `TINYINT` | `i8` | `get_byte()` | `set_field(idx, i8)` | +| `SMALLINT` | `i16` | `get_short()` | `set_field(idx, i16)` | +| `INT` | `i32` | `get_int()` | `set_field(idx, i32)` | +| `BIGINT` | `i64` | `get_long()` | `set_field(idx, i64)` | +| `FLOAT` | `f32` | `get_float()` | `set_field(idx, f32)` | +| `DOUBLE` | `f64` | `get_double()` | `set_field(idx, f64)` | +| `CHAR` | `&str` | `get_char(idx, length)` | `set_field(idx, &str)` | +| `STRING` | `&str` | `get_string()` | `set_field(idx, &str)` | +| `DECIMAL` | `Decimal` | `get_decimal(idx, precision, scale)` | `set_field(idx, Decimal)` | +| `DATE` | `Date` | `get_date()` | `set_field(idx, Date)` | +| `TIME` | `Time` | `get_time()` | `set_field(idx, Time)` | +| `TIMESTAMP` | `TimestampNtz` | `get_timestamp_ntz(idx, precision)` | `set_field(idx, TimestampNtz)` | +| `TIMESTAMP_LTZ` | `TimestampLtz` | `get_timestamp_ltz(idx, precision)` | `set_field(idx, TimestampLtz)` | +| `BYTES` | `&[u8]` | `get_bytes()` | `set_field(idx, &[u8])` | +| `BINARY(n)` | `&[u8]` | `get_binary(idx, length)` | `set_field(idx, &[u8])` | ## Constructing Special Types diff --git a/website/docs/user-guide/rust/error-handling.md b/website/docs/user-guide/rust/error-handling.md index 8198f65f..35ede6c8 100644 --- a/website/docs/user-guide/rust/error-handling.md +++ b/website/docs/user-guide/rust/error-handling.md @@ -44,17 +44,17 @@ match result { ## Error Variants -| Variant | Description | -|---|---| -| `UnexpectedError` | General unexpected errors with a message and optional source | -| `IoUnexpectedError` | I/O errors (network, file system) | -| `RemoteStorageUnexpectedError` | Remote storage errors (OpenDAL backend failures) | -| `RpcError` | RPC communication failures (connection refused, timeout) | -| `RowConvertError` | Row conversion failures (type mismatch, invalid data) | -| `ArrowError` | Arrow data handling errors (schema mismatch, encoding) | -| `IllegalArgument` | Invalid arguments passed to an API method | -| `UnsupportedOperation` | Operation not supported on the table type | -| `FlussAPIError` | Server-side API errors returned by the Fluss cluster | +| Variant | Description | +|--------------------------------|--------------------------------------------------------------| +| `UnexpectedError` | General unexpected errors with a message and optional source | +| `IoUnexpectedError` | I/O errors (network, file system) | +| `RemoteStorageUnexpectedError` | Remote storage errors (OpenDAL backend failures) | +| `RpcError` | RPC communication failures (connection refused, timeout) | +| `RowConvertError` | Row conversion failures (type mismatch, invalid data) | +| `ArrowError` | Arrow data handling errors (schema mismatch, encoding) | +| `IllegalArgument` | Invalid arguments passed to an API method | +| `UnsupportedOperation` | Operation not supported on the table type | +| `FlussAPIError` | Server-side API errors returned by the Fluss cluster | Server side errors are represented as `FlussAPIError` with a specific error code. 
Use the `api_error()` helper to match them ergonomically: @@ -107,7 +107,7 @@ match result { _ => {} } -// conn.get_table() wraps the error differently — match on FlussAPIError directly +// conn.get_table() wraps the error differently, match on FlussAPIError directly let result = conn.get_table(&table_path).await; match result { Err(Error::FlussAPIError { ref api_error }) => { diff --git a/website/docs/user-guide/rust/example/configuration.md b/website/docs/user-guide/rust/example/configuration.md index 4ab04adb..82f536fb 100644 --- a/website/docs/user-guide/rust/example/configuration.md +++ b/website/docs/user-guide/rust/example/configuration.md @@ -15,10 +15,10 @@ let conn = FlussConnection::new(config).await?; ## Configuration Options -| Option | Description | Default | -|--------|-------------|---------| -| `bootstrap_servers` | Coordinator server address | `127.0.0.1:9123` | -| `writer_request_max_size` | Maximum request size in bytes | 10 MB | -| `writer_acks` | Acknowledgment setting (`all` waits for all replicas) | `all` | -| `writer_retries` | Number of retries on failure | `i32::MAX` | -| `writer_batch_size` | Batch size for writes | 2 MB | +| Option | Description | Default | +|---------------------------|-------------------------------------------------------|------------------| +| `bootstrap_servers` | Coordinator server address | `127.0.0.1:9123` | +| `writer_request_max_size` | Maximum request size in bytes | 10 MB | +| `writer_acks` | Acknowledgment setting (`all` waits for all replicas) | `all` | +| `writer_retries` | Number of retries on failure | `i32::MAX` | +| `writer_batch_size` | Batch size for writes | 2 MB | diff --git a/website/docs/user-guide/rust/example/log-tables.md b/website/docs/user-guide/rust/example/log-tables.md index 1672a95b..3ba33542 100644 --- a/website/docs/user-guide/rust/example/log-tables.md +++ b/website/docs/user-guide/rust/example/log-tables.md @@ -78,13 +78,25 @@ for record in records { **Subscribe from special offsets:** ```rust -use fluss::client::{EARLIEST_OFFSET, LATEST_OFFSET}; +use fluss::client::EARLIEST_OFFSET; log_scanner.subscribe(0, EARLIEST_OFFSET).await?; // from earliest -log_scanner.subscribe(0, LATEST_OFFSET).await?; // only new records log_scanner.subscribe(0, 42).await?; // from specific offset ``` +**Subscribe from latest offset (only new records):** + +To start reading only new records, first resolve the current latest offset via `list_offsets`, then subscribe at that offset: + +```rust +use fluss::rpc::message::OffsetSpec; + +let admin = conn.get_admin().await?; +let offsets = admin.list_offsets(&table_path, &[0], OffsetSpec::Latest).await?; +let latest = offsets[&0]; +log_scanner.subscribe(0, latest).await?; +``` + **Subscribe to all buckets:** ```rust @@ -105,9 +117,13 @@ bucket_offsets.insert(1, 100i64); log_scanner.subscribe_buckets(&bucket_offsets).await?; ``` -**Unsubscribe from a partition bucket:** +**Unsubscribe from a bucket:** ```rust +// Non-partitioned tables +log_scanner.unsubscribe(bucket_id).await?; + +// Partitioned tables log_scanner.unsubscribe_partition(partition_id, bucket_id).await?; ``` diff --git a/website/docs/user-guide/rust/example/partitioned-tables.md b/website/docs/user-guide/rust/example/partitioned-tables.md index a1d2475d..3edf4d88 100644 --- a/website/docs/user-guide/rust/example/partitioned-tables.md +++ b/website/docs/user-guide/rust/example/partitioned-tables.md @@ -31,7 +31,7 @@ admin.create_table(&table_path, &table_descriptor, true).await?; ### Writing to Partitioned Log Tables 
-**Partitions must exist before writing data, otherwise the client will by default retry indefinitely.** Include partition column values in each row — the client routes records to the correct partition automatically.
+**Partitions must exist before writing data, otherwise the client will by default retry indefinitely.** Include partition column values in each row; the client routes records to the correct partition automatically.
 ```rust
 use fluss::metadata::PartitionSpec;
diff --git a/website/docs/user-guide/rust/installation.md b/website/docs/user-guide/rust/installation.md
index 1fe36fd4..e6987831 100644
--- a/website/docs/user-guide/rust/installation.md
+++ b/website/docs/user-guide/rust/installation.md
@@ -29,11 +29,11 @@ fluss-rs = { version = "0.1", features = ["storage-all"] }
 ```
 Available features:
-- `storage-memory` (default) — In-memory storage
-- `storage-fs` (default) — Local filesystem storage
-- `storage-s3` — Amazon S3 storage
-- `storage-oss` — Alibaba OSS storage
-- `storage-all` — All storage backends
+- `storage-memory` (default): In-memory storage
+- `storage-fs` (default): Local filesystem storage
+- `storage-s3`: Amazon S3 storage
+- `storage-oss`: Alibaba OSS storage
+- `storage-all`: All storage backends
 ## Git or Path Dependency