Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 10 additions & 9 deletions core/wren/src/wren/connector/clickhouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import re
from decimal import Decimal as PyDecimal
from typing import Any
from urllib.parse import parse_qsl, unquote_plus, urlparse
from urllib.parse import parse_qsl, unquote, urlparse

import pyarrow as pa
import sqlglot
Expand Down Expand Up @@ -258,9 +258,12 @@ def _build_clickhouse_client_kwargs(connection_info: Any) -> dict:
if statement_timeout is not None:
settings["max_execution_time"] = int(statement_timeout)

# urlparse leaves percent-encoded characters in userinfo, so decode
# them before clickhouse-connect sees the credentials. Matches the
# mssql / postgres URL handling elsewhere in this package.
# urlparse leaves percent-encoded characters in userinfo/path, so
# decode them before clickhouse-connect sees the credentials. Use
# ``unquote`` (not ``unquote_plus``): in a URL, ``+`` is only a space
# inside a query string, NOT in userinfo or the path — so a literal
# ``+`` in a password (e.g. ``pw+1``) must be preserved. This matches
# the mssql / mysql / oracle / trino URL handling in this package.
secure = parsed.scheme == "clickhouse+https"
# Pick the port-less default from the scheme: ClickHouse listens for
# HTTPS on 8443 and plaintext HTTP on 8123. Defaulting an https URL to
Expand All @@ -270,14 +273,12 @@ def _build_clickhouse_client_kwargs(connection_info: Any) -> dict:
out: dict = {
"host": parsed.hostname,
"port": int(parsed.port) if parsed.port else default_port,
"username": (
unquote_plus(parsed.username) if parsed.username else "default"
),
"password": unquote_plus(parsed.password) if parsed.password else "",
"username": (unquote(parsed.username) if parsed.username else "default"),
"password": unquote(parsed.password) if parsed.password else "",
"settings": settings,
}
if parsed.path and parsed.path != "/":
out["database"] = parsed.path.lstrip("/")
out["database"] = unquote(parsed.path.lstrip("/"))
if secure:
out["secure"] = True
# ``settings`` already popped above, so ``out["settings"]`` survives.
Expand Down
31 changes: 30 additions & 1 deletion core/wren/tests/unit/test_clickhouse_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def __init__(self, url: str, **extras) -> None:


def test_clickhouse_url_decodes_username_and_password() -> None:
"""user / password from the URL must be unquote_plus'd.
"""user / password / database from the URL must be percent-decoded.

urlparse leaves ``%40`` (``@``) and ``%20`` (space) literal in the
userinfo, which would otherwise reach ClickHouse verbatim and fail auth.
Expand All @@ -60,6 +60,35 @@ def test_clickhouse_url_decodes_username_and_password() -> None:
assert out["database"] == "analytics"


def test_clickhouse_url_preserves_literal_plus_in_credentials() -> None:
"""A literal ``+`` in userinfo is a plus, not a space.

Regression: the parser used ``unquote_plus``, which turned a literal
``+`` in a password into a space and corrupted the credential. In a URL,
``+`` only means space inside a query string — never in userinfo/path.
"""
info = _FakeConnInfoFromUrl(
"clickhouse://svc+etl:pw+1@clickhouse-host:9000/an+db"
)

out = _build_clickhouse_client_kwargs(info)

assert out["username"] == "svc+etl"
assert out["password"] == "pw+1"
assert out["database"] == "an+db"


def test_clickhouse_url_decodes_percent_encoded_database() -> None:
"""A percent-encoded database name in the path must be decoded too."""
info = _FakeConnInfoFromUrl(
"clickhouse://clickhouse-host:9000/my%20analytics"
)

out = _build_clickhouse_client_kwargs(info)

assert out["database"] == "my analytics"


def test_clickhouse_url_defaults_username_when_omitted() -> None:
"""When the URL has no userinfo, the default ``"default"`` user wins."""
info = _FakeConnInfoFromUrl("clickhouse://clickhouse-host/analytics")
Expand Down
Loading