Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions bigframes/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
from bigframes.bigquery._operations.search import create_vector_index, vector_search
from bigframes.bigquery._operations.sql import sql_scalar
from bigframes.bigquery._operations.struct import struct
from bigframes.bigquery.table import create_external_table
from bigframes.core.logging import log_adapter

_functions = [
Expand Down Expand Up @@ -104,6 +105,8 @@
sql_scalar,
# struct ops
struct,
# table ops
create_external_table,
]

_module = sys.modules[__name__]
Expand Down Expand Up @@ -155,6 +158,8 @@
"sql_scalar",
# struct ops
"struct",
# table ops
"create_external_table",
# Modules / SQL namespaces
"ai",
"ml",
Expand Down
103 changes: 103 additions & 0 deletions bigframes/bigquery/table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

from typing import Mapping, Optional, Union

import bigframes_vendored.constants
import google.cloud.bigquery
import pandas as pd

import bigframes.core.logging.log_adapter as log_adapter
import bigframes.core.sql.table
import bigframes.session


def _get_table_metadata(
*,
bqclient: google.cloud.bigquery.Client,
table_name: str,
) -> pd.Series:
table_metadata = bqclient.get_table(table_name)
table_dict = table_metadata.to_api_repr()
return pd.Series(table_dict)


@log_adapter.method_logger(custom_base_name="bigquery_table")
def create_external_table(
    table_name: str,
    *,
    replace: bool = False,
    if_not_exists: bool = False,
    columns: Optional[Mapping[str, str]] = None,
    partition_columns: Optional[Mapping[str, str]] = None,
    connection_name: Optional[str] = None,
    options: Mapping[str, Union[str, int, float, bool, list]],
    session: Optional[bigframes.session.Session] = None,
) -> pd.Series:
    """
    Creates a BigQuery external table.

    See the `BigQuery CREATE EXTERNAL TABLE DDL syntax
    <https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_external_table_statement>`_
    for additional reference.

    Args:
        table_name (str):
            The name of the table in BigQuery.
        replace (bool, default False):
            Whether to replace the table if it already exists.
        if_not_exists (bool, default False):
            Whether to ignore the error if the table already exists.
        columns (Mapping[str, str], optional):
            The table's schema, mapping column names to BigQuery type names.
        partition_columns (Mapping[str, str], optional):
            The table's partition columns, mapping names to type names.
        connection_name (str, optional):
            The connection to use for the table.
        options (Mapping[str, Union[str, int, float, bool, list]]):
            The OPTIONS clause, which specifies the table options.
        session (bigframes.session.Session, optional):
            The session to use. If not provided, the default (global)
            session is used.

    Returns:
        pandas.Series:
            A Series with object dtype containing the table metadata. Reference
            the `BigQuery Table REST API reference
            <https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table>`_
            for available fields.

    Raises:
        RuntimeError: If no session was provided and no global session is
            available.
    """
    # Imported lazily to avoid a circular import at module load time.
    import bigframes.pandas as bpd

    sql = bigframes.core.sql.table.create_external_table_ddl(
        table_name=table_name,
        replace=replace,
        if_not_exists=if_not_exists,
        columns=columns,
        partition_columns=partition_columns,
        connection_name=connection_name,
        options=options,
    )

    # Resolve the session first so the DDL statement is executed exactly
    # once through a single code path, whether or not a session was given.
    if session is None:
        session = bpd.get_global_session()
    if session is None:
        # Raise explicitly rather than assert: asserts are stripped under -O.
        raise RuntimeError(
            "Missing connection to BigQuery. Please report how you "
            f"encountered this error at {bigframes_vendored.constants.FEEDBACK_LINK}."
        )
    session.read_gbq_query(sql)

    return _get_table_metadata(bqclient=session.bqclient, table_name=table_name)
68 changes: 68 additions & 0 deletions bigframes/core/sql/table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

from typing import Mapping, Optional, Union


def create_external_table_ddl(
    table_name: str,
    *,
    replace: bool = False,
    if_not_exists: bool = False,
    columns: Optional[Mapping[str, str]] = None,
    partition_columns: Optional[Mapping[str, str]] = None,
    connection_name: Optional[str] = None,
    options: Mapping[str, Union[str, int, float, bool, list]],
) -> str:
    """Generates the CREATE EXTERNAL TABLE DDL statement.

    Args:
        table_name: Target table name, emitted verbatim.
            NOTE(review): the name is not backtick-quoted here; callers are
            assumed to pass an identifier BigQuery accepts unquoted — confirm.
        replace: Emit ``CREATE OR REPLACE``.
        if_not_exists: Emit ``IF NOT EXISTS``.
        columns: Column name -> type name pairs for the schema clause.
        partition_columns: Column name -> type name pairs for the
            ``WITH PARTITION COLUMNS`` clause.
        connection_name: Connection for the ``WITH CONNECTION`` clause.
        options: Key/value pairs rendered into the ``OPTIONS`` clause.

    Returns:
        The DDL statement as a single-line string.
    """

    def _render_columns(cols: Mapping[str, str]) -> str:
        # "name type, name type, ..." — insertion order is preserved.
        return ", ".join(f"{name} {typ}" for name, typ in cols.items())

    def _render_option_value(value) -> str:
        # bool must be tested before the generic fallthrough (bool is an int).
        if isinstance(value, bool):
            return str(value).upper()
        if isinstance(value, str):
            # repr() yields a quoted literal form accepted by BigQuery for
            # typical option values (URIs, format names).
            return repr(value)
        if isinstance(value, list):
            return "[" + ", ".join(repr(item) for item in value) + "]"
        return str(value)

    parts = ["CREATE"]
    if replace:
        parts.append("OR REPLACE")
    parts.append("EXTERNAL TABLE")
    if if_not_exists:
        parts.append("IF NOT EXISTS")
    parts.append(table_name)

    if columns:
        parts.append(f"({_render_columns(columns)})")

    if connection_name:
        parts.append(f"WITH CONNECTION `{connection_name}`")

    if partition_columns:
        parts.append(f"WITH PARTITION COLUMNS ({_render_columns(partition_columns)})")

    if options:
        rendered = ", ".join(
            f"{key} = {_render_option_value(value)}" for key, value in options.items()
        )
        parts.append(f"OPTIONS ({rendered})")

    return " ".join(parts)
36 changes: 36 additions & 0 deletions tests/system/large/bigquery/test_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import bigframes.bigquery as bbq


def test_create_external_table(session, dataset_id, bq_connection):
    """System test: create an object table over GCS and read it back."""
    import bigframes.pandas as bpd

    destination = f"{dataset_id}.test_object_table"
    source_uri = "gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*"

    # Create the external (object) table over the sample images.
    metadata = bbq.create_external_table(
        destination,
        connection_name=bq_connection,
        options={"object_metadata": "SIMPLE", "uris": [source_uri]},
        session=session,
    )
    assert metadata is not None

    # Read the table back to verify it exists and is non-empty.
    result = bpd.read_gbq(destination).to_pandas()
    assert len(result) > 0
95 changes: 95 additions & 0 deletions tests/unit/bigquery/test_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from unittest import mock

import pytest

import bigframes.bigquery.table
import bigframes.core.sql.table
import bigframes.session


@pytest.fixture
def mock_session():
    """Provide an autospecced bigframes Session that records calls."""
    fake_session = mock.create_autospec(bigframes.session.Session)
    return fake_session


def test_create_external_table_ddl():
    """DDL with columns and options only."""
    generated = bigframes.core.sql.table.create_external_table_ddl(
        "my-project.my_dataset.my_table",
        columns={"col1": "INT64", "col2": "STRING"},
        options={"format": "CSV", "uris": ["gs://bucket/path*"]},
    )
    assert generated == (
        "CREATE EXTERNAL TABLE my-project.my_dataset.my_table "
        "(col1 INT64, col2 STRING) "
        "OPTIONS (format = 'CSV', uris = ['gs://bucket/path*'])"
    )


def test_create_external_table_ddl_replace():
    """replace=True emits CREATE OR REPLACE."""
    generated = bigframes.core.sql.table.create_external_table_ddl(
        "my-project.my_dataset.my_table",
        replace=True,
        columns={"col1": "INT64", "col2": "STRING"},
        options={"format": "CSV", "uris": ["gs://bucket/path*"]},
    )
    assert generated == (
        "CREATE OR REPLACE EXTERNAL TABLE my-project.my_dataset.my_table "
        "(col1 INT64, col2 STRING) "
        "OPTIONS (format = 'CSV', uris = ['gs://bucket/path*'])"
    )


def test_create_external_table_ddl_if_not_exists():
    """if_not_exists=True emits IF NOT EXISTS."""
    generated = bigframes.core.sql.table.create_external_table_ddl(
        "my-project.my_dataset.my_table",
        if_not_exists=True,
        columns={"col1": "INT64", "col2": "STRING"},
        options={"format": "CSV", "uris": ["gs://bucket/path*"]},
    )
    assert generated == (
        "CREATE EXTERNAL TABLE IF NOT EXISTS my-project.my_dataset.my_table "
        "(col1 INT64, col2 STRING) "
        "OPTIONS (format = 'CSV', uris = ['gs://bucket/path*'])"
    )


def test_create_external_table_ddl_partition_columns():
    """partition_columns emits a WITH PARTITION COLUMNS clause."""
    generated = bigframes.core.sql.table.create_external_table_ddl(
        "my-project.my_dataset.my_table",
        columns={"col1": "INT64", "col2": "STRING"},
        partition_columns={"part1": "DATE", "part2": "STRING"},
        options={"format": "CSV", "uris": ["gs://bucket/path*"]},
    )
    assert generated == (
        "CREATE EXTERNAL TABLE my-project.my_dataset.my_table "
        "(col1 INT64, col2 STRING) "
        "WITH PARTITION COLUMNS (part1 DATE, part2 STRING) "
        "OPTIONS (format = 'CSV', uris = ['gs://bucket/path*'])"
    )


def test_create_external_table_ddl_connection():
    """connection_name emits a backtick-quoted WITH CONNECTION clause."""
    generated = bigframes.core.sql.table.create_external_table_ddl(
        "my-project.my_dataset.my_table",
        columns={"col1": "INT64", "col2": "STRING"},
        connection_name="my-connection",
        options={"format": "CSV", "uris": ["gs://bucket/path*"]},
    )
    assert generated == (
        "CREATE EXTERNAL TABLE my-project.my_dataset.my_table "
        "(col1 INT64, col2 STRING) "
        "WITH CONNECTION `my-connection` "
        "OPTIONS (format = 'CSV', uris = ['gs://bucket/path*'])"
    )


@mock.patch("bigframes.bigquery.table._get_table_metadata")
def test_create_external_table(get_table_metadata_mock, mock_session):
    """The wrapper executes exactly one DDL statement and fetches metadata."""
    bigframes.bigquery.table.create_external_table(
        "my-project.my_dataset.my_table",
        columns={"col1": "INT64", "col2": "STRING"},
        options={"format": "CSV", "uris": ["gs://bucket/path*"]},
        session=mock_session,
    )
    # Exactly one query must go through the supplied session.
    mock_session.read_gbq_query.assert_called_once()
    (executed_sql,), _ = mock_session.read_gbq_query.call_args
    assert executed_sql == (
        "CREATE EXTERNAL TABLE my-project.my_dataset.my_table "
        "(col1 INT64, col2 STRING) "
        "OPTIONS (format = 'CSV', uris = ['gs://bucket/path*'])"
    )
    get_table_metadata_mock.assert_called_once()
Loading