diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py
index 0bbbc418e6..5728f153dd 100644
--- a/bigframes/bigquery/__init__.py
+++ b/bigframes/bigquery/__init__.py
@@ -60,6 +60,7 @@
 from bigframes.bigquery._operations.search import create_vector_index, vector_search
 from bigframes.bigquery._operations.sql import sql_scalar
 from bigframes.bigquery._operations.struct import struct
+from bigframes.bigquery.table import create_external_table
 from bigframes.core.logging import log_adapter
 
 _functions = [
@@ -104,6 +105,8 @@
     sql_scalar,
     # struct ops
     struct,
+    # table ops
+    create_external_table,
 ]
 
 _module = sys.modules[__name__]
@@ -155,6 +158,8 @@
     "sql_scalar",
     # struct ops
     "struct",
+    # table ops
+    "create_external_table",
     # Modules / SQL namespaces
     "ai",
     "ml",
diff --git a/bigframes/bigquery/table.py b/bigframes/bigquery/table.py
new file mode 100644
index 0000000000..51ff78d4c8
--- /dev/null
+++ b/bigframes/bigquery/table.py
@@ -0,0 +1,103 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from typing import Mapping, Optional, Union
+
+import bigframes_vendored.constants
+import google.cloud.bigquery
+import pandas as pd
+
+import bigframes.core.logging.log_adapter as log_adapter
+import bigframes.core.sql.table
+import bigframes.session
+
+
+def _get_table_metadata(
+    *,
+    bqclient: google.cloud.bigquery.Client,
+    table_name: str,
+) -> pd.Series:
+    table_metadata = bqclient.get_table(table_name)
+    table_dict = table_metadata.to_api_repr()
+    return pd.Series(table_dict)
+
+
+@log_adapter.method_logger(custom_base_name="bigquery_table")
+def create_external_table(
+    table_name: str,
+    *,
+    replace: bool = False,
+    if_not_exists: bool = False,
+    columns: Optional[Mapping[str, str]] = None,
+    partition_columns: Optional[Mapping[str, str]] = None,
+    connection_name: Optional[str] = None,
+    options: Mapping[str, Union[str, int, float, bool, list]],
+    session: Optional[bigframes.session.Session] = None,
+) -> pd.Series:
+    """
+    Creates a BigQuery external table.
+
+    See the `BigQuery CREATE EXTERNAL TABLE DDL syntax
+    <https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_external_table_statement>`_
+    for additional reference.
+
+    Args:
+        table_name (str):
+            The name of the table in BigQuery.
+        replace (bool, default False):
+            Whether to replace the table if it already exists.
+        if_not_exists (bool, default False):
+            Whether to ignore the error if the table already exists.
+        columns (Mapping[str, str], optional):
+            The table's schema.
+        partition_columns (Mapping[str, str], optional):
+            The table's partition columns.
+        connection_name (str, optional):
+            The connection to use for the table.
+        options (Mapping[str, Union[str, int, float, bool, list]]):
+            The OPTIONS clause, which specifies the table options.
+        session (bigframes.session.Session, optional):
+            The session to use. If not provided, the default session is used.
+
+    Returns:
+        pandas.Series:
+            A Series with object dtype containing the table metadata. Reference
+            the `BigQuery Table REST API reference
+            <https://cloud.google.com/bigquery/docs/reference/rest/v2/tables>`_
+            for available fields.
+    """
+    import bigframes.pandas as bpd
+
+    sql = bigframes.core.sql.table.create_external_table_ddl(
+        table_name=table_name,
+        replace=replace,
+        if_not_exists=if_not_exists,
+        columns=columns,
+        partition_columns=partition_columns,
+        connection_name=connection_name,
+        options=options,
+    )
+
+    if session is None:
+        bpd.read_gbq_query(sql)
+        session = bpd.get_global_session()
+        assert (
+            session is not None
+        ), f"Missing connection to BigQuery. Please report how you encountered this error at {bigframes_vendored.constants.FEEDBACK_LINK}."
+    else:
+        session.read_gbq_query(sql)
+
+    return _get_table_metadata(bqclient=session.bqclient, table_name=table_name)
diff --git a/bigframes/core/sql/table.py b/bigframes/core/sql/table.py
new file mode 100644
index 0000000000..24a97ed159
--- /dev/null
+++ b/bigframes/core/sql/table.py
@@ -0,0 +1,68 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from typing import Mapping, Optional, Union
+
+
+def create_external_table_ddl(
+    table_name: str,
+    *,
+    replace: bool = False,
+    if_not_exists: bool = False,
+    columns: Optional[Mapping[str, str]] = None,
+    partition_columns: Optional[Mapping[str, str]] = None,
+    connection_name: Optional[str] = None,
+    options: Mapping[str, Union[str, int, float, bool, list]],
+) -> str:
+    """Generates the CREATE EXTERNAL TABLE DDL statement."""
+    statement = ["CREATE"]
+    if replace:
+        statement.append("OR REPLACE")
+    statement.append("EXTERNAL TABLE")
+    if if_not_exists:
+        statement.append("IF NOT EXISTS")
+    statement.append(table_name)
+
+    if columns:
+        column_defs = ", ".join([f"{name} {typ}" for name, typ in columns.items()])
+        statement.append(f"({column_defs})")
+
+    if connection_name:
+        statement.append(f"WITH CONNECTION `{connection_name}`")
+
+    if partition_columns:
+        part_defs = ", ".join(
+            [f"{name} {typ}" for name, typ in partition_columns.items()]
+        )
+        statement.append(f"WITH PARTITION COLUMNS ({part_defs})")
+
+    if options:
+        opts = []
+        for key, value in options.items():
+            if isinstance(value, str):
+                value_sql = repr(value)
+                opts.append(f"{key} = {value_sql}")
+            elif isinstance(value, bool):
+                opts.append(f"{key} = {str(value).upper()}")
+            elif isinstance(value, list):
+                list_str = ", ".join([repr(v) for v in value])
+                opts.append(f"{key} = [{list_str}]")
+            else:
+                opts.append(f"{key} = {value}")
+        options_str = ", ".join(opts)
+        statement.append(f"OPTIONS ({options_str})")
+
+    return " ".join(statement)
diff --git a/tests/system/large/bigquery/test_table.py b/tests/system/large/bigquery/test_table.py
new file mode 100644
index 0000000000..dd956b3a04
--- /dev/null
+++ b/tests/system/large/bigquery/test_table.py
@@ -0,0 +1,36 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import bigframes.bigquery as bbq
+
+
+def test_create_external_table(session, dataset_id, bq_connection):
+    table_name = f"{dataset_id}.test_object_table"
+    uri = "gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*"
+
+    # Create the external table
+    table = bbq.create_external_table(
+        table_name,
+        connection_name=bq_connection,
+        options={"object_metadata": "SIMPLE", "uris": [uri]},
+        session=session,
+    )
+    assert table is not None
+
+    # Read the table to verify
+    import bigframes.pandas as bpd
+
+    bf_df = bpd.read_gbq(table_name)
+    pd_df = bf_df.to_pandas()
+    assert len(pd_df) > 0
diff --git a/tests/unit/bigquery/test_table.py b/tests/unit/bigquery/test_table.py
new file mode 100644
index 0000000000..441130d53d
--- /dev/null
+++ b/tests/unit/bigquery/test_table.py
@@ -0,0 +1,95 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from unittest import mock
+
+import pytest
+
+import bigframes.bigquery.table
+import bigframes.core.sql.table
+import bigframes.session
+
+
+@pytest.fixture
+def mock_session():
+    return mock.create_autospec(spec=bigframes.session.Session)
+
+
+def test_create_external_table_ddl():
+    sql = bigframes.core.sql.table.create_external_table_ddl(
+        "my-project.my_dataset.my_table",
+        columns={"col1": "INT64", "col2": "STRING"},
+        options={"format": "CSV", "uris": ["gs://bucket/path*"]},
+    )
+    expected = "CREATE EXTERNAL TABLE my-project.my_dataset.my_table (col1 INT64, col2 STRING) OPTIONS (format = 'CSV', uris = ['gs://bucket/path*'])"
+    assert sql == expected
+
+
+def test_create_external_table_ddl_replace():
+    sql = bigframes.core.sql.table.create_external_table_ddl(
+        "my-project.my_dataset.my_table",
+        replace=True,
+        columns={"col1": "INT64", "col2": "STRING"},
+        options={"format": "CSV", "uris": ["gs://bucket/path*"]},
+    )
+    expected = "CREATE OR REPLACE EXTERNAL TABLE my-project.my_dataset.my_table (col1 INT64, col2 STRING) OPTIONS (format = 'CSV', uris = ['gs://bucket/path*'])"
+    assert sql == expected
+
+
+def test_create_external_table_ddl_if_not_exists():
+    sql = bigframes.core.sql.table.create_external_table_ddl(
+        "my-project.my_dataset.my_table",
+        if_not_exists=True,
+        columns={"col1": "INT64", "col2": "STRING"},
+        options={"format": "CSV", "uris": ["gs://bucket/path*"]},
+    )
+    expected = "CREATE EXTERNAL TABLE IF NOT EXISTS my-project.my_dataset.my_table (col1 INT64, col2 STRING) OPTIONS (format = 'CSV', uris = ['gs://bucket/path*'])"
+    assert sql == expected
+
+
+def test_create_external_table_ddl_partition_columns():
+    sql = bigframes.core.sql.table.create_external_table_ddl(
+        "my-project.my_dataset.my_table",
+        columns={"col1": "INT64", "col2": "STRING"},
+        partition_columns={"part1": "DATE", "part2": "STRING"},
+        options={"format": "CSV", "uris": ["gs://bucket/path*"]},
+    )
+    expected = "CREATE EXTERNAL TABLE my-project.my_dataset.my_table (col1 INT64, col2 STRING) WITH PARTITION COLUMNS (part1 DATE, part2 STRING) OPTIONS (format = 'CSV', uris = ['gs://bucket/path*'])"
+    assert sql == expected
+
+
+def test_create_external_table_ddl_connection():
+    sql = bigframes.core.sql.table.create_external_table_ddl(
+        "my-project.my_dataset.my_table",
+        columns={"col1": "INT64", "col2": "STRING"},
+        connection_name="my-connection",
+        options={"format": "CSV", "uris": ["gs://bucket/path*"]},
+    )
+    expected = "CREATE EXTERNAL TABLE my-project.my_dataset.my_table (col1 INT64, col2 STRING) WITH CONNECTION `my-connection` OPTIONS (format = 'CSV', uris = ['gs://bucket/path*'])"
+    assert sql == expected
+
+
+@mock.patch("bigframes.bigquery.table._get_table_metadata")
+def test_create_external_table(get_table_metadata_mock, mock_session):
+    bigframes.bigquery.table.create_external_table(
+        "my-project.my_dataset.my_table",
+        columns={"col1": "INT64", "col2": "STRING"},
+        options={"format": "CSV", "uris": ["gs://bucket/path*"]},
+        session=mock_session,
+    )
+    mock_session.read_gbq_query.assert_called_once()
+    generated_sql = mock_session.read_gbq_query.call_args[0][0]
+    expected = "CREATE EXTERNAL TABLE my-project.my_dataset.my_table (col1 INT64, col2 STRING) OPTIONS (format = 'CSV', uris = ['gs://bucket/path*'])"
+    assert generated_sql == expected
+    get_table_metadata_mock.assert_called_once()
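Usage sketch: a minimal example of how the new API composes once this change is applied. The project, dataset, bucket path, and column names below are placeholders, and the metadata["type"] lookup assumes the "type" field of the BigQuery Table REST resource (expected to be "EXTERNAL" for external tables); everything else follows the function signature introduced in bigframes/bigquery/table.py.

import bigframes.bigquery as bbq
import bigframes.pandas as bpd

# Placeholder identifiers -- substitute a real project, dataset, and bucket.
table_name = "my-project.my_dataset.gcs_csv_table"

# Issues a CREATE OR REPLACE EXTERNAL TABLE statement via the default global
# session and returns the table metadata as a pandas Series built from
# Table.to_api_repr().
metadata = bbq.create_external_table(
    table_name,
    replace=True,
    columns={"name": "STRING", "population": "INT64"},
    options={"format": "CSV", "uris": ["gs://my-bucket/cities/*.csv"]},
)
print(metadata["type"])  # Expected to be "EXTERNAL" for an external table.

# The external table is then queryable like any other BigQuery table.
df = bpd.read_gbq(table_name).to_pandas()
print(df.head())

For this call, create_external_table_ddl would produce: CREATE OR REPLACE EXTERNAL TABLE my-project.my_dataset.gcs_csv_table (name STRING, population INT64) OPTIONS (format = 'CSV', uris = ['gs://my-bucket/cities/*.csv']).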