""" Google BigQuery support """
from __future__ import annotations

from typing import (
    TYPE_CHECKING,
    Any,
)

from pandas.compat._optional import import_optional_dependency

if TYPE_CHECKING:
    # Imported for annotations only; avoids a runtime import of the
    # top-level pandas namespace from this module.
    from pandas import DataFrame


def _try_import(): | |
# since pandas is a dependency of pandas-gbq | |
# we need to import on first use | |
msg = ( | |
"pandas-gbq is required to load data from Google BigQuery. " | |
"See the docs: https://pandas-gbq.readthedocs.io." | |
) | |
pandas_gbq = import_optional_dependency("pandas_gbq", extra=msg) | |
return pandas_gbq | |
def read_gbq(
    query: str,
    project_id: str | None = None,
    index_col: str | None = None,
    col_order: list[str] | None = None,
    reauth: bool = False,
    auth_local_webserver: bool = True,
    dialect: str | None = None,
    location: str | None = None,
    configuration: dict[str, Any] | None = None,
    credentials=None,
    use_bqstorage_api: bool | None = None,
    max_results: int | None = None,
    progress_bar_type: str | None = None,
) -> DataFrame:
    """
    Load data from Google BigQuery.

    This function requires the `pandas-gbq package
    <https://pandas-gbq.readthedocs.io>`__.

    See the `How to authenticate with Google BigQuery
    <https://pandas-gbq.readthedocs.io/en/latest/howto/authentication.html>`__
    guide for authentication instructions.

    Parameters
    ----------
    query : str
        SQL-Like Query to return data values.
    project_id : str, optional
        Google BigQuery Account project ID. Optional when available from
        the environment.
    index_col : str, optional
        Name of result column to use for index in results DataFrame.
    col_order : list(str), optional
        List of BigQuery column names in the desired order for results
        DataFrame.
    reauth : bool, default False
        Force Google BigQuery to re-authenticate the user. This is useful
        if multiple accounts are used.
    auth_local_webserver : bool, default True
        Use the `local webserver flow`_ instead of the `console flow`_
        when getting user credentials.

        .. _local webserver flow:
            https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server
        .. _console flow:
            https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console

        *New in version 0.2.0 of pandas-gbq*.

        .. versionchanged:: 1.5.0
           Default value is changed to ``True``. Google has deprecated the
           ``auth_local_webserver = False`` `"out of band" (copy-paste)
           flow
           <https://developers.googleblog.com/2022/02/making-oauth-flows-safer.html?m=1#disallowed-oob>`_.
    dialect : str, default 'legacy'
        Note: The default value is changing to 'standard' in a future version.

        SQL syntax dialect to use. Value can be one of:

        ``'legacy'``
            Use BigQuery's legacy SQL dialect. For more information see
            `BigQuery Legacy SQL Reference
            <https://cloud.google.com/bigquery/docs/reference/legacy-sql>`__.
        ``'standard'``
            Use BigQuery's standard SQL, which is
            compliant with the SQL 2011 standard. For more information
            see `BigQuery Standard SQL Reference
            <https://cloud.google.com/bigquery/docs/reference/standard-sql/>`__.
    location : str, optional
        Location where the query job should run. See the `BigQuery locations
        documentation
        <https://cloud.google.com/bigquery/docs/dataset-locations>`__ for a
        list of available locations. The location must match that of any
        datasets used in the query.

        *New in version 0.5.0 of pandas-gbq*.
    configuration : dict, optional
        Query config parameters for job processing.
        For example:

            configuration = {'query': {'useQueryCache': False}}

        For more information see `BigQuery REST API Reference
        <https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query>`__.
    credentials : google.auth.credentials.Credentials, optional
        Credentials for accessing Google APIs. Use this parameter to override
        default credentials, such as to use Compute Engine
        :class:`google.auth.compute_engine.Credentials` or Service Account
        :class:`google.oauth2.service_account.Credentials` directly.

        *New in version 0.8.0 of pandas-gbq*.
    use_bqstorage_api : bool, default False
        Use the `BigQuery Storage API
        <https://cloud.google.com/bigquery/docs/reference/storage/>`__ to
        download query results quickly, but at an increased cost. To use this
        API, first `enable it in the Cloud Console
        <https://console.cloud.google.com/apis/library/bigquerystorage.googleapis.com>`__.
        You must also have the `bigquery.readsessions.create
        <https://cloud.google.com/bigquery/docs/access-control#roles>`__
        permission on the project you are billing queries to.

        This feature requires version 0.10.0 or later of the ``pandas-gbq``
        package. It also requires the ``google-cloud-bigquery-storage`` and
        ``fastavro`` packages.

        When left as ``None`` the argument is not forwarded to pandas-gbq.

        .. versionadded:: 0.25.0
    max_results : int, optional
        If set, limit the maximum number of rows to fetch from the query
        results. When left as ``None`` the argument is not forwarded to
        pandas-gbq.

        *New in version 0.12.0 of pandas-gbq*.

        .. versionadded:: 1.1.0
    progress_bar_type : str, optional
        If set, use the `tqdm <https://tqdm.github.io/>`__ library to
        display a progress bar while the data downloads. Install the
        ``tqdm`` package to use this feature.

        Possible values of ``progress_bar_type`` include:

        ``None``
            No progress bar.
        ``'tqdm'``
            Use the :func:`tqdm.tqdm` function to print a progress bar
            to :data:`sys.stderr`.
        ``'tqdm_notebook'``
            Use the :func:`tqdm.tqdm_notebook` function to display a
            progress bar as a Jupyter notebook widget.
        ``'tqdm_gui'``
            Use the :func:`tqdm.tqdm_gui` function to display a
            progress bar as a graphical dialog box.

        Note that this feature requires version 0.12.0 or later of the
        ``pandas-gbq`` package. And it requires the ``tqdm`` package. Slightly
        different than ``pandas-gbq``, here the default is ``None``.

        .. versionadded:: 1.0.0

    Returns
    -------
    DataFrame
        DataFrame representing results of query.

    See Also
    --------
    pandas_gbq.read_gbq : This function in the pandas-gbq library.
    DataFrame.to_gbq : Write a DataFrame to Google BigQuery.
    """
    pandas_gbq = _try_import()

    kwargs: dict[str, str | bool | int | None] = {}

    # START: new kwargs.  Don't populate unless explicitly set.
    if use_bqstorage_api is not None:
        kwargs["use_bqstorage_api"] = use_bqstorage_api
    if max_results is not None:
        kwargs["max_results"] = max_results

    # Unlike the two options above, progress_bar_type is always forwarded
    # (even when None) so that this wrapper's default of None is the value
    # pandas-gbq receives, as the docstring above describes.
    kwargs["progress_bar_type"] = progress_bar_type
    # END: new kwargs

    return pandas_gbq.read_gbq(
        query,
        project_id=project_id,
        index_col=index_col,
        col_order=col_order,
        reauth=reauth,
        auth_local_webserver=auth_local_webserver,
        dialect=dialect,
        location=location,
        configuration=configuration,
        credentials=credentials,
        **kwargs,
    )


def to_gbq(
    dataframe: DataFrame,
    destination_table: str,
    project_id: str | None = None,
    chunksize: int | None = None,
    reauth: bool = False,
    if_exists: str = "fail",
    auth_local_webserver: bool = True,
    table_schema: list[dict[str, str]] | None = None,
    location: str | None = None,
    progress_bar: bool = True,
    credentials=None,
) -> None:
    """
    Write a DataFrame to Google BigQuery via the pandas-gbq package.

    This is a thin wrapper: ``pandas_gbq`` is imported on first use and
    every argument is forwarded unchanged to ``pandas_gbq.to_gbq``, which
    defines the meaning and accepted values of each option.

    Parameters
    ----------
    dataframe : DataFrame
        Data to be written; passed as the first positional argument.
    destination_table : str
        Target table; passed as the second positional argument.
    project_id : str, optional
        Forwarded as ``project_id``.
    chunksize : int, optional
        Forwarded as ``chunksize``.
    reauth : bool, default False
        Forwarded as ``reauth``.
    if_exists : str, default 'fail'
        Forwarded as ``if_exists``.
    auth_local_webserver : bool, default True
        Forwarded as ``auth_local_webserver``.
    table_schema : list of dicts, optional
        Forwarded as ``table_schema``.
    location : str, optional
        Forwarded as ``location``.
    progress_bar : bool, default True
        Forwarded as ``progress_bar``.
    credentials : optional
        Forwarded as ``credentials``.

    Returns
    -------
    None
        The return value of ``pandas_gbq.to_gbq`` is discarded.
    """
    pandas_gbq = _try_import()
    pandas_gbq.to_gbq(
        dataframe,
        destination_table,
        project_id=project_id,
        chunksize=chunksize,
        reauth=reauth,
        if_exists=if_exists,
        auth_local_webserver=auth_local_webserver,
        table_schema=table_schema,
        location=location,
        progress_bar=progress_bar,
        credentials=credentials,
    )