Load data from DataFrame
Stay organized with collections
Save and categorize content based on your preferences.
Load contents of a pandas DataFrame to a table.
Code sample
Python
Before trying this sample, follow the Python setup instructions in the
BigQuery quickstart using
client libraries.
For more information, see the
BigQuery Python API
reference documentation.
To authenticate to BigQuery, set up Application Default Credentials.
For more information, see
Set up authentication for client libraries.
import datetime

from google.cloud import bigquery
import pandas
import pytz

# Construct a BigQuery client object.
client = bigquery.Client()

# TODO(developer): Set table_id to the ID of the table to create.
# table_id = "your-project.your_dataset.your_table_name"


def _premiere_utc(tz_name, naive_dt):
    """Interpret *naive_dt* as wall-clock time in *tz_name* and convert to UTC."""
    return pytz.timezone(tz_name).localize(naive_dt).astimezone(pytz.utc)


# (title, release_year, length_minutes, premiere timezone, premiere, dvd_release)
_MOVIES = [
    (
        "The Meaning of Life",
        1983,
        112.5,
        "Europe/Paris",
        datetime.datetime(1983, 5, 9, 13, 0, 0),
        datetime.datetime(2002, 1, 22, 7, 0, 0),
    ),
    (
        "Monty Python and the Holy Grail",
        1975,
        91.5,
        "Europe/London",
        datetime.datetime(1975, 4, 9, 23, 59, 2),
        datetime.datetime(2002, 7, 16, 9, 0, 0),
    ),
    (
        "Life of Brian",
        1979,
        94.25,
        "America/New_York",
        datetime.datetime(1979, 8, 17, 23, 59, 5),
        datetime.datetime(2008, 1, 14, 8, 0, 0),
    ),
    (
        "And Now for Something Completely Different",
        1971,
        88.0,
        "Europe/London",
        datetime.datetime(1971, 9, 28, 23, 59, 7),
        datetime.datetime(2003, 10, 22, 10, 0, 0),
    ),
]

records = [
    {
        "title": title,
        "release_year": year,
        "length_minutes": minutes,
        "release_date": _premiere_utc(tz_name, premiere),
        # Assume UTC timezone when a datetime object contains no timezone.
        "dvd_release": dvd,
    }
    for title, year, minutes, tz_name, premiere, dvd in _MOVIES
]

dataframe = pandas.DataFrame(
    records,
    # In the loaded table, the column order reflects the order of the
    # columns in the DataFrame.
    columns=[
        "title",
        "release_year",
        "length_minutes",
        "release_date",
        "dvd_release",
    ],
    # Optionally, set a named index, which can also be written to the
    # BigQuery table.
    index=pandas.Index(
        ["Q24980", "Q25043", "Q24953", "Q16403"], name="wikidata_id"
    ),
)

job_config = bigquery.LoadJobConfig(
    # Specify a (partial) schema. All columns are always written to the
    # table. The schema is used to assist in data type definitions.
    schema=[
        # Specify the type of columns whose type cannot be auto-detected. For
        # example the "title" column uses pandas dtype "object", so its
        # data type is ambiguous.
        bigquery.SchemaField("title", bigquery.enums.SqlTypeNames.STRING),
        # Indexes are written if included in the schema by name.
        bigquery.SchemaField("wikidata_id", bigquery.enums.SqlTypeNames.STRING),
    ],
    # Optionally, set the write disposition. BigQuery appends loaded rows
    # to an existing table by default, but with WRITE_TRUNCATE write
    # disposition it replaces the table with the loaded data.
    write_disposition="WRITE_TRUNCATE",
)

# Start the load job and block until it finishes.
job = client.load_table_from_dataframe(
    dataframe, table_id, job_config=job_config
)  # Make an API request.
job.result()  # Wait for the job to complete.

# Fetch the destination table to confirm what was written.
table = client.get_table(table_id)  # Make an API request.
print(
    "Loaded {} rows and {} columns to {}".format(
        table.num_rows, len(table.schema), table_id
    )
)
Except as otherwise noted, the content of this page is licensed under the Creative Commons Attribution 4.0 License, and code samples are licensed under the Apache 2.0 License. For details, see the Google Developers Site Policies. Java is a registered trademark of Oracle and/or its affiliates.
[[["Easy to understand","easyToUnderstand","thumb-up"],["Solved my problem","solvedMyProblem","thumb-up"],["Other","otherUp","thumb-up"]],[["Hard to understand","hardToUnderstand","thumb-down"],["Incorrect information or sample code","incorrectInformationOrSampleCode","thumb-down"],["Missing the information/samples I need","missingTheInformationSamplesINeed","thumb-down"],["Other","otherDown","thumb-down"]],[],[[["\u003cp\u003eThis code sample demonstrates how to load the contents of a pandas DataFrame into a BigQuery table using the Google Cloud client library for Python.\u003c/p\u003e\n"],["\u003cp\u003eThe code utilizes a sample DataFrame with movie data, including title, release year, length, and release dates, to illustrate the data loading process.\u003c/p\u003e\n"],["\u003cp\u003eThe \u003ccode\u003eLoadJobConfig\u003c/code\u003e allows for specifying a schema to define data types and setting the \u003ccode\u003ewrite_disposition\u003c/code\u003e to control how the data is written to the table (e.g., appending or truncating).\u003c/p\u003e\n"],["\u003cp\u003eAuthentication to BigQuery is necessary and can be achieved by setting up Application Default Credentials, as indicated by the documentation links provided.\u003c/p\u003e\n"],["\u003cp\u003eAfter loading, the code confirms the successful transfer of the DataFrame into a BigQuery table by getting the table information and printing its row count, column number, and the table ID.\u003c/p\u003e\n"]]],[],null,["# Load data from DataFrame\n\nLoad contents of a pandas DataFrame to a table.\n\nCode sample\n-----------\n\n### Python\n\n\nBefore trying this sample, follow the Python setup instructions in the\n[BigQuery quickstart using\nclient libraries](/bigquery/docs/quickstarts/quickstart-client-libraries).\n\n\nFor more information, see the\n[BigQuery Python API\nreference documentation](/python/docs/reference/bigquery/latest).\n\n\nTo authenticate to BigQuery, set up Application Default Credentials.\nFor more 
information, see\n\n[Set up authentication for client libraries](/bigquery/docs/authentication#client-libs).\n\n import datetime\n\n from google.cloud import https://cloud.google.com/python/docs/reference/bigquery/latest/\n import pandas\n import pytz\n\n # Construct a BigQuery client object.\n client = https://cloud.google.com/python/docs/reference/bigquery/latest/.https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.client.Client.html()\n\n # TODO(developer): Set table_id to the ID of the table to create.\n # table_id = \"your-project.your_dataset.your_table_name\"\n\n records = [\n {\n \"title\": \"The Meaning of Life\",\n \"release_year\": 1983,\n \"length_minutes\": 112.5,\n \"release_date\": pytz.timezone(\"Europe/Paris\")\n .localize(datetime.datetime(1983, 5, 9, 13, 0, 0))\n .astimezone(pytz.utc),\n # Assume UTC timezone when a datetime object contains no timezone.\n \"dvd_release\": datetime.datetime(2002, 1, 22, 7, 0, 0),\n },\n {\n \"title\": \"Monty Python and the Holy Grail\",\n \"release_year\": 1975,\n \"length_minutes\": 91.5,\n \"release_date\": pytz.timezone(\"Europe/London\")\n .localize(datetime.datetime(1975, 4, 9, 23, 59, 2))\n .astimezone(pytz.utc),\n \"dvd_release\": datetime.datetime(2002, 7, 16, 9, 0, 0),\n },\n {\n \"title\": \"Life of Brian\",\n \"release_year\": 1979,\n \"length_minutes\": 94.25,\n \"release_date\": pytz.timezone(\"America/New_York\")\n .localize(datetime.datetime(1979, 8, 17, 23, 59, 5))\n .astimezone(pytz.utc),\n \"dvd_release\": datetime.datetime(2008, 1, 14, 8, 0, 0),\n },\n {\n \"title\": \"And Now for Something Completely Different\",\n \"release_year\": 1971,\n \"length_minutes\": 88.0,\n \"release_date\": pytz.timezone(\"Europe/London\")\n .localize(datetime.datetime(1971, 9, 28, 23, 59, 7))\n .astimezone(pytz.utc),\n \"dvd_release\": datetime.datetime(2003, 10, 22, 10, 0, 0),\n },\n ]\n dataframe = pandas.DataFrame(\n records,\n # In the loaded table, the column order reflects 
the order of the\n # columns in the DataFrame.\n columns=[\n \"title\",\n \"release_year\",\n \"length_minutes\",\n \"release_date\",\n \"dvd_release\",\n ],\n # Optionally, set a named index, which can also be written to the\n # BigQuery table.\n index=pandas.Index(\n [\"Q24980\", \"Q25043\", \"Q24953\", \"Q16403\"], name=\"wikidata_id\"\n ),\n )\n job_config = https://cloud.google.com/python/docs/reference/bigquery/latest/.https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.LoadJobConfig.html(\n # Specify a (partial) schema. All columns are always written to the\n # table. The schema is used to assist in data type definitions.\n schema=[\n # Specify the type of columns whose type cannot be auto-detected. For\n # example the \"title\" column uses pandas dtype \"object\", so its\n # data type is ambiguous.\n https://cloud.google.com/python/docs/reference/bigquery/latest/.https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.schema.SchemaField.html(\"title\", https://cloud.google.com/python/docs/reference/bigquery/latest/.https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.enums.html.https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.enums.SqlTypeNames.html.https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.enums.DecimalTargetType.html#google_cloud_bigquery_enums_DecimalTargetType_STRING),\n # Indexes are written if included in the schema by name.\n https://cloud.google.com/python/docs/reference/bigquery/latest/.https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.schema.SchemaField.html(\"wikidata_id\", 
https://cloud.google.com/python/docs/reference/bigquery/latest/.https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.enums.html.https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.enums.SqlTypeNames.html.https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.enums.DecimalTargetType.html#google_cloud_bigquery_enums_DecimalTargetType_STRING),\n ],\n # Optionally, set the write disposition. BigQuery appends loaded rows\n # to an existing table by default, but with WRITE_TRUNCATE write\n # disposition it replaces the table with the loaded data.\n write_disposition=\"WRITE_TRUNCATE\",\n )\n\n job = client.https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.client.Client.html#google_cloud_bigquery_client_Client_load_table_from_dataframe(\n dataframe, table_id, job_config=job_config\n ) # Make an API request.\n https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.html.result() # Wait for the job to complete.\n\n table = client.https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.client.Client.html#google_cloud_bigquery_client_Client_get_table(table_id) # Make an API request.\n print(\n \"Loaded {} rows and {} columns to {}\".format(\n table.https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.table.Table.html#google_cloud_bigquery_table_Table_num_rows, len(table.schema), table_id\n )\n )\n\nWhat's next\n-----------\n\n\nTo search and filter code samples for other Google Cloud products, see the\n[Google Cloud sample browser](/docs/samples?product=bigquery)."]]