Load data from DataFrame
Stay organized with collections
Save and categorize content based on your preferences.
Load contents of a pandas DataFrame to a table.
Code sample
Python
Before trying this sample, follow the Python setup instructions in the BigQuery quickstart using client libraries.
For more information, see the BigQuery Python API reference documentation.
To authenticate to BigQuery, set up Application Default Credentials.
For more information, see Set up authentication for client libraries.
"""Load the contents of a pandas DataFrame into a BigQuery table."""

import datetime

from google.cloud import bigquery
import pandas
import pytz


def _utc_moment(tz_name, *ymdhms):
    """Interpret *ymdhms* as a wall-clock time in *tz_name*, converted to UTC.

    BigQuery TIMESTAMP columns store UTC instants, so each localized
    release date is normalized here before loading.
    """
    local_tz = pytz.timezone(tz_name)
    naive = datetime.datetime(*ymdhms)
    return local_tz.localize(naive).astimezone(pytz.utc)


# Construct a BigQuery client object.
client = bigquery.Client()

# TODO(developer): Set table_id to the ID of the table to create.
# table_id = "your-project.your_dataset.your_table_name"

movies = [
    {
        "title": "The Meaning of Life",
        "release_year": 1983,
        "length_minutes": 112.5,
        "release_date": _utc_moment("Europe/Paris", 1983, 5, 9, 13, 0, 0),
        # Assume UTC timezone when a datetime object contains no timezone.
        "dvd_release": datetime.datetime(2002, 1, 22, 7, 0, 0),
    },
    {
        "title": "Monty Python and the Holy Grail",
        "release_year": 1975,
        "length_minutes": 91.5,
        "release_date": _utc_moment("Europe/London", 1975, 4, 9, 23, 59, 2),
        "dvd_release": datetime.datetime(2002, 7, 16, 9, 0, 0),
    },
    {
        "title": "Life of Brian",
        "release_year": 1979,
        "length_minutes": 94.25,
        "release_date": _utc_moment("America/New_York", 1979, 8, 17, 23, 59, 5),
        "dvd_release": datetime.datetime(2008, 1, 14, 8, 0, 0),
    },
    {
        "title": "And Now for Something Completely Different",
        "release_year": 1971,
        "length_minutes": 88.0,
        "release_date": _utc_moment("Europe/London", 1971, 9, 28, 23, 59, 7),
        "dvd_release": datetime.datetime(2003, 10, 22, 10, 0, 0),
    },
]

dataframe = pandas.DataFrame(
    movies,
    # In the loaded table, the column order reflects the order of the
    # columns in the DataFrame.
    columns=[
        "title",
        "release_year",
        "length_minutes",
        "release_date",
        "dvd_release",
    ],
    # Optionally, set a named index, which can also be written to the
    # BigQuery table.
    index=pandas.Index(
        ["Q24980", "Q25043", "Q24953", "Q16403"], name="wikidata_id"
    ),
)

job_config = bigquery.LoadJobConfig(
    # Specify a (partial) schema. All columns are always written to the
    # table. The schema is used to assist in data type definitions.
    schema=[
        # Specify the type of columns whose type cannot be auto-detected.
        # For example the "title" column uses pandas dtype "object", so
        # its data type is ambiguous.
        bigquery.SchemaField("title", bigquery.enums.SqlTypeNames.STRING),
        # Indexes are written if included in the schema by name.
        bigquery.SchemaField("wikidata_id", bigquery.enums.SqlTypeNames.STRING),
    ],
    # Optionally, set the write disposition. BigQuery appends loaded rows
    # to an existing table by default, but with WRITE_TRUNCATE write
    # disposition it replaces the table with the loaded data.
    write_disposition="WRITE_TRUNCATE",
)

job = client.load_table_from_dataframe(
    dataframe, table_id, job_config=job_config
)  # Make an API request.
job.result()  # Wait for the job to complete.

table = client.get_table(table_id)  # Make an API request.
print(
    "Loaded {} rows and {} columns to {}".format(
        table.num_rows, len(table.schema), table_id
    )
)
Except as otherwise noted, the content of this page is licensed under the Creative Commons Attribution 4.0 License, and code samples are licensed under the Apache 2.0 License. For details, see the Google Developers Site Policies. Java is a registered trademark of Oracle and/or its affiliates.
[[["Easy to understand","easyToUnderstand","thumb-up"],["Solved my problem","solvedMyProblem","thumb-up"],["Other","otherUp","thumb-up"]],[["Hard to understand","hardToUnderstand","thumb-down"],["Incorrect information or sample code","incorrectInformationOrSampleCode","thumb-down"],["Missing the information/samples I need","missingTheInformationSamplesINeed","thumb-down"],["Other","otherDown","thumb-down"]],[],[[["\u003cp\u003eThis code sample demonstrates how to load the contents of a pandas DataFrame into a BigQuery table using the Google Cloud client library for Python.\u003c/p\u003e\n"],["\u003cp\u003eThe code utilizes a sample DataFrame with movie data, including title, release year, length, and release dates, to illustrate the data loading process.\u003c/p\u003e\n"],["\u003cp\u003eThe \u003ccode\u003eLoadJobConfig\u003c/code\u003e allows for specifying a schema to define data types and setting the \u003ccode\u003ewrite_disposition\u003c/code\u003e to control how the data is written to the table (e.g., appending or truncating).\u003c/p\u003e\n"],["\u003cp\u003eAuthentication to BigQuery is necessary and can be achieved by setting up Application Default Credentials, as indicated by the documentation links provided.\u003c/p\u003e\n"],["\u003cp\u003eAfter loading, the code confirms the successful transfer of the DataFrame into a BigQuery table by getting the table information and printing its row count, column number, and the table ID.\u003c/p\u003e\n"]]],[],null,["# Load data from DataFrame\n\nLoad contents of a pandas DataFrame to a table.\n\nCode sample\n-----------\n\n### Python\n\n\nBefore trying this sample, follow the Python setup instructions in the\n[BigQuery quickstart using\nclient libraries](/bigquery/docs/quickstarts/quickstart-client-libraries).\n\n\nFor more information, see the\n[BigQuery Python API\nreference documentation](/python/docs/reference/bigquery/latest).\n\n\nTo authenticate to BigQuery, set up Application Default Credentials.\nFor more 
information, see\n\n[Set up authentication for client libraries](/bigquery/docs/authentication#client-libs).\n\n import datetime\n\n from google.cloud import https://cloud.google.com/python/docs/reference/bigquery/latest/\n import pandas\n import pytz\n\n # Construct a BigQuery client object.\n client = https://cloud.google.com/python/docs/reference/bigquery/latest/.https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.client.Client.html()\n\n # TODO(developer): Set table_id to the ID of the table to create.\n # table_id = \"your-project.your_dataset.your_table_name\"\n\n records = [\n {\n \"title\": \"The Meaning of Life\",\n \"release_year\": 1983,\n \"length_minutes\": 112.5,\n \"release_date\": pytz.timezone(\"Europe/Paris\")\n .localize(datetime.datetime(1983, 5, 9, 13, 0, 0))\n .astimezone(pytz.utc),\n # Assume UTC timezone when a datetime object contains no timezone.\n \"dvd_release\": datetime.datetime(2002, 1, 22, 7, 0, 0),\n },\n {\n \"title\": \"Monty Python and the Holy Grail\",\n \"release_year\": 1975,\n \"length_minutes\": 91.5,\n \"release_date\": pytz.timezone(\"Europe/London\")\n .localize(datetime.datetime(1975, 4, 9, 23, 59, 2))\n .astimezone(pytz.utc),\n \"dvd_release\": datetime.datetime(2002, 7, 16, 9, 0, 0),\n },\n {\n \"title\": \"Life of Brian\",\n \"release_year\": 1979,\n \"length_minutes\": 94.25,\n \"release_date\": pytz.timezone(\"America/New_York\")\n .localize(datetime.datetime(1979, 8, 17, 23, 59, 5))\n .astimezone(pytz.utc),\n \"dvd_release\": datetime.datetime(2008, 1, 14, 8, 0, 0),\n },\n {\n \"title\": \"And Now for Something Completely Different\",\n \"release_year\": 1971,\n \"length_minutes\": 88.0,\n \"release_date\": pytz.timezone(\"Europe/London\")\n .localize(datetime.datetime(1971, 9, 28, 23, 59, 7))\n .astimezone(pytz.utc),\n \"dvd_release\": datetime.datetime(2003, 10, 22, 10, 0, 0),\n },\n ]\n dataframe = pandas.DataFrame(\n records,\n # In the loaded table, the column order reflects 
the order of the\n # columns in the DataFrame.\n columns=[\n \"title\",\n \"release_year\",\n \"length_minutes\",\n \"release_date\",\n \"dvd_release\",\n ],\n # Optionally, set a named index, which can also be written to the\n # BigQuery table.\n index=pandas.Index(\n [\"Q24980\", \"Q25043\", \"Q24953\", \"Q16403\"], name=\"wikidata_id\"\n ),\n )\n job_config = https://cloud.google.com/python/docs/reference/bigquery/latest/.https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.LoadJobConfig.html(\n # Specify a (partial) schema. All columns are always written to the\n # table. The schema is used to assist in data type definitions.\n schema=[\n # Specify the type of columns whose type cannot be auto-detected. For\n # example the \"title\" column uses pandas dtype \"object\", so its\n # data type is ambiguous.\n https://cloud.google.com/python/docs/reference/bigquery/latest/.https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.schema.SchemaField.html(\"title\", https://cloud.google.com/python/docs/reference/bigquery/latest/.https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.enums.html.https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.enums.SqlTypeNames.html.https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.enums.DecimalTargetType.html#google_cloud_bigquery_enums_DecimalTargetType_STRING),\n # Indexes are written if included in the schema by name.\n https://cloud.google.com/python/docs/reference/bigquery/latest/.https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.schema.SchemaField.html(\"wikidata_id\", 
https://cloud.google.com/python/docs/reference/bigquery/latest/.https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.enums.html.https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.enums.SqlTypeNames.html.https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.enums.DecimalTargetType.html#google_cloud_bigquery_enums_DecimalTargetType_STRING),\n ],\n # Optionally, set the write disposition. BigQuery appends loaded rows\n # to an existing table by default, but with WRITE_TRUNCATE write\n # disposition it replaces the table with the loaded data.\n write_disposition=\"WRITE_TRUNCATE\",\n )\n\n job = client.https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.client.Client.html#google_cloud_bigquery_client_Client_load_table_from_dataframe(\n dataframe, table_id, job_config=job_config\n ) # Make an API request.\n https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.html.result() # Wait for the job to complete.\n\n table = client.https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.client.Client.html#google_cloud_bigquery_client_Client_get_table(table_id) # Make an API request.\n print(\n \"Loaded {} rows and {} columns to {}\".format(\n table.https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.table.Table.html#google_cloud_bigquery_table_Table_num_rows, len(table.schema), table_id\n )\n )\n\nWhat's next\n-----------\n\n\nTo search and filter code samples for other Google Cloud products, see the\n[Google Cloud sample browser](/docs/samples?product=bigquery)."]]