Load a Parquet file

Load a Parquet file from Cloud Storage into a new table.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample

Go

Before trying this sample, follow the Go setup instructions in the BigQuery quickstart using client libraries. For more information, see the BigQuery Go API reference documentation.

To authenticate to BigQuery, set up Application Default Credentials. For more information, see Set up authentication for client libraries.

  import 
  
 ( 
  
 "context" 
  
 "fmt" 
  
 "cloud.google.com/go/bigquery" 
 ) 
 // importParquet demonstrates loading Apache Parquet data from Cloud Storage into a table. 
 func 
  
 importParquet 
 ( 
 projectID 
 , 
  
 datasetID 
 , 
  
 tableID 
  
 string 
 ) 
  
 error 
  
 { 
  
 // projectID := "my-project-id" 
  
 // datasetID := "mydataset" 
  
 // tableID := "mytable" 
  
 ctx 
  
 := 
  
 context 
 . 
 Background 
 () 
  
 client 
 , 
  
 err 
  
 := 
  
 bigquery 
 . 
 NewClient 
 ( 
 ctx 
 , 
  
 projectID 
 ) 
  
 if 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 return 
  
 fmt 
 . 
 Errorf 
 ( 
 "bigquery.NewClient: %w" 
 , 
  
 err 
 ) 
  
 } 
  
 defer 
  
 client 
 . 
 Close 
 () 
  
 gcsRef 
  
 := 
  
 bigquery 
 . 
  NewGCSReference 
 
 ( 
 "gs://cloud-samples-data/bigquery/us-states/us-states.parquet" 
 ) 
  
 gcsRef 
 . 
 SourceFormat 
  
 = 
  
 bigquery 
 . 
  Parquet 
 
  
 gcsRef 
 . 
 AutoDetect 
  
 = 
  
 true 
  
 loader 
  
 := 
  
 client 
 . 
 Dataset 
 ( 
 datasetID 
 ). 
 Table 
 ( 
 tableID 
 ). 
  LoaderFrom 
 
 ( 
 gcsRef 
 ) 
  
 job 
 , 
  
 err 
  
 := 
  
 loader 
 . 
 Run 
 ( 
 ctx 
 ) 
  
 if 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 return 
  
 err 
  
 } 
  
 status 
 , 
  
 err 
  
 := 
  
 job 
 . 
 Wait 
 ( 
 ctx 
 ) 
  
 if 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 return 
  
 err 
  
 } 
  
 if 
  
 status 
 . 
  Err 
 
 () 
  
 != 
  
 nil 
  
 { 
  
 return 
  
 fmt 
 . 
 Errorf 
 ( 
 "job completed with error: %w" 
 , 
  
 status 
 . 
  Err 
 
 ()) 
  
 } 
  
 return 
  
 nil 
 } 
 

Java

Before trying this sample, follow the Java setup instructions in the BigQuery quickstart using client libraries. For more information, see the BigQuery Java API reference documentation.

To authenticate to BigQuery, set up Application Default Credentials. For more information, see Set up authentication for client libraries.

  import 
  
 com.google.cloud.bigquery. BigQuery 
 
 ; 
 import 
  
 com.google.cloud.bigquery. BigQueryException 
 
 ; 
 import 
  
 com.google.cloud.bigquery. BigQueryOptions 
 
 ; 
 import 
  
 com.google.cloud.bigquery. FormatOptions 
 
 ; 
 import 
  
 com.google.cloud.bigquery. Job 
 
 ; 
 import 
  
 com.google.cloud.bigquery. JobInfo 
 
 ; 
 import 
  
 com.google.cloud.bigquery. LoadJobConfiguration 
 
 ; 
 import 
  
 com.google.cloud.bigquery. TableId 
 
 ; 
 import 
  
 java.math.BigInteger 
 ; 
 /** Sample that loads a Parquet file from Cloud Storage into a BigQuery table. */
 public class LoadParquet {

   public static void main(String[] args) {
     // TODO(developer): Replace these variables before running the sample.
     String datasetName = "MY_DATASET_NAME";
     String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet";
     String tableName = "us_states";
     loadParquet(datasetName, tableName, sourceUri);
   }

   /**
    * Runs a load job that imports the Parquet file at {@code sourceUri} into
    * {@code datasetName.tableName}, waits for it to finish, and prints the outcome.
    *
    * <p>Failures are reported on standard output rather than thrown.
    *
    * @param datasetName dataset that contains (or will contain) the destination table
    * @param tableName destination table name
    * @param sourceUri {@code gs://} URI of the Parquet file to load
    */
   public static void loadParquet(String datasetName, String tableName, String sourceUri) {
     try {
       // Initialize client that will be used to send requests. This client only needs to be created
       // once, and can be reused for multiple requests.
       BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();

       TableId tableId = TableId.of(datasetName, tableName);

       LoadJobConfiguration configuration =
           LoadJobConfiguration.builder(tableId, sourceUri)
               .setFormatOptions(FormatOptions.parquet())
               .build();

       // For more information on Job see:
       // https://googleapis.dev/java/google-cloud-clients/latest/index.html?com/google/cloud/bigquery/package-summary.html
       // Load the table
       Job job = bigquery.create(JobInfo.of(configuration));

       // Blocks until this load table job completes its execution, either failing or succeeding.
       Job completedJob = job.waitFor();
       if (completedJob == null) {
         System.out.println("Job not executed since it no longer exists.");
         return;
       } else if (completedJob.getStatus().getError() != null) {
         // Report the error from the completed job; the pre-wait `job` object still holds the
         // status captured when the job was created and would not show the failure.
         System.out.println(
             "BigQuery was unable to load the table due to an error: \n"
                 + completedJob.getStatus().getError());
         return;
       }

       // Check number of rows loaded into the table
       BigInteger numRows = bigquery.getTable(tableId).getNumRows();
       System.out.printf("Loaded %d rows. \n", numRows);
       System.out.println("GCS parquet loaded successfully.");
     } catch (BigQueryException | InterruptedException e) {
       System.out.println("GCS Parquet was not loaded. \n" + e.toString());
     }
   }
 }
 

Node.js

Before trying this sample, follow the Node.js setup instructions in the BigQuery quickstart using client libraries. For more information, see the BigQuery Node.js API reference documentation.

To authenticate to BigQuery, set up Application Default Credentials. For more information, see Set up authentication for client libraries.

  // Import the Google Cloud client libraries 
 const 
  
 { 
 BigQuery 
 } 
  
 = 
  
 require 
 ( 
 ' @google-cloud/bigquery 
' 
 ); 
 const 
  
 { 
 Storage 
 } 
  
 = 
  
 require 
 ( 
 ' @google-cloud/storage 
' 
 ); 
 /**
  * Location of the Parquet file to load. The defaults point at the public sample
  * https://storage.googleapis.com/cloud-samples-data/bigquery/us-states/us-states.parquet
  *
  * TODO(developer): Replace the following lines with the path to your file.
  */
 const bucketName = 'cloud-samples-data';
 const filename = 'bigquery/us-states/us-states.parquet';

 // Shared BigQuery and Cloud Storage clients used by the load function below.
 const bigquery = new BigQuery();
 const storage = new Storage();
 /**
  * Loads the Parquet file identified by the module-level `bucketName` and
  * `filename` into the table `datasetId.tableId`, then logs the job id.
  */
 async function loadTableGCSParquet() {
   /**
    * TODO(developer): Uncomment the following lines before running the sample.
    */
   // const datasetId = 'my_dataset';
   // const tableId = 'my_table';

   // Load job settings. For the full list of options, see:
   // https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad
   const loadOptions = {
     sourceFormat: 'PARQUET',
     location: 'US',
   };

   // Resolve the destination table first, then the source object in Cloud Storage.
   const destinationTable = bigquery.dataset(datasetId).table(tableId);
   const sourceFile = storage.bucket(bucketName).file(filename);

   // load() waits for the job to finish before resolving.
   const [loadJob] = await destinationTable.load(sourceFile, loadOptions);

   console.log(`Job ${loadJob.id} completed.`);
 }
 

PHP

Before trying this sample, follow the PHP setup instructions in the BigQuery quickstart using client libraries. For more information, see the BigQuery PHP API reference documentation.

To authenticate to BigQuery, set up Application Default Credentials. For more information, see Set up authentication for client libraries.

  use Google\Cloud\BigQuery\BigQueryClient; 
 /**
  * Load the public us-states Parquet sample from Cloud Storage into a BigQuery table.
  *
  * Prints a success message, or the job's error message if the load job reports one.
  *
  * @param string $projectId The project Id of your Google Cloud Project.
  * @param string $datasetId The BigQuery dataset ID.
  * @param string $tableId The BigQuery table ID.
  *
  * @throws \Exception If the job is not complete after it has been run and reloaded.
  */
 function import_from_storage_parquet(
     string $projectId,
     string $datasetId,
     string $tableId = 'us_states'
 ): void {
     // Resolve the destination table through a project-scoped client.
     $client = new BigQueryClient(['projectId' => $projectId]);
     $destination = $client->dataset($datasetId)->table($tableId);

     // Build and run the load job for the Parquet source file.
     $sourceUri = 'gs://cloud-samples-data/bigquery/us-states/us-states.parquet';
     $config = $destination->loadFromStorage($sourceUri)->sourceFormat('PARQUET');
     $loadJob = $destination->runJob($config);

     // Refresh the job state and refuse to continue if it is still running.
     $loadJob->reload();
     if (!$loadJob->isComplete()) {
         throw new \Exception('Job has not yet completed', 500);
     }

     // A completed job may still carry an error result; report it if present.
     $jobInfo = $loadJob->info();
     if (!isset($jobInfo['status']['errorResult'])) {
         print('Data imported successfully' . PHP_EOL);
         return;
     }

     $message = $jobInfo['status']['errorResult']['message'];
     printf('Error running job: %s' . PHP_EOL, $message);
 }
 

Python

Before trying this sample, follow the Python setup instructions in the BigQuery quickstart using client libraries. For more information, see the BigQuery Python API reference documentation.

To authenticate to BigQuery, set up Application Default Credentials. For more information, see Set up authentication for client libraries.

  from 
  
 google.cloud 
  
 import 
  bigquery 
 
 # Create the BigQuery client used for every request in this script.
client = bigquery.Client()

# TODO(developer): Set table_id to the ID of the table to create.
# table_id = "your-project.your_dataset.your_table_name"

# Source object in Cloud Storage.
uri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet"

# Load job settings: the source file is Parquet.
job_config = bigquery.LoadJobConfig()
job_config.source_format = bigquery.SourceFormat.PARQUET

# Start the load job (makes an API request).
load_job = client.load_table_from_uri(uri, table_id, job_config=job_config)

# Wait for the job to complete.
load_job.result()

# Fetch the destination table and report how many rows it now holds.
destination_table = client.get_table(table_id)
print("Loaded {} rows.".format(destination_table.num_rows))
 

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.

Design a Mobile Site
View Site in Mobile | Classic
Share by: