Clustered table

Load data from a CSV file on Cloud Storage to a clustered table.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample

Go

Before trying this sample, follow the Go setup instructions in the BigQuery quickstart using client libraries. For more information, see the BigQuery Go API reference documentation.

To authenticate to BigQuery, set up Application Default Credentials. For more information, see Set up authentication for client libraries.

  import 
  
 ( 
  
 "context" 
  
 "fmt" 
  
 "cloud.google.com/go/bigquery" 
 ) 
 // importClusteredTable demonstrates creating a table from a load job and defining partitioning and clustering 
 // properties. 
 func 
  
 importClusteredTable 
 ( 
 projectID 
 , 
  
 destDatasetID 
 , 
  
 destTableID 
  
 string 
 ) 
  
 error 
  
 { 
  
 // projectID := "my-project-id" 
  
 // datasetID := "mydataset" 
  
 // tableID := "mytable" 
  
 ctx 
  
 := 
  
 context 
 . 
 Background 
 () 
  
 client 
 , 
  
 err 
  
 := 
  
 bigquery 
 . 
 NewClient 
 ( 
 ctx 
 , 
  
 projectID 
 ) 
  
 if 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 return 
  
 fmt 
 . 
 Errorf 
 ( 
 "bigquery.NewClient: %w" 
 , 
  
 err 
 ) 
  
 } 
  
 defer 
  
 client 
 . 
 Close 
 () 
  
 gcsRef 
  
 := 
  
 bigquery 
 . 
  NewGCSReference 
 
 ( 
 "gs://cloud-samples-data/bigquery/sample-transactions/transactions.csv" 
 ) 
  
 gcsRef 
 . 
 SkipLeadingRows 
  
 = 
  
 1 
  
 gcsRef 
 . 
  Schema 
 
  
 = 
  
 bigquery 
 . 
  Schema 
 
 { 
  
 { 
 Name 
 : 
  
 "timestamp" 
 , 
  
 Type 
 : 
  
 bigquery 
 . 
  TimestampFieldType 
 
 }, 
  
 { 
 Name 
 : 
  
 "origin" 
 , 
  
 Type 
 : 
  
 bigquery 
 . 
  StringFieldType 
 
 }, 
  
 { 
 Name 
 : 
  
 "destination" 
 , 
  
 Type 
 : 
  
 bigquery 
 . 
  StringFieldType 
 
 }, 
  
 { 
 Name 
 : 
  
 "amount" 
 , 
  
 Type 
 : 
  
 bigquery 
 . 
  NumericFieldType 
 
 }, 
  
 } 
  
 loader 
  
 := 
  
 client 
 . 
 Dataset 
 ( 
 destDatasetID 
 ). 
 Table 
 ( 
 destTableID 
 ). 
  LoaderFrom 
 
 ( 
 gcsRef 
 ) 
  
 loader 
 . 
  TimePartitioning 
 
  
 = 
  
& bigquery 
 . 
  TimePartitioning 
 
 { 
  
 Field 
 : 
  
 "timestamp" 
 , 
  
 } 
  
 loader 
 . 
  Clustering 
 
  
 = 
  
& bigquery 
 . 
  Clustering 
 
 { 
  
 Fields 
 : 
  
 [] 
 string 
 { 
 "origin" 
 , 
  
 "destination" 
 }, 
  
 } 
  
 loader 
 . 
 WriteDisposition 
  
 = 
  
 bigquery 
 . 
  WriteEmpty 
 
  
 job 
 , 
  
 err 
  
 := 
  
 loader 
 . 
 Run 
 ( 
 ctx 
 ) 
  
 if 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 return 
  
 err 
  
 } 
  
 status 
 , 
  
 err 
  
 := 
  
 job 
 . 
 Wait 
 ( 
 ctx 
 ) 
  
 if 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 return 
  
 err 
  
 } 
  
 if 
  
 status 
 . 
  Err 
 
 () 
  
 != 
  
 nil 
  
 { 
  
 return 
  
 fmt 
 . 
 Errorf 
 ( 
 "job completed with error: %w" 
 , 
  
 status 
 . 
  Err 
 
 ()) 
  
 } 
  
 return 
  
 nil 
 } 
 

Java

Before trying this sample, follow the Java setup instructions in the BigQuery quickstart using client libraries. For more information, see the BigQuery Java API reference documentation.

To authenticate to BigQuery, set up Application Default Credentials. For more information, see Set up authentication for client libraries.

  import 
  
 com.google.cloud.bigquery. BigQuery 
 
 ; 
 import 
  
 com.google.cloud.bigquery. BigQueryException 
 
 ; 
 import 
  
 com.google.cloud.bigquery. BigQueryOptions 
 
 ; 
 import 
  
 com.google.cloud.bigquery. Clustering 
 
 ; 
 import 
  
 com.google.cloud.bigquery. Field 
 
 ; 
 import 
  
 com.google.cloud.bigquery. FormatOptions 
 
 ; 
 import 
  
 com.google.cloud.bigquery. Job 
 
 ; 
 import 
  
 com.google.cloud.bigquery. JobInfo 
 
 ; 
 import 
  
 com.google.cloud.bigquery. LoadJobConfiguration 
 
 ; 
 import 
  
 com.google.cloud.bigquery. Schema 
 
 ; 
 import 
  
 com.google.cloud.bigquery. StandardSQLTypeName 
 
 ; 
 import 
  
 com.google.cloud.bigquery. TableId 
 
 ; 
 import 
  
 com.google.cloud.bigquery. TimePartitioning 
 
 ; 
 import 
  
 com.google.common.collect.ImmutableList 
 ; 
 import 
  
 java.util.List 
 ; 
 // Sample to load a CSV file from Cloud Storage into a day-partitioned, clustered table.
 public class LoadTableClustered {

   public static void main(String[] args) {
     // TODO(developer): Replace these variables before running the sample.
     String datasetName = "MY_DATASET_NAME";
     String tableName = "MY_TABLE_NAME";
     // Location of the CSV file to load.
     String sourceUri = "/path/to/file.csv";
     Schema schema =
         Schema.of(
             Field.of("name", StandardSQLTypeName.STRING),
             Field.of("post_abbr", StandardSQLTypeName.STRING),
             Field.of("date", StandardSQLTypeName.DATE));
     loadTableClustered(
         datasetName, tableName, sourceUri, schema, ImmutableList.of("name", "post_abbr"));
   }

   /**
    * Runs a CSV load job that creates a day-partitioned table clustered on the given fields, and
    * prints the outcome to standard output.
    *
    * @param datasetName dataset that contains the destination table
    * @param tableName destination table
    * @param sourceUri location of the CSV file to load
    * @param schema schema applied to the loaded data
    * @param clusteringFields names of the fields to cluster on
    */
   public static void loadTableClustered(
       String datasetName,
       String tableName,
       String sourceUri,
       Schema schema,
       List<String> clusteringFields) {
     try {
       // Initialize client that will be used to send requests. This client only needs to be created
       // once, and can be reused for multiple requests.
       BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();

       TableId tableId = TableId.of(datasetName, tableName);

       TimePartitioning partitioning = TimePartitioning.of(TimePartitioning.Type.DAY);
       // Clustering fields must be fields that are defined in the schema.
       // BigQuery supports clustering for both partitioned and non-partitioned tables.
       Clustering clustering = Clustering.newBuilder().setFields(clusteringFields).build();

       LoadJobConfiguration loadJobConfig =
           LoadJobConfiguration.builder(tableId, sourceUri)
               .setFormatOptions(FormatOptions.csv())
               .setSchema(schema)
               .setTimePartitioning(partitioning)
               .setClustering(clustering)
               .build();

       Job loadJob = bigquery.create(JobInfo.newBuilder(loadJobConfig).build());

       // Load data from the CSV file into the table.
       // Blocks until this load table job completes its execution, either failing or succeeding.
       Job job = loadJob.waitFor();

       // waitFor() returns null when the job no longer exists; report that instead of
       // dereferencing a null job below.
       if (job == null) {
         System.out.println(
             "BigQuery was unable to load into the table because the load job no longer exists");
         return;
       }

       // Check for errors
       if (job.isDone() && job.getStatus().getError() == null) {
         System.out.println("Data successfully loaded into clustered table during load job");
       } else {
         System.out.println(
             "BigQuery was unable to load into the table due to an error:"
                 + job.getStatus().getError());
       }
     } catch (BigQueryException | InterruptedException e) {
       if (e instanceof InterruptedException) {
         // Restore the interrupt flag so callers can still observe the interruption.
         Thread.currentThread().interrupt();
       }
       System.out.println("Data not loaded into clustered table during load job \n" + e.toString());
     }
   }
 }
 

Node.js

Before trying this sample, follow the Node.js setup instructions in the BigQuery quickstart using client libraries. For more information, see the BigQuery Node.js API reference documentation.

To authenticate to BigQuery, set up Application Default Credentials. For more information, see Set up authentication for client libraries.

  // Import the Google Cloud client library 
 const 
  
 { 
 BigQuery 
 } 
  
 = 
  
 require 
 ( 
 ' @google-cloud/bigquery 
' 
 ); 
 const 
  
 { 
 Storage 
 } 
  
 = 
  
 require 
 ( 
 ' @google-cloud/storage 
' 
 ); 
 /**
  * Cloud Storage location of the CSV file that this sample loads:
  * gs://cloud-samples-data/bigquery/sample-transactions/transactions.csv
  *
  * TODO(developer): Replace the following lines with the path to your file.
  */
 const bucketName = 'cloud-samples-data';
 const filename = 'bigquery/sample-transactions/transactions.csv';

 // Create the BigQuery and Cloud Storage clients used by the sample.
 const bigquery = new BigQuery();
 const storage = new Storage();
 /**
  * Loads the CSV file identified by `bucketName` / `filename` into a new table
  * clustered on the `origin` and `destination` columns.
  *
  * Reads `datasetId` and `tableId` from the enclosing scope (see the TODO below).
  *
  * @returns {Promise<void>} resolves once the load job has finished without errors.
  * @throws the job's `status.errors` array when the finished job reports errors.
  */
 async function loadTableClustered() {
   // Loads a new clustered table named "my_table" in "my_dataset".

   /**
    * TODO(developer): Uncomment the following lines before running the sample.
    */
   // const datasetId = "my_dataset";
   // const tableId = "my_table";

   const metadata = {
     sourceFormat: 'CSV',
     skipLeadingRows: 1,
     schema: {
       fields: [
         {name: 'timestamp', type: 'TIMESTAMP'},
         {name: 'origin', type: 'STRING'},
         {name: 'destination', type: 'STRING'},
         {name: 'amount', type: 'NUMERIC'},
       ],
     },
     clustering: {
       fields: ['origin', 'destination'],
     },
   };

   // Load data from a Google Cloud Storage file into the table
   const [job] = await bigquery
     .dataset(datasetId)
     .table(tableId)
     .load(storage.bucket(bucketName).file(filename), metadata);

   // load() waits for the job to finish
   console.log(`Job ${job.id} completed.`);

   // A finished job can still carry errors in its status; surface them to the
   // caller instead of silently treating the load as successful.
   const errors = job.status && job.status.errors;
   if (errors && errors.length > 0) {
     throw errors;
   }
 }
 

Python

Before trying this sample, follow the Python setup instructions in the BigQuery quickstart using client libraries. For more information, see the BigQuery Python API reference documentation.

To authenticate to BigQuery, set up Application Default Credentials. For more information, see Set up authentication for client libraries.

  from 
  
 google.cloud 
  
 import 
  bigquery 
 
 # Create the BigQuery client that submits the load job.
 client = bigquery.Client()

 # TODO(developer): Set table_id to the ID of the table to create.
 # table_id = "your-project.your_dataset.your_table_name"

 # Columns of the CSV file, in file order.
 schema = [
     bigquery.SchemaField("timestamp", bigquery.SqlTypeNames.TIMESTAMP),
     bigquery.SchemaField("origin", bigquery.SqlTypeNames.STRING),
     bigquery.SchemaField("destination", bigquery.SqlTypeNames.STRING),
     bigquery.SchemaField("amount", bigquery.SqlTypeNames.NUMERIC),
 ]

 # Load-job settings: CSV with one header row, partitioned on "timestamp",
 # clustered on "origin" and "destination".
 job_config = bigquery.LoadJobConfig()
 job_config.skip_leading_rows = 1
 job_config.source_format = bigquery.SourceFormat.CSV
 job_config.schema = schema
 job_config.time_partitioning = bigquery.TimePartitioning(field="timestamp")
 job_config.clustering_fields = ["origin", "destination"]

 source_uris = [
     "gs://cloud-samples-data/bigquery/sample-transactions/transactions.csv"
 ]
 job = client.load_table_from_uri(source_uris, table_id, job_config=job_config)

 job.result()  # Waits for the job to complete.

 table = client.get_table(table_id)  # Make an API request.
 summary = "Loaded {} rows and {} columns to {}".format(
     table.num_rows, len(table.schema), table_id
 )
 print(summary)
 

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.

Create a Mobile Website
View Site in Mobile | Classic
Share by: