Query Cloud Storage with a permanent table

Query data from a file on Cloud Storage by creating a permanent table.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample

Java

Before trying this sample, follow the Java setup instructions in the BigQuery quickstart using client libraries. For more information, see the BigQuery Java API reference documentation.

To authenticate to BigQuery, set up Application Default Credentials. For more information, see Set up authentication for client libraries.

  import 
  
 com.google.cloud.bigquery. BigQuery 
 
 ; 
 import 
  
 com.google.cloud.bigquery. BigQueryException 
 
 ; 
 import 
  
 com.google.cloud.bigquery. BigQueryOptions 
 
 ; 
 import 
  
 com.google.cloud.bigquery. CsvOptions 
 
 ; 
 import 
  
 com.google.cloud.bigquery. ExternalTableDefinition 
 
 ; 
 import 
  
 com.google.cloud.bigquery. Field 
 
 ; 
 import 
  
 com.google.cloud.bigquery. QueryJobConfiguration 
 
 ; 
 import 
  
 com.google.cloud.bigquery. Schema 
 
 ; 
 import 
  
 com.google.cloud.bigquery. StandardSQLTypeName 
 
 ; 
 import 
  
 com.google.cloud.bigquery. TableId 
 
 ; 
 import 
  
 com.google.cloud.bigquery. TableInfo 
 
 ; 
 import 
  
 com.google.cloud.bigquery. TableResult 
 
 ; 
 // Sample to queries an external data source using a permanent table 
 public 
  
 class 
 QueryExternalGcsPerm 
  
 { 
  
 public 
  
 static 
  
 void 
  
 main 
 ( 
 String 
 [] 
  
 args 
 ) 
  
 { 
  
 // TODO(developer): Replace these variables before running the sample. 
  
 String 
  
 datasetName 
  
 = 
  
 "MY_DATASET_NAME" 
 ; 
  
 String 
  
 tableName 
  
 = 
  
 "MY_TABLE_NAME" 
 ; 
  
 String 
  
 sourceUri 
  
 = 
  
 "gs://cloud-samples-data/bigquery/us-states/us-states.csv" 
 ; 
  
  Schema 
 
  
 schema 
  
 = 
  
  Schema 
 
 . 
 of 
 ( 
  
  Field 
 
 . 
 of 
 ( 
 "name" 
 , 
  
  StandardSQLTypeName 
 
 . 
 STRING 
 ), 
  
  Field 
 
 . 
 of 
 ( 
 "post_abbr" 
 , 
  
  StandardSQLTypeName 
 
 . 
 STRING 
 )); 
  
 String 
  
 query 
  
 = 
  
 String 
 . 
 format 
 ( 
 "SELECT * FROM %s.%s WHERE name LIKE 'W%%'" 
 , 
  
 datasetName 
 , 
  
 tableName 
 ); 
  
 queryExternalGcsPerm 
 ( 
 datasetName 
 , 
  
 tableName 
 , 
  
 sourceUri 
 , 
  
 schema 
 , 
  
 query 
 ); 
  
 } 
  
 public 
  
 static 
  
 void 
  
 queryExternalGcsPerm 
 ( 
  
 String 
  
 datasetName 
 , 
  
 String 
  
 tableName 
 , 
  
 String 
  
 sourceUri 
 , 
  
  Schema 
 
  
 schema 
 , 
  
 String 
  
 query 
 ) 
  
 { 
  
 try 
  
 { 
  
 // Initialize client that will be used to send requests. This client only needs to be created 
  
 // once, and can be reused for multiple requests. 
  
  BigQuery 
 
  
 bigquery 
  
 = 
  
  BigQueryOptions 
 
 . 
 getDefaultInstance 
 (). 
 getService 
 (); 
  
 // Skip header row in the file. 
  
  CsvOptions 
 
  
 csvOptions 
  
 = 
  
  CsvOptions 
 
 . 
 newBuilder 
 (). 
 setSkipLeadingRows 
 ( 
 1 
 ). 
 build 
 (); 
  
  TableId 
 
  
 tableId 
  
 = 
  
  TableId 
 
 . 
 of 
 ( 
 datasetName 
 , 
  
 tableName 
 ); 
  
 // Create a permanent table linked to the GCS file 
  
  ExternalTableDefinition 
 
  
 externalTable 
  
 = 
  
  ExternalTableDefinition 
 
 . 
 newBuilder 
 ( 
 sourceUri 
 , 
  
 csvOptions 
 ). 
 setSchema 
 ( 
 schema 
 ). 
 build 
 (); 
  
 bigquery 
 . 
  create 
 
 ( 
 TableInfo 
 . 
 of 
 ( 
 tableId 
 , 
  
 externalTable 
 )); 
  
 // Example query to find states starting with 'W' 
  
  TableResult 
 
  
 results 
  
 = 
  
 bigquery 
 . 
  query 
 
 ( 
  QueryJobConfiguration 
 
 . 
 of 
 ( 
 query 
 )); 
  
 results 
  
 . 
  iterateAll 
 
 () 
  
 . 
 forEach 
 ( 
 row 
  
 - 
>  
 row 
 . 
 forEach 
 ( 
 val 
  
 - 
>  
 System 
 . 
 out 
 . 
 printf 
 ( 
 "%s," 
 , 
  
 val 
 . 
 toString 
 ()))); 
  
 System 
 . 
 out 
 . 
 println 
 ( 
 "Query on external permanent table performed successfully." 
 ); 
  
 } 
  
 catch 
  
 ( 
  BigQueryException 
 
  
 | 
  
 InterruptedException 
  
 e 
 ) 
  
 { 
  
 System 
 . 
 out 
 . 
 println 
 ( 
 "Query not performed \n" 
  
 + 
  
 e 
 . 
 toString 
 ()); 
  
 } 
  
 } 
 } 
 

Node.js

Before trying this sample, follow the Node.js setup instructions in the BigQuery quickstart using client libraries. For more information, see the BigQuery Node.js API reference documentation.

To authenticate to BigQuery, set up Application Default Credentials. For more information, see Set up authentication for client libraries.

  // Import the Google Cloud client library and create a client.
  // The module specifier must be exactly '@google-cloud/bigquery' (no surrounding
  // whitespace or line break), otherwise require() cannot resolve the package.
  const {BigQuery} = require('@google-cloud/bigquery');
  const bigquery = new BigQuery();
 /**
  * Queries an external data source using a permanent table.
  *
  * Creates a table in the given dataset that is backed by a CSV file in Cloud
  * Storage, then runs a query against it and logs the resulting rows.
  *
  * Relies on the module-level `bigquery` client.
  *
  * @param {string} [datasetId='my_dataset'] Dataset that will hold the table
  *     (presumably it must already exist; confirm against your project).
  * @param {string} [tableId='my_table'] ID of the table to create.
  * @param {Array<{name: string, type: string}>} [schema] Column definitions for
  *     the external table. Defaults to the two STRING columns of the sample
  *     us-states file: `name` and `post_abbr`.
  * @returns {Promise<void>} Resolves once the rows have been logged.
  */
 async function queryExternalGCSPerm(
   // TODO(developer): Pass your own values, or edit these defaults, before
   // running the sample.
   datasetId = 'my_dataset',
   tableId = 'my_table',
   schema = [
     {name: 'name', type: 'STRING'},
     {name: 'post_abbr', type: 'STRING'},
   ]
 ) {
   // Configure the external data source
   const dataConfig = {
     sourceFormat: 'CSV',
     sourceUris: ['gs://cloud-samples-data/bigquery/us-states/us-states.csv'],
     // Optionally skip header row
     csvOptions: {skipLeadingRows: 1},
   };

   // For all options, see https://cloud.google.com/bigquery/docs/reference/v2/tables#resource
   const options = {
     schema: schema,
     externalDataConfiguration: dataConfig,
   };

   // Create an external table linked to the GCS file
   const [table] = await bigquery
     .dataset(datasetId)
     .createTable(tableId, options);

   console.log(`Table ${table.id} created.`);

   // Example query to find states starting with 'W'
   const query = `SELECT post_abbr
   FROM \`${datasetId}.${tableId}\`
   WHERE name LIKE 'W%'`;

   // Run the query as a job
   const [job] = await bigquery.createQueryJob(query);
   console.log(`Job ${job.id} started.`);

   // Wait for the query to finish
   const [rows] = await job.getQueryResults();

   // Print the results
   console.log('Rows:');
   console.log(rows);
 }
 

Python

Before trying this sample, follow the Python setup instructions in the BigQuery quickstart using client libraries. For more information, see the BigQuery Python API reference documentation.

To authenticate to BigQuery, set up Application Default Credentials. For more information, see Set up authentication for client libraries.

  from 
  
 google.cloud 
  
 import 
  bigquery 
 
 # Build the BigQuery client used for the API call below.
client = bigquery.Client()

# TODO(developer): Set table_id to the ID of the table to create.
table_id = "your-project.your_dataset.your_table_name"

# TODO(developer): Set the external source format of your table.
# Note that the set of allowed values for external data sources is
# different than the set used for loading data (see :class:`~google.cloud.bigquery.job.SourceFormat`).
external_source_format = "AVRO"

# TODO(developer): Set the source_uris to point to your data in Google Cloud
source_uris = [
    "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.avro",
    "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.avro",
    "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/c-twitter.avro",
]

# Describe the external data: its format first, then where the files live.
external_config = bigquery.ExternalConfig(external_source_format)
external_config.source_uris = source_uris

# TODO(developer) You have the option to set a reference_file_schema_uri, which points to
# a reference file for the table schema
reference_file_schema_uri = "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.avro"
external_config.reference_file_schema_uri = reference_file_schema_uri

# Attach the external data configuration to a new table definition.
table = bigquery.Table(table_id)
table.external_data_configuration = external_config

# Make an API request to create the table; keep the table object it returns.
table = client.create_table(table)

print(
    f"Created table with external source format {table.external_data_configuration.source_format}"
)
 

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.

Create a Mobile Website
View Site in Mobile | Classic
Share by: