Create a dataset for tabular Cloud Storage

Creates a tabular dataset from a Cloud Storage source using the create_dataset method.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample

Java

Before trying this sample, follow the Java setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Java API reference documentation.

To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

  import 
  
 com.google.api.gax.longrunning. OperationFuture 
 
 ; 
 import 
  
 com.google.cloud.aiplatform.v1. CreateDatasetOperationMetadata 
 
 ; 
 import 
  
 com.google.cloud.aiplatform.v1. Dataset 
 
 ; 
 import 
  
 com.google.cloud.aiplatform.v1. DatasetServiceClient 
 
 ; 
 import 
  
 com.google.cloud.aiplatform.v1. DatasetServiceSettings 
 
 ; 
 import 
  
 com.google.cloud.aiplatform.v1. LocationName 
 
 ; 
 import 
  
 com.google.protobuf. Value 
 
 ; 
 import 
  
 com.google.protobuf.util. JsonFormat 
 
 ; 
 import 
  
 java.io.IOException 
 ; 
 import 
  
 java.util.concurrent.ExecutionException 
 ; 
 import 
  
 java.util.concurrent.TimeUnit 
 ; 
 import 
  
 java.util.concurrent.TimeoutException 
 ; 
 /**
  * Sample that creates a Vertex AI tabular dataset whose input is a file in Cloud Storage and
  * prints the fields of the created dataset.
  */
 public class CreateDatasetTabularGcsSample {

   // Region used both for the parent location and for the regional API endpoint.
   private static final String LOCATION = "us-central1";
   private static final String API_ENDPOINT = LOCATION + "-aiplatform.googleapis.com:443";

   // Schema URI for tabular dataset metadata (the "tabular" schema, matching the dataset type).
   private static final String METADATA_SCHEMA_URI =
       "gs://google-cloud-aiplatform/schema/dataset/metadata/tabular_1.0.0.yaml";

   // Upper bound on how long to wait for the create-dataset operation to finish.
   private static final long OPERATION_TIMEOUT_SECONDS = 300;

   public static void main(String[] args)
       throws InterruptedException, ExecutionException, TimeoutException, IOException {
     // TODO(developer): Replace these variables before running the sample.
     String project = "YOUR_PROJECT_ID";
     String datasetDisplayName = "YOUR_DATASET_DISPLAY_NAME";
     String gcsSourceUri = "gs://YOUR_GCS_SOURCE_BUCKET/path_to_your_gcs_table/file.csv";
     createDatasetTableGcs(project, datasetDisplayName, gcsSourceUri);
   }

   /**
    * Creates a tabular dataset backed by a Cloud Storage source and prints the result.
    *
    * @param project ID of the project that will own the dataset
    * @param datasetDisplayName display name to give the new dataset
    * @param gcsSourceUri Cloud Storage URI of the source file
    * @throws IOException if the client settings or the client cannot be created
    * @throws ExecutionException if the create-dataset operation fails
    * @throws InterruptedException if the thread is interrupted while waiting
    * @throws TimeoutException if the operation does not finish within the timeout
    */
   static void createDatasetTableGcs(String project, String datasetDisplayName, String gcsSourceUri)
       throws IOException, ExecutionException, InterruptedException, TimeoutException {
     DatasetServiceSettings settings =
         DatasetServiceSettings.newBuilder().setEndpoint(API_ENDPOINT).build();

     // Initialize client that will be used to send requests. This client only needs to be created
     // once, and can be reused for multiple requests. try-with-resources calls "close" on the
     // client to clean up any remaining background resources.
     try (DatasetServiceClient datasetServiceClient = DatasetServiceClient.create(settings)) {
       LocationName locationName = LocationName.of(project, LOCATION);

       Dataset dataset =
           Dataset.newBuilder()
               .setDisplayName(datasetDisplayName)
               .setMetadataSchemaUri(METADATA_SCHEMA_URI)
               .setMetadata(buildGcsSourceMetadata(gcsSourceUri))
               .build();

       OperationFuture<Dataset, CreateDatasetOperationMetadata> datasetFuture =
           datasetServiceClient.createDatasetAsync(locationName, dataset);
       System.out.format("Operation name: %s\n", datasetFuture.getInitialFuture().get().getName());

       System.out.println("Waiting for operation to finish...");
       Dataset datasetResponse = datasetFuture.get(OPERATION_TIMEOUT_SECONDS, TimeUnit.SECONDS);

       System.out.println("Create Dataset Table GCS sample");
       System.out.format("Name: %s\n", datasetResponse.getName());
       System.out.format("Display Name: %s\n", datasetResponse.getDisplayName());
       System.out.format("Metadata Schema Uri: %s\n", datasetResponse.getMetadataSchemaUri());
       System.out.format("Metadata: %s\n", datasetResponse.getMetadata());
     }
   }

   /**
    * Builds the dataset metadata value {@code {"input_config": {"gcs_source": {"uri": [uri]}}}}.
    *
    * <p>The value is assembled with protobuf builders instead of concatenating the URI into a JSON
    * string, so quotes or backslashes in the URI cannot corrupt the structure.
    */
   private static Value buildGcsSourceMetadata(String gcsSourceUri) {
     Value.Builder uris = Value.newBuilder();
     uris.getListValueBuilder().addValues(Value.newBuilder().setStringValue(gcsSourceUri));

     Value.Builder gcsSource = Value.newBuilder();
     gcsSource.getStructValueBuilder().putFields("uri", uris.build());

     Value.Builder inputConfig = Value.newBuilder();
     inputConfig.getStructValueBuilder().putFields("gcs_source", gcsSource.build());

     Value.Builder metadata = Value.newBuilder();
     metadata.getStructValueBuilder().putFields("input_config", inputConfig.build());
     return metadata.build();
   }
 }
 

Node.js

Before trying this sample, follow the Node.js setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Node.js API reference documentation.

To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

  /**
   * TODO(developer): Uncomment these variables before running the sample.
   * (Not necessary if passing values as arguments)
   */
  // const datasetDisplayName = 'YOUR_DATASET_DISPLAY_NAME';
  // const gcsSourceUri = 'YOUR_GCS_SOURCE_URI';
  // const project = 'YOUR_PROJECT_ID';
  // const location = 'YOUR_PROJECT_LOCATION';

  // Imports the Google Cloud Dataset Service Client library
  const {DatasetServiceClient} = require('@google-cloud/aiplatform');

  // Specifies the location of the api endpoint
  const clientOptions = {
    apiEndpoint: 'us-central1-aiplatform.googleapis.com',
  };

  // Instantiates a client
  const datasetServiceClient = new DatasetServiceClient(clientOptions);

  /**
   * Creates a tabular dataset whose input is the Cloud Storage URI in
   * `gcsSourceUri`, waits for the long-running operation to complete, and logs
   * the fields of the created dataset.
   */
  async function createDatasetTabularGcs() {
    // Configure the parent resource
    const parent = `projects/${project}/locations/${location}`;

    // Metadata is sent as a protobuf Value; this nested shape encodes
    // {inputConfig: {gcsSource: {uri: [gcsSourceUri]}}}.
    const metadata = {
      structValue: {
        fields: {
          inputConfig: {
            structValue: {
              fields: {
                gcsSource: {
                  structValue: {
                    fields: {
                      uri: {
                        listValue: {
                          values: [{stringValue: gcsSourceUri}],
                        },
                      },
                    },
                  },
                },
              },
            },
          },
        },
      },
    };

    // Configure the dataset resource
    const dataset = {
      displayName: datasetDisplayName,
      metadataSchemaUri:
        'gs://google-cloud-aiplatform/schema/dataset/metadata/tabular_1.0.0.yaml',
      metadata: metadata,
    };
    const request = {
      parent,
      dataset,
    };

    // Create dataset request; the first element of the response is the
    // long-running operation handle.
    const [operation] = await datasetServiceClient.createDataset(request);
    console.log(`Long running operation : ${operation.name}`);

    // Wait for operation to complete; the first element of the resolved array
    // is the created dataset.
    const [result] = await operation.promise();

    console.log('Create dataset tabular gcs response');
    console.log(`\tName : ${result.name}`);
    console.log(`\tDisplay name : ${result.displayName}`);
    console.log(`\tMetadata schema uri : ${result.metadataSchemaUri}`);
    console.log(`\tMetadata : ${JSON.stringify(result.metadata)}`);
  }

  // Report a failure explicitly and signal it through the exit code instead of
  // leaving the rejection unhandled.
  createDatasetTabularGcs().catch(err => {
    console.error(err);
    process.exitCode = 1;
  });
 

Python

Before trying this sample, follow the Python setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Python API reference documentation.

To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

  from 
  
 google.cloud 
  
 import 
 aiplatform 
 from 
  
 google.protobuf 
  
 import 
 json_format 
 from 
  
 google.protobuf.struct_pb2 
  
 import 
 Value 
 def create_dataset_tabular_gcs_sample(
     project: str,
     display_name: str,
     gcs_uri: str,
     location: str = "us-central1",
     api_endpoint: str = "us-central1-aiplatform.googleapis.com",
     timeout: int = 300,
 ):
     """Create a tabular dataset from a Cloud Storage source and print the result.

     Args:
         project: Project ID used to build the parent resource name.
         display_name: Display name for the new dataset.
         gcs_uri: Cloud Storage URI placed in the dataset's input config.
         location: Location used to build the parent resource name.
         api_endpoint: API endpoint the client connects to.
         timeout: Passed to the operation's ``result`` call (presumably seconds).
     """
     # The AI Platform services require regional API endpoints.
     # The client only needs to be created once and can be reused for multiple requests.
     dataset_client = aiplatform.gapic.DatasetServiceClient(
         client_options={"api_endpoint": api_endpoint}
     )

     # Convert the plain dict describing the input source into a protobuf Value.
     source_metadata = json_format.ParseDict(
         {"input_config": {"gcs_source": {"uri": [gcs_uri]}}}, Value()
     )

     operation = dataset_client.create_dataset(
         parent=f"projects/{project}/locations/{location}",
         dataset={
             "display_name": display_name,
             "metadata_schema_uri": "gs://google-cloud-aiplatform/schema/dataset/metadata/tabular_1.0.0.yaml",
             "metadata": source_metadata,
         },
     )
     print("Long running operation:", operation.operation.name)

     # Block until the long-running operation finishes or the timeout elapses.
     created_dataset = operation.result(timeout=timeout)
     print("create_dataset_response:", created_dataset)
 

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.

Create a Mobile Website
View Site in Mobile | Classic
Share by: