Deploy a custom-trained model

Deploys a custom-trained model to an endpoint using the deploy_model method.

Explore further

For detailed documentation that includes this code sample, see the Vertex AI documentation on deploying a model to an endpoint.

Code sample

Java

Before trying this sample, follow the Java setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Java API reference documentation.

To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.aiplatform.v1.DedicatedResources;
import com.google.cloud.aiplatform.v1.DeployModelOperationMetadata;
import com.google.cloud.aiplatform.v1.DeployModelResponse;
import com.google.cloud.aiplatform.v1.DeployedModel;
import com.google.cloud.aiplatform.v1.EndpointName;
import com.google.cloud.aiplatform.v1.EndpointServiceClient;
import com.google.cloud.aiplatform.v1.EndpointServiceSettings;
import com.google.cloud.aiplatform.v1.MachineSpec;
import com.google.cloud.aiplatform.v1.ModelName;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ExecutionException;

public class DeployModelCustomTrainedModelSample {

  public static void main(String[] args)
      throws IOException, ExecutionException, InterruptedException {
    // TODO(developer): Replace these variables before running the sample.
    String project = "PROJECT";
    String endpointId = "ENDPOINT_ID";
    String modelName = "MODEL_NAME";
    String deployedModelDisplayName = "DEPLOYED_MODEL_DISPLAY_NAME";
    deployModelCustomTrainedModelSample(project, endpointId, modelName, deployedModelDisplayName);
  }

  static void deployModelCustomTrainedModelSample(
      String project, String endpointId, String model, String deployedModelDisplayName)
      throws IOException, ExecutionException, InterruptedException {
    EndpointServiceSettings settings =
        EndpointServiceSettings.newBuilder()
            .setEndpoint("us-central1-aiplatform.googleapis.com:443")
            .build();
    String location = "us-central1";

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (EndpointServiceClient client = EndpointServiceClient.create(settings)) {
      MachineSpec machineSpec = MachineSpec.newBuilder().setMachineType("n1-standard-2").build();
      DedicatedResources dedicatedResources =
          DedicatedResources.newBuilder().setMinReplicaCount(1).setMachineSpec(machineSpec).build();

      String modelName = ModelName.of(project, location, model).toString();
      DeployedModel deployedModel =
          DeployedModel.newBuilder()
              .setModel(modelName)
              .setDisplayName(deployedModelDisplayName)
              // `dedicated_resources` must be used for non-AutoML models
              .setDedicatedResources(dedicatedResources)
              .build();

      // key '0' assigns traffic for the newly deployed model
      // Traffic percentage values must add up to 100
      // Leave dictionary empty if endpoint should not accept any traffic
      Map<String, Integer> trafficSplit = new HashMap<>();
      trafficSplit.put("0", 100);

      EndpointName endpoint = EndpointName.of(project, location, endpointId);
      OperationFuture<DeployModelResponse, DeployModelOperationMetadata> response =
          client.deployModelAsync(endpoint, deployedModel, trafficSplit);

      // You can use OperationFuture.getInitialFuture to get a future representing the initial
      // response to the request, which contains information while the operation is in progress.
      System.out.format("Operation name: %s\n", response.getInitialFuture().get().getName());

      // OperationFuture.get() will block until the operation is finished.
      DeployModelResponse deployModelResponse = response.get();
      System.out.format("deployModelResponse: %s\n", deployModelResponse);
    }
  }
}

Python

Before trying this sample, follow the Python setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Python API reference documentation.

To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

from google.cloud import aiplatform


def deploy_model_custom_trained_model_sample(
    project: str,
    endpoint_id: str,
    model_name: str,
    deployed_model_display_name: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
    timeout: int = 7200,
):
    # The AI Platform services require regional API endpoints.
    client_options = {"api_endpoint": api_endpoint}
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    client = aiplatform.gapic.EndpointServiceClient(client_options=client_options)
    deployed_model = {
        # format: 'projects/{project}/locations/{location}/models/{model}'
        "model": model_name,
        "display_name": deployed_model_display_name,
        # `dedicated_resources` must be used for non-AutoML models
        "dedicated_resources": {
            "min_replica_count": 1,
            "machine_spec": {
                "machine_type": "n1-standard-2",
                # Accelerators can be used only if the model specifies a GPU image.
                # 'accelerator_type': aiplatform.gapic.AcceleratorType.NVIDIA_TESLA_K80,
                # 'accelerator_count': 1,
            },
        },
    }
    # key '0' assigns traffic for the newly deployed model
    # Traffic percentage values must add up to 100
    # Leave dictionary empty if endpoint should not accept any traffic
    traffic_split = {"0": 100}
    endpoint = client.endpoint_path(
        project=project, location=location, endpoint=endpoint_id
    )
    response = client.deploy_model(
        endpoint=endpoint, deployed_model=deployed_model, traffic_split=traffic_split
    )
    print("Long running operation:", response.operation.name)
    deploy_model_response = response.result(timeout=timeout)
    print("deploy_model_response:", deploy_model_response)
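The sample above only defines the function; to run it end to end you need existing Vertex AI resources. The following sketch shows one way it might be invoked, assuming a project, an endpoint, and an uploaded model already exist. Every ID shown is a hypothetical placeholder, not a real resource.

# Example invocation of the sample defined above.
# NOTE: all IDs below are hypothetical placeholders; replace them with your own resources.
deploy_model_custom_trained_model_sample(
    project="my-project",  # hypothetical Google Cloud project ID
    endpoint_id="1234567890123456789",  # hypothetical Vertex AI endpoint ID
    # The model is referenced by its full resource name, as noted in the code comment.
    model_name="projects/my-project/locations/us-central1/models/9876543210987654321",
    deployed_model_display_name="my-custom-model-deployment",
)

Deployment is a long-running operation, so the call blocks in response.result() until the model is deployed or the timeout (two hours by default in this sample) is reached.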

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.
