Deploy a model

Deploys a model using the deploy_model method.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample

Java

Before trying this sample, follow the Java setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Java API reference documentation.

To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

  import 
  
 com.google.api.gax.longrunning. OperationFuture 
 
 ; 
 import 
  
 com.google.api.gax.longrunning. OperationTimedPollAlgorithm 
 
 ; 
 import 
  
 com.google.api.gax.retrying. RetrySettings 
 
 ; 
 import 
  
 com.google.cloud.aiplatform.v1. AutomaticResources 
 
 ; 
 import 
  
 com.google.cloud.aiplatform.v1. DedicatedResources 
 
 ; 
 import 
  
 com.google.cloud.aiplatform.v1. DeployModelOperationMetadata 
 
 ; 
 import 
  
 com.google.cloud.aiplatform.v1. DeployModelResponse 
 
 ; 
 import 
  
 com.google.cloud.aiplatform.v1. DeployedModel 
 
 ; 
 import 
  
 com.google.cloud.aiplatform.v1. EndpointName 
 
 ; 
 import 
  
 com.google.cloud.aiplatform.v1. EndpointServiceClient 
 
 ; 
 import 
  
 com.google.cloud.aiplatform.v1. EndpointServiceSettings 
 
 ; 
 import 
  
 com.google.cloud.aiplatform.v1. MachineSpec 
 
 ; 
 import 
  
 com.google.cloud.aiplatform.v1. ModelName 
 
 ; 
 import 
  
 com.google.cloud.aiplatform.v1.stub.EndpointServiceStubSettings 
 ; 
 import 
  
 java.io.IOException 
 ; 
 import 
  
 java.util.HashMap 
 ; 
 import 
  
 java.util.Map 
 ; 
 import 
  
 java.util.concurrent.ExecutionException 
 ; 
 import 
  
 java.util.concurrent.TimeUnit 
 ; 
 import 
  
 java.util.concurrent.TimeoutException 
 ; 
 import 
  
 org.threeten.bp.Duration 
 ; 
 /**
  * Deploys a Vertex AI model to an existing endpoint and prints the details of the resulting
  * deployed model.
  */
 public class DeployModelSample {

   public static void main(String[] args)
       throws IOException, InterruptedException, ExecutionException, TimeoutException {
     // TODO(developer): Replace these variables before running the sample.
     String project = "YOUR_PROJECT_ID";
     String deployedModelDisplayName = "YOUR_DEPLOYED_MODEL_DISPLAY_NAME";
     // Endpoint ID; it is combined with the project and location into a resource name below.
     String endpointId = "YOUR_ENDPOINT_NAME";
     String modelId = "YOUR_MODEL_ID";
     // Upper bound, in seconds, on how long to wait for the deploy operation.
     int timeout = 900;
     deployModelSample(project, deployedModelDisplayName, endpointId, modelId, timeout);
   }

   /**
    * Deploys {@code modelId} to {@code endpointId} in the {@code us-central1} region, routes 100%
    * of the endpoint's traffic to the new deployment, waits for the long-running operation to
    * finish, and prints the deployed model's fields to standard output.
    *
    * @param project ID of the project that owns both the endpoint and the model
    * @param deployedModelDisplayName display name to give the deployed model
    * @param endpointId ID of the endpoint to deploy to
    * @param modelId ID of the model to deploy
    * @param timeout maximum time, in seconds, to poll and wait for the deploy operation
    * @throws IOException propagated from building the settings or creating the client
    * @throws InterruptedException if the thread is interrupted while waiting for the operation
    * @throws ExecutionException if the deploy operation completes with a failure
    * @throws TimeoutException if the operation does not finish within {@code timeout} seconds
    */
   static void deployModelSample(
       String project,
       String deployedModelDisplayName,
       String endpointId,
       String modelId,
       int timeout)
       throws IOException, InterruptedException, ExecutionException, TimeoutException {
     // Set long-running operations (LROs) timeout: poll every 5 s at first, backing off by 1.5x
     // up to 45 s between polls, for at most `timeout` seconds in total.
     final OperationTimedPollAlgorithm operationTimedPollAlgorithm =
         OperationTimedPollAlgorithm.create(
             RetrySettings.newBuilder()
                 .setInitialRetryDelay(Duration.ofMillis(5000L))
                 .setRetryDelayMultiplier(1.5)
                 .setMaxRetryDelay(Duration.ofMillis(45000L))
                 .setInitialRpcTimeout(Duration.ZERO)
                 .setRpcTimeoutMultiplier(1.0)
                 .setMaxRpcTimeout(Duration.ZERO)
                 .setTotalTimeout(Duration.ofSeconds(timeout))
                 .build());

     // Attach the polling algorithm to the deployModel operation only.
     EndpointServiceStubSettings.Builder endpointServiceStubSettingsBuilder =
         EndpointServiceStubSettings.newBuilder();
     endpointServiceStubSettingsBuilder
         .deployModelOperationSettings()
         .setPollingAlgorithm(operationTimedPollAlgorithm);
     EndpointServiceStubSettings endpointStubSettings = endpointServiceStubSettingsBuilder.build();

     // Point the client at the regional API endpoint.
     EndpointServiceSettings endpointServiceSettings =
         EndpointServiceSettings.create(endpointStubSettings);
     endpointServiceSettings =
         endpointServiceSettings.toBuilder()
             .setEndpoint("us-central1-aiplatform.googleapis.com:443")
             .build();

     // Initialize client that will be used to send requests. This client only needs to be created
     // once, and can be reused for multiple requests. After completing all of your requests, call
     // the "close" method on the client to safely clean up any remaining background resources.
     try (EndpointServiceClient endpointServiceClient =
         EndpointServiceClient.create(endpointServiceSettings)) {
       String location = "us-central1";
       EndpointName endpointName = EndpointName.of(project, location, endpointId);

       // key '0' assigns traffic for the newly deployed model
       // Traffic percentage values must add up to 100
       // Leave dictionary empty if endpoint should not accept any traffic
       Map<String, Integer> trafficSplit = new HashMap<>();
       trafficSplit.put("0", 100);

       ModelName modelName = ModelName.of(project, location, modelId);
       AutomaticResources automaticResourcesInput =
           AutomaticResources.newBuilder().setMinReplicaCount(1).setMaxReplicaCount(1).build();
       DeployedModel deployedModelInput =
           DeployedModel.newBuilder()
               .setModel(modelName.toString())
               .setDisplayName(deployedModelDisplayName)
               .setAutomaticResources(automaticResourcesInput)
               .build();

       OperationFuture<DeployModelResponse, DeployModelOperationMetadata>
           deployModelResponseFuture =
               endpointServiceClient.deployModelAsync(
                   endpointName, deployedModelInput, trafficSplit);
       System.out.format(
           "Operation name: %s\n", deployModelResponseFuture.getInitialFuture().get().getName());

       System.out.println("Waiting for operation to finish...");
       // Wait for as long as the caller asked for. A fixed wait would cut off callers that pass
       // a timeout larger than the fixed value.
       DeployModelResponse deployModelResponse =
           deployModelResponseFuture.get(timeout, TimeUnit.SECONDS);

       System.out.println("Deploy Model Response");
       DeployedModel deployedModel = deployModelResponse.getDeployedModel();
       System.out.println("\tDeployed Model");
       System.out.format("\t\tid: %s\n", deployedModel.getId());
       System.out.format("\t\tmodel: %s\n", deployedModel.getModel());
       System.out.format("\t\tDisplay Name: %s\n", deployedModel.getDisplayName());
       System.out.format("\t\tCreate Time: %s\n", deployedModel.getCreateTime());

       DedicatedResources dedicatedResources = deployedModel.getDedicatedResources();
       System.out.println("\t\tDedicated Resources");
       System.out.format(
           "\t\t\tMin Replica Count: %s\n", dedicatedResources.getMinReplicaCount());

       MachineSpec machineSpec = dedicatedResources.getMachineSpec();
       System.out.println("\t\t\tMachine Spec");
       System.out.format("\t\t\t\tMachine Type: %s\n", machineSpec.getMachineType());
       System.out.format("\t\t\t\tAccelerator Type: %s\n", machineSpec.getAcceleratorType());
       System.out.format("\t\t\t\tAccelerator Count: %s\n", machineSpec.getAcceleratorCount());

       AutomaticResources automaticResources = deployedModel.getAutomaticResources();
       System.out.println("\t\tAutomatic Resources");
       System.out.format(
           "\t\t\tMin Replica Count: %s\n", automaticResources.getMinReplicaCount());
       System.out.format(
           "\t\t\tMax Replica Count: %s\n", automaticResources.getMaxReplicaCount());
     }
   }
 }

Node.js

Before trying this sample, follow the Node.js setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Node.js API reference documentation.

To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

  /**
   * TODO(developer): Uncomment these variables before running the sample.
   * (Not necessary if passing values as arguments)
   */
  // const modelId = "YOUR_MODEL_ID";
  // const endpointId = 'YOUR_ENDPOINT_ID';
  // const deployedModelDisplayName = 'YOUR_DEPLOYED_MODEL_DISPLAY_NAME';
  // const project = 'YOUR_PROJECT_ID';
  // const location = 'YOUR_PROJECT_LOCATION';

  // Full resource names built from the values above.
  const modelName = `projects/${project}/locations/${location}/models/${modelId}`;
  const endpoint = `projects/${project}/locations/${location}/endpoints/${endpointId}`;

  // Imports the Google Cloud Endpoint Service Client library
  const {EndpointServiceClient} = require('@google-cloud/aiplatform');

  // Specifies the location of the api endpoint:
  const clientOptions = {
    apiEndpoint: 'us-central1-aiplatform.googleapis.com',
  };

  // Instantiates a client
  const endpointServiceClient = new EndpointServiceClient(clientOptions);

  /**
   * Deploys `modelName` to `endpoint`, waits for the long-running operation to
   * complete, and logs the fields of the deployed model. Fields that are absent
   * from the response are logged with a `{}` placeholder.
   */
  async function deployModel() {
    // key '0' assigns traffic for the newly deployed model
    // Traffic percentage values must add up to 100
    // Leave dictionary empty if endpoint should not accept any traffic
    const trafficSplit = {0: 100};
    const deployedModel = {
      // format: 'projects/{project}/locations/{location}/models/{model}'
      model: modelName,
      displayName: deployedModelDisplayName,
      automaticResources: {minReplicaCount: 1, maxReplicaCount: 1},
    };
    const request = {
      endpoint,
      deployedModel,
      trafficSplit,
    };

    // Send the deploy request; the call resolves to a long-running operation
    const [operation] = await endpointServiceClient.deployModel(request);
    console.log(`Long running operation : ${operation.name}`);

    // Wait for operation to complete and take the result it resolves with
    const [result] = await operation.promise();

    console.log('Deploy model response');
    const modelDeployed = result.deployedModel;
    console.log('\tDeployed model');
    if (!modelDeployed) {
      console.log('\t\tId : {}');
      console.log('\t\tModel : {}');
      console.log('\t\tDisplay name : {}');
      console.log('\t\tCreate time : {}');

      console.log('\t\tDedicated resources');
      console.log('\t\t\tMin replica count : {}');
      console.log('\t\t\tMachine spec {}');
      console.log('\t\t\t\tMachine type : {}');
      console.log('\t\t\t\tAccelerator type : {}');
      console.log('\t\t\t\tAccelerator count : {}');

      console.log('\t\tAutomatic resources');
      console.log('\t\t\tMin replica count : {}');
      console.log('\t\t\tMax replica count : {}');
    } else {
      console.log(`\t\tId : ${modelDeployed.id}`);
      console.log(`\t\tModel : ${modelDeployed.model}`);
      console.log(`\t\tDisplay name : ${modelDeployed.displayName}`);
      console.log(`\t\tCreate time : ${modelDeployed.createTime}`);

      const dedicatedResources = modelDeployed.dedicatedResources;
      console.log('\t\tDedicated resources');
      if (!dedicatedResources) {
        console.log('\t\t\tMin replica count : {}');
        console.log('\t\t\tMachine spec {}');
        console.log('\t\t\t\tMachine type : {}');
        console.log('\t\t\t\tAccelerator type : {}');
        console.log('\t\t\t\tAccelerator count : {}');
      } else {
        // Single-line templates: a backslash line continuation inside a
        // template literal would copy the source indentation into the output.
        console.log(
          `\t\t\tMin replica count : ${dedicatedResources.minReplicaCount}`
        );
        const machineSpec = dedicatedResources.machineSpec;
        console.log('\t\t\tMachine spec');
        console.log(`\t\t\t\tMachine type : ${machineSpec.machineType}`);
        console.log(
          `\t\t\t\tAccelerator type : ${machineSpec.acceleratorType}`
        );
        console.log(
          `\t\t\t\tAccelerator count : ${machineSpec.acceleratorCount}`
        );
      }

      const automaticResources = modelDeployed.automaticResources;
      console.log('\t\tAutomatic resources');
      if (!automaticResources) {
        console.log('\t\t\tMin replica count : {}');
        console.log('\t\t\tMax replica count : {}');
      } else {
        console.log(
          `\t\t\tMin replica count : ${automaticResources.minReplicaCount}`
        );
        console.log(
          `\t\t\tMax replica count : ${automaticResources.maxReplicaCount}`
        );
      }
    }
  }

  // Report a failed deployment explicitly instead of leaving the rejection unhandled.
  deployModel().catch(err => {
    console.error(err);
    process.exitCode = 1;
  });

Python

Before trying this sample, follow the Python setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Python API reference documentation.

To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

  from 
  
 google.cloud 
  
 import 
 aiplatform 
 def deploy_model_sample(
     project: str,
     endpoint_id: str,
     model_name: str,
     deployed_model_display_name: str,
     location: str = "us-central1",
     api_endpoint: str = "us-central1-aiplatform.googleapis.com",
     timeout: int = 7200,
 ):
     """Deploy a model to an endpoint and print the operation's result.

     Args:
         project: Project used to build the endpoint resource path.
         endpoint_id: ID of the endpoint to deploy the model to.
         model_name: Model resource name, in the format
             'projects/{project}/locations/{location}/models/{model}'.
         deployed_model_display_name: Display name for the deployed model.
         location: Location used to build the endpoint resource path.
         api_endpoint: API endpoint the client connects to.
         timeout: Timeout passed to the long-running operation's ``result()``.
     """
     # The AI Platform services require regional API endpoints.
     # Initialize client that will be used to create and send requests.
     # This client only needs to be created once, and can be reused for multiple requests.
     endpoint_client = aiplatform.gapic.EndpointServiceClient(
         client_options=dict(api_endpoint=api_endpoint)
     )

     endpoint_resource = endpoint_client.endpoint_path(
         project=project, location=location, endpoint=endpoint_id
     )

     # key '0' assigns traffic for the newly deployed model
     # Traffic percentage values must add up to 100
     # Leave dictionary empty if endpoint should not accept any traffic
     traffic = {"0": 100}

     # AutoML Vision models require `automatic_resources` field
     # Other model types may require `dedicated_resources` field instead
     replica_bounds = {"min_replica_count": 1, "max_replica_count": 1}
     model_spec = {
         # format: 'projects/{project}/locations/{location}/models/{model}'
         "model": model_name,
         "display_name": deployed_model_display_name,
         "automatic_resources": replica_bounds,
     }

     operation = endpoint_client.deploy_model(
         endpoint=endpoint_resource,
         deployed_model=model_spec,
         traffic_split=traffic,
     )
     print("Long running operation:", operation.operation.name)

     # Block until the operation finishes, then show what it returned.
     outcome = operation.result(timeout=timeout)
     print("deploy_model_response:", outcome)

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.

Deploy a model. Stay organized with collections. Save and categorize content based on your preferences.

Explore further

Code sample

Java

Node.js

Python

What's next

Deploy a model