Deploys a model using the deploy_model method.
Explore further
For detailed documentation that includes this code sample, see the following:
- Get inferences from an image object detection model
- Get inferences from an image classification model
- Get predictions from a text classification model
- Get predictions from a text entity extraction model
- Get predictions from a text sentiment analysis model
Code sample
Java
Before trying this sample, follow the Java setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Java API reference documentation.
To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
import
com.google.api.gax.longrunning. OperationFuture
;
import
com.google.api.gax.longrunning. OperationTimedPollAlgorithm
;
import
com.google.api.gax.retrying. RetrySettings
;
import
com.google.cloud.aiplatform.v1. AutomaticResources
;
import
com.google.cloud.aiplatform.v1. DedicatedResources
;
import
com.google.cloud.aiplatform.v1. DeployModelOperationMetadata
;
import
com.google.cloud.aiplatform.v1. DeployModelResponse
;
import
com.google.cloud.aiplatform.v1. DeployedModel
;
import
com.google.cloud.aiplatform.v1. EndpointName
;
import
com.google.cloud.aiplatform.v1. EndpointServiceClient
;
import
com.google.cloud.aiplatform.v1. EndpointServiceSettings
;
import
com.google.cloud.aiplatform.v1. MachineSpec
;
import
com.google.cloud.aiplatform.v1. ModelName
;
import
com.google.cloud.aiplatform.v1.stub.EndpointServiceStubSettings
;
import
java.io.IOException
;
import
java.util.HashMap
;
import
java.util.Map
;
import
java.util.concurrent.ExecutionException
;
import
java.util.concurrent.TimeUnit
;
import
java.util.concurrent.TimeoutException
;
import
org.threeten.bp.Duration
;
public class DeployModelSample {

  public static void main(String[] args)
      throws IOException, InterruptedException, ExecutionException, TimeoutException {
    // TODO(developer): Replace these variables before running the sample.
    String project = "YOUR_PROJECT_ID";
    String deployedModelDisplayName = "YOUR_DEPLOYED_MODEL_DISPLAY_NAME";
    String endpointId = "YOUR_ENDPOINT_NAME";
    String modelId = "YOUR_MODEL_ID";
    int timeout = 900;
    deployModelSample(project, deployedModelDisplayName, endpointId, modelId, timeout);
  }

  /**
   * Deploys a model to an endpoint and prints details of the resulting DeployedModel.
   *
   * @param project the Google Cloud project ID
   * @param deployedModelDisplayName display name to give the deployed model
   * @param endpointId ID of the endpoint to deploy the model to
   * @param modelId ID of the model to deploy
   * @param timeout overall deployment timeout, in seconds; governs both the LRO polling
   *     algorithm and the wait on the operation future
   * @throws IOException if the client cannot be created
   * @throws InterruptedException if the wait for the operation is interrupted
   * @throws ExecutionException if the deploy operation fails
   * @throws TimeoutException if the operation does not finish within the timeout
   */
  static void deployModelSample(
      String project,
      String deployedModelDisplayName,
      String endpointId,
      String modelId,
      int timeout)
      throws IOException, InterruptedException, ExecutionException, TimeoutException {
    // Set long-running operations (LROs) timeout
    final OperationTimedPollAlgorithm operationTimedPollAlgorithm =
        OperationTimedPollAlgorithm.create(
            RetrySettings.newBuilder()
                .setInitialRetryDelay(Duration.ofMillis(5000L))
                .setRetryDelayMultiplier(1.5)
                .setMaxRetryDelay(Duration.ofMillis(45000L))
                // Duration.ZERO disables per-RPC timeouts; only the total timeout applies.
                .setInitialRpcTimeout(Duration.ZERO)
                .setRpcTimeoutMultiplier(1.0)
                .setMaxRpcTimeout(Duration.ZERO)
                .setTotalTimeout(Duration.ofSeconds(timeout))
                .build());

    EndpointServiceStubSettings.Builder endpointServiceStubSettingsBuilder =
        EndpointServiceStubSettings.newBuilder();
    endpointServiceStubSettingsBuilder
        .deployModelOperationSettings()
        .setPollingAlgorithm(operationTimedPollAlgorithm);
    EndpointServiceStubSettings endpointStubSettings = endpointServiceStubSettingsBuilder.build();
    EndpointServiceSettings endpointServiceSettings =
        EndpointServiceSettings.create(endpointStubSettings);
    endpointServiceSettings =
        endpointServiceSettings
            .toBuilder()
            .setEndpoint("us-central1-aiplatform.googleapis.com:443")
            .build();

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (EndpointServiceClient endpointServiceClient =
        EndpointServiceClient.create(endpointServiceSettings)) {
      String location = "us-central1";
      EndpointName endpointName = EndpointName.of(project, location, endpointId);

      // key '0' assigns traffic for the newly deployed model
      // Traffic percentage values must add up to 100
      // Leave dictionary empty if endpoint should not accept any traffic
      Map<String, Integer> trafficSplit = new HashMap<>();
      trafficSplit.put("0", 100);

      ModelName modelName = ModelName.of(project, location, modelId);
      AutomaticResources automaticResourcesInput =
          AutomaticResources.newBuilder().setMinReplicaCount(1).setMaxReplicaCount(1).build();
      DeployedModel deployedModelInput =
          DeployedModel.newBuilder()
              .setModel(modelName.toString())
              .setDisplayName(deployedModelDisplayName)
              .setAutomaticResources(automaticResourcesInput)
              .build();

      OperationFuture<DeployModelResponse, DeployModelOperationMetadata>
          deployModelResponseFuture =
              endpointServiceClient.deployModelAsync(
                  endpointName, deployedModelInput, trafficSplit);
      System.out.format(
          "Operation name: %s\n", deployModelResponseFuture.getInitialFuture().get().getName());
      System.out.println("Waiting for operation to finish...");
      // Wait using the caller-supplied timeout instead of a hard-coded 20 minutes, so the
      // `timeout` parameter consistently governs the whole deployment (the original code
      // used get(20, TimeUnit.MINUTES), silently ignoring shorter/longer timeouts).
      DeployModelResponse deployModelResponse =
          deployModelResponseFuture.get(timeout, TimeUnit.SECONDS);

      System.out.println("Deploy Model Response");
      DeployedModel deployedModel = deployModelResponse.getDeployedModel();
      System.out.println("\tDeployed Model");
      System.out.format("\t\tid: %s\n", deployedModel.getId());
      System.out.format("\t\tmodel: %s\n", deployedModel.getModel());
      System.out.format("\t\tDisplay Name: %s\n", deployedModel.getDisplayName());
      System.out.format("\t\tCreate Time: %s\n", deployedModel.getCreateTime());

      DedicatedResources dedicatedResources = deployedModel.getDedicatedResources();
      System.out.println("\t\tDedicated Resources");
      System.out.format("\t\t\tMin Replica Count: %s\n", dedicatedResources.getMinReplicaCount());

      MachineSpec machineSpec = dedicatedResources.getMachineSpec();
      System.out.println("\t\t\tMachine Spec");
      System.out.format("\t\t\t\tMachine Type: %s\n", machineSpec.getMachineType());
      System.out.format("\t\t\t\tAccelerator Type: %s\n", machineSpec.getAcceleratorType());
      System.out.format("\t\t\t\tAccelerator Count: %s\n", machineSpec.getAcceleratorCount());

      AutomaticResources automaticResources = deployedModel.getAutomaticResources();
      System.out.println("\t\tAutomatic Resources");
      System.out.format("\t\t\tMin Replica Count: %s\n", automaticResources.getMinReplicaCount());
      System.out.format("\t\t\tMax Replica Count: %s\n", automaticResources.getMaxReplicaCount());
    }
  }
}
Node.js
Before trying this sample, follow the Node.js setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Node.js API reference documentation.
To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
/**
 * TODO(developer): Uncomment these variables before running the sample.
 * (Not necessary if passing values as arguments)
 */
// const modelId = "YOUR_MODEL_ID";
// const endpointId = 'YOUR_ENDPOINT_ID';
// const deployedModelDisplayName = 'YOUR_DEPLOYED_MODEL_DISPLAY_NAME';
// const project = 'YOUR_PROJECT_ID';
// const location = 'YOUR_PROJECT_LOCATION';

// Fully-qualified resource names of the model and the target endpoint.
const modelName = `projects/${project}/locations/${location}/models/${modelId}`;
const endpoint = `projects/${project}/locations/${location}/endpoints/${endpointId}`;

// Imports the Google Cloud Endpoint Service Client library
const {EndpointServiceClient} = require('@google-cloud/aiplatform');

// Specifies the location of the api endpoint:
const clientOptions = {
  apiEndpoint: 'us-central1-aiplatform.googleapis.com',
};

// Instantiates a client
const endpointServiceClient = new EndpointServiceClient(clientOptions);

async function deployModel() {
  // Configure the parent resource
  // key '0' assigns traffic for the newly deployed model
  // Traffic percentage values must add up to 100
  // Leave dictionary empty if endpoint should not accept any traffic
  const trafficSplit = {0: 100};

  const deployedModel = {
    // format: 'projects/{project}/locations/{location}/models/{model}'
    model: modelName,
    displayName: deployedModelDisplayName,
    automaticResources: {minReplicaCount: 1, maxReplicaCount: 1},
  };

  const request = {endpoint, deployedModel, trafficSplit};

  // Kick off the deploy long-running operation and report its name.
  const [response] = await endpointServiceClient.deployModel(request);
  console.log(`Long running operation : ${response.name}`);

  // Wait for operation to complete
  await response.promise();
  const result = response.result;

  console.log('Deploy model response');
  const modelDeployed = result.deployedModel;
  console.log('\tDeployed model');

  if (!modelDeployed) {
    // No deployed-model payload came back; print empty placeholders.
    console.log('\t\tId : {}');
    console.log('\t\tModel : {}');
    console.log('\t\tDisplay name : {}');
    console.log('\t\tCreate time : {}');
    console.log('\t\tDedicated resources');
    console.log('\t\t\tMin replica count : {}');
    console.log('\t\t\tMachine spec {}');
    console.log('\t\t\t\tMachine type : {}');
    console.log('\t\t\t\tAccelerator type : {}');
    console.log('\t\t\t\tAccelerator count : {}');
    console.log('\t\tAutomatic resources');
    console.log('\t\t\tMin replica count : {}');
    console.log('\t\t\tMax replica count : {}');
  } else {
    console.log(`\t\tId : ${modelDeployed.id}`);
    console.log(`\t\tModel : ${modelDeployed.model}`);
    console.log(`\t\tDisplay name : ${modelDeployed.displayName}`);
    console.log(`\t\tCreate time : ${modelDeployed.createTime}`);

    const dedicatedResources = modelDeployed.dedicatedResources;
    console.log('\t\tDedicated resources');
    if (!dedicatedResources) {
      // Model was deployed with automatic (not dedicated) resources.
      console.log('\t\t\tMin replica count : {}');
      console.log('\t\t\tMachine spec {}');
      console.log('\t\t\t\tMachine type : {}');
      console.log('\t\t\t\tAccelerator type : {}');
      console.log('\t\t\t\tAccelerator count : {}');
    } else {
      console.log(`\t\t\tMin replica count : ${dedicatedResources.minReplicaCount}`);
      const machineSpec = dedicatedResources.machineSpec;
      console.log('\t\t\tMachine spec');
      console.log(`\t\t\t\tMachine type : ${machineSpec.machineType}`);
      console.log(`\t\t\t\tAccelerator type : ${machineSpec.acceleratorType}`);
      console.log(`\t\t\t\tAccelerator count : ${machineSpec.acceleratorCount}`);
    }

    const automaticResources = modelDeployed.automaticResources;
    console.log('\t\tAutomatic resources');
    if (!automaticResources) {
      console.log('\t\t\tMin replica count : {}');
      console.log('\t\t\tMax replica count : {}');
    } else {
      console.log(`\t\t\tMin replica count : ${automaticResources.minReplicaCount}`);
      console.log(`\t\t\tMax replica count : ${automaticResources.maxReplicaCount}`);
    }
  }
}
deployModel();
Python
Before trying this sample, follow the Python setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Python API reference documentation.
To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
from
google.cloud
import
aiplatform
def deploy_model_sample(
    project: str,
    endpoint_id: str,
    model_name: str,
    deployed_model_display_name: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
    timeout: int = 7200,
):
    """Deploy a model to a Vertex AI endpoint and print the deploy response.

    Args:
        project: Google Cloud project ID.
        endpoint_id: ID of the endpoint to deploy the model to.
        model_name: Full resource name of the model
            ('projects/{project}/locations/{location}/models/{model}').
        deployed_model_display_name: Display name for the deployed model.
        location: Region hosting the endpoint.
        api_endpoint: Regional API endpoint for the Vertex AI service.
        timeout: Seconds to wait for the deploy operation to complete.
    """
    # The AI Platform services require regional API endpoints.
    client_options = {"api_endpoint": api_endpoint}
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    client = aiplatform.gapic.EndpointServiceClient(client_options=client_options)

    deployed_model = {
        # format: 'projects/{project}/locations/{location}/models/{model}'
        "model": model_name,
        "display_name": deployed_model_display_name,
        # AutoML Vision models require `automatic_resources` field
        # Other model types may require `dedicated_resources` field instead
        "automatic_resources": {"min_replica_count": 1, "max_replica_count": 1},
    }

    # key '0' assigns traffic for the newly deployed model
    # Traffic percentage values must add up to 100
    # Leave dictionary empty if endpoint should not accept any traffic
    traffic_split = {"0": 100}

    endpoint = client.endpoint_path(
        project=project, location=location, endpoint=endpoint_id
    )
    response = client.deploy_model(
        endpoint=endpoint,
        deployed_model=deployed_model,
        traffic_split=traffic_split,
    )
    print("Long running operation:", response.operation.name)

    # Block until the long-running operation finishes (or `timeout` elapses).
    deploy_model_response = response.result(timeout=timeout)
    print("deploy_model_response:", deploy_model_response)
What's next
To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.