Creates a data labeling job using the create_data_labeling_job method.
Code sample
Java
Before trying this sample, follow the Java setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Java API reference documentation.
To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
import
com.google.cloud.aiplatform.v1. DataLabelingJob
;
import
com.google.cloud.aiplatform.v1. DatasetName
;
import
com.google.cloud.aiplatform.v1. JobServiceClient
;
import
com.google.cloud.aiplatform.v1. JobServiceSettings
;
import
com.google.cloud.aiplatform.v1. LocationName
;
import
com.google.protobuf. Value
;
import
com.google.protobuf.util. JsonFormat
;
import
com.google.type. Money
;
import
java.io.IOException
;
import
java.util.Map
;
public
class
CreateDataLabelingJobSample
{
public
static
void
main
(
String
[]
args
)
throws
IOException
{
// TODO(developer): Replace these variables before running the sample.
String
project
=
"YOUR_PROJECT_ID"
;
String
displayName
=
"YOUR_DATA_LABELING_DISPLAY_NAME"
;
String
datasetId
=
"YOUR_DATASET_ID"
;
String
instructionUri
=
"gs://YOUR_GCS_SOURCE_BUCKET/path_to_your_data_labeling_source/file.pdf"
;
String
inputsSchemaUri
=
"YOUR_INPUT_SCHEMA_URI"
;
String
annotationSpec
=
"YOUR_ANNOTATION_SPEC"
;
createDataLabelingJob
(
project
,
displayName
,
datasetId
,
instructionUri
,
inputsSchemaUri
,
annotationSpec
);
}
static
void
createDataLabelingJob
(
String
project
,
String
displayName
,
String
datasetId
,
String
instructionUri
,
String
inputsSchemaUri
,
String
annotationSpec
)
throws
IOException
{
JobServiceSettings
jobServiceSettings
=
JobServiceSettings
.
newBuilder
()
.
setEndpoint
(
"us-central1-aiplatform.googleapis.com:443"
)
.
build
();
// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests. After completing all of your requests, call
// the "close" method on the client to safely clean up any remaining background resources.
try
(
JobServiceClient
jobServiceClient
=
JobServiceClient
.
create
(
jobServiceSettings
))
{
String
location
=
"us-central1"
;
LocationName
locationName
=
LocationName
.
of
(
project
,
location
);
String
jsonString
=
"{\"annotation_specs\": [ "
+
annotationSpec
+
"]}"
;
Value
.
Builder
annotationSpecValue
=
Value
.
newBuilder
();
JsonFormat
.
parser
().
merge
(
jsonString
,
annotationSpecValue
);
DatasetName
datasetName
=
DatasetName
.
of
(
project
,
location
,
datasetId
);
DataLabelingJob
dataLabelingJob
=
DataLabelingJob
.
newBuilder
()
.
setDisplayName
(
displayName
)
.
setLabelerCount
(
1
)
.
setInstructionUri
(
instructionUri
)
.
setInputsSchemaUri
(
inputsSchemaUri
)
.
addDatasets
(
datasetName
.
toString
())
.
setInputs
(
annotationSpecValue
)
.
putAnnotationLabels
(
"aiplatform.googleapis.com/annotation_set_name"
,
"my_test_saved_query"
)
.
build
();
DataLabelingJob
dataLabelingJobResponse
=
jobServiceClient
.
createDataLabelingJob
(
locationName
,
dataLabelingJob
);
System
.
out
.
println
(
"Create Data Labeling Job Response"
);
System
.
out
.
format
(
"\tName: %s\n"
,
dataLabelingJobResponse
.
getName
());
System
.
out
.
format
(
"\tDisplay Name: %s\n"
,
dataLabelingJobResponse
.
getDisplayName
());
System
.
out
.
format
(
"\tDatasets: %s\n"
,
dataLabelingJobResponse
.
getDatasetsList
());
System
.
out
.
format
(
"\tLabeler Count: %s\n"
,
dataLabelingJobResponse
.
getLabelerCount
());
System
.
out
.
format
(
"\tInstruction Uri: %s\n"
,
dataLabelingJobResponse
.
getInstructionUri
());
System
.
out
.
format
(
"\tInputs Schema Uri: %s\n"
,
dataLabelingJobResponse
.
getInputsSchemaUri
());
System
.
out
.
format
(
"\tInputs: %s\n"
,
dataLabelingJobResponse
.
getInputs
());
System
.
out
.
format
(
"\tState: %s\n"
,
dataLabelingJobResponse
.
getState
());
System
.
out
.
format
(
"\tLabeling Progress: %s\n"
,
dataLabelingJobResponse
.
getLabelingProgress
());
System
.
out
.
format
(
"\tCreate Time: %s\n"
,
dataLabelingJobResponse
.
getCreateTime
());
System
.
out
.
format
(
"\tUpdate Time: %s\n"
,
dataLabelingJobResponse
.
getUpdateTime
());
System
.
out
.
format
(
"\tLabels: %s\n"
,
dataLabelingJobResponse
.
getLabelsMap
());
System
.
out
.
format
(
"\tSpecialist Pools: %s\n"
,
dataLabelingJobResponse
.
getSpecialistPoolsList
());
for
(
Map
.
Entry<String
,
String
>
annotationLabelMap
:
dataLabelingJobResponse
.
getAnnotationLabelsMap
().
entrySet
())
{
System
.
out
.
println
(
"\tAnnotation Level"
);
System
.
out
.
format
(
"\t\tkey: %s\n"
,
annotationLabelMap
.
getKey
());
System
.
out
.
format
(
"\t\tvalue: %s\n"
,
annotationLabelMap
.
getValue
());
}
Money
money
=
dataLabelingJobResponse
.
getCurrentSpend
();
System
.
out
.
println
(
"\tCurrent Spend"
);
System
.
out
.
format
(
"\t\tCurrency Code: %s\n"
,
money
.
getCurrencyCode
());
System
.
out
.
format
(
"\t\tUnits: %s\n"
,
money
.
getUnits
());
System
.
out
.
format
(
"\t\tNanos: %s\n"
,
money
.
getNanos
());
}
}
}
Python
Before trying this sample, follow the Python setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Python API reference documentation.
To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
from
google.cloud
import
aiplatform
from
google.protobuf
import
json_format
from
google.protobuf.struct_pb2
import
Value
def create_data_labeling_job_sample(
    project: str,
    display_name: str,
    dataset_name: str,
    instruction_uri: str,
    inputs_schema_uri: str,
    annotation_spec: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
):
    """Create a data labeling job and print the service response.

    Args:
        project: Project ID used to build the parent resource path.
        display_name: Display name set on the job.
        dataset_name: Full dataset resource name, in the form
            projects/{project_id}/locations/{location}/datasets/{dataset_id}.
        instruction_uri: URI set as the job's instruction URI.
        inputs_schema_uri: URI set as the job's inputs schema URI.
        annotation_spec: Single entry placed in the "annotation_specs" list
            of the job inputs.
        location: Location segment of the parent resource path.
        api_endpoint: Regional API endpoint the client connects to.
    """
    # AI Platform services are reached through a regional endpoint, so it is
    # passed explicitly. The client can be reused for multiple requests.
    job_client = aiplatform.gapic.JobServiceClient(
        client_options={"api_endpoint": api_endpoint}
    )

    labeling_inputs = json_format.ParseDict(
        {"annotation_specs": [annotation_spec]}, Value()
    )

    job_spec = dict(
        display_name=display_name,
        # Full resource name: projects/{project_id}/locations/{location}/datasets/{dataset_id}
        datasets=[dataset_name],
        # labeler_count must be 1, 3, or 5
        labeler_count=1,
        instruction_uri=instruction_uri,
        inputs_schema_uri=inputs_schema_uri,
        inputs=labeling_inputs,
        annotation_labels={
            "aiplatform.googleapis.com/annotation_set_name": "my_test_saved_query"
        },
    )

    parent_path = f"projects/{project}/locations/{location}"
    created_job = job_client.create_data_labeling_job(
        parent=parent_path, data_labeling_job=job_spec
    )
    print("response:", created_job)
What's next
To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.