Train a processor version

Start a new training job for a processor

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample

Python

For more information, see the Document AI Python API reference documentation.

To authenticate to Document AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

  from 
  
 typing 
  
 import 
 Optional 
 from 
  
 google.api_core.client_options 
  
 import 
 ClientOptions 
 from 
  
 google.cloud 
  
 import 
 documentai 
 # type: ignore 
 # TODO(developer): Uncomment these variables before running the sample. 
 # project_id = 'YOUR_PROJECT_ID' 
 # location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' 
 # processor_id = 'YOUR_PROCESSOR_ID' 
 # processor_version_display_name = 'new-processor-version' 
 # train_data_uri = 'gs://bucket/directory/' # (Optional) 
 # test_data_uri = 'gs://bucket/directory/' # (Optional) 
 def train_processor_version_sample(
     project_id: str,
     location: str,
     processor_id: str,
     processor_version_display_name: str,
     train_data_uri: Optional[str] = None,
     test_data_uri: Optional[str] = None,
 ) -> None:
     """Start training a new processor version and print the outcome.

     Submits a ``TrainProcessorVersionRequest`` for the given processor,
     prints the name of the returned long-running operation, waits for the
     operation result, and then prints the new processor version together
     with the training and test dataset validation details taken from the
     operation metadata.

     Args:
         project_id: Project that owns the processor.
         location: Processor location; also used as the prefix of the API
             endpoint host name.
         processor_id: ID of the processor to train a new version for.
         processor_version_display_name: Display name given to the new
             processor version.
         train_data_uri: Optional Cloud Storage URI prefix for the training
             documents.
         test_data_uri: Optional Cloud Storage URI prefix for the test
             documents.
     """

     def _documents_under(uri_prefix: Optional[str]):
         # Wrap a Cloud Storage URI prefix in the batch input config message.
         gcs_location = documentai.GcsPrefix(gcs_uri_prefix=uri_prefix)
         return documentai.BatchDocumentsInputConfig(gcs_prefix=gcs_location)

     # You must set the api_endpoint if you use a location other than 'us'.
     endpoint_options = ClientOptions(
         api_endpoint=f"{location}-documentai.googleapis.com"
     )
     client = documentai.DocumentProcessorServiceClient(
         client_options=endpoint_options
     )

     # The full resource name of the processor, e.g.
     # `projects/{project_id}/locations/{location}/processors/{processor_id}`
     processor_name = client.processor_path(project_id, location, processor_id)

     new_version = documentai.ProcessorVersion(
         display_name=processor_version_display_name
     )

     # If train/test data is not supplied, the default sets in the Cloud
     # Console will be used.
     # NOTE(review): the prefixes are forwarded even when they are None;
     # confirm the service treats an unset prefix as "use the defaults".
     training_set = _documents_under(train_data_uri)
     test_set = _documents_under(test_data_uri)
     datasets = documentai.TrainProcessorVersionRequest.InputData(
         training_documents=training_set,
         test_documents=test_set,
     )

     train_request = documentai.TrainProcessorVersionRequest(
         parent=processor_name,
         processor_version=new_version,
         input_data=datasets,
     )
     training_job = client.train_processor_version(request=train_request)

     # Print operation details
     print(training_job.operation.name)

     # Wait for operation to complete
     response = documentai.TrainProcessorVersionResponse(training_job.result())
     metadata = documentai.TrainProcessorVersionMetadata(training_job.metadata)

     print(f"New Processor Version: {response.processor_version}")
     print(f"Training Set Validation: {metadata.training_dataset_validation}")
     print(f"Test Set Validation: {metadata.test_dataset_validation}")
 

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.

Create a Mobile Website
View Site in Mobile | Classic
Share by: