Object tracking follows multiple objects detected in an input video and reports each object's position over time.
Use the standard model
The following code samples demonstrate how to perform object tracking using the streaming client library.
Java
To authenticate to Video Intelligence, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
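If you want to confirm that Application Default Credentials are visible to your environment before running the sample, a minimal check is sketched below. It assumes the google-auth-library-oauth2-http dependency (pulled in transitively by the Video Intelligence client) is on your classpath; the class name AdcCheck is just a placeholder.

import com.google.auth.oauth2.GoogleCredentials;
import java.io.IOException;

class AdcCheck {
  public static void main(String[] args) throws IOException {
    // getApplicationDefault() throws IOException if no Application Default
    // Credentials can be found (for example, when gcloud ADC is not set up and
    // GOOGLE_APPLICATION_CREDENTIALS is not set).
    GoogleCredentials credentials = GoogleCredentials.getApplicationDefault();
    System.out.println("Found Application Default Credentials: " + credentials.getClass().getSimpleName());
  }
}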
import com.google.api.gax.rpc.BidiStream;
import com.google.cloud.videointelligence.v1p3beta1.ObjectTrackingAnnotation;
import com.google.cloud.videointelligence.v1p3beta1.ObjectTrackingFrame;
import com.google.cloud.videointelligence.v1p3beta1.StreamingAnnotateVideoRequest;
import com.google.cloud.videointelligence.v1p3beta1.StreamingAnnotateVideoResponse;
import com.google.cloud.videointelligence.v1p3beta1.StreamingFeature;
import com.google.cloud.videointelligence.v1p3beta1.StreamingLabelDetectionConfig;
import com.google.cloud.videointelligence.v1p3beta1.StreamingVideoAnnotationResults;
import com.google.cloud.videointelligence.v1p3beta1.StreamingVideoConfig;
import com.google.cloud.videointelligence.v1p3beta1.StreamingVideoIntelligenceServiceClient;
import com.google.protobuf.ByteString;
import io.grpc.StatusRuntimeException;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.concurrent.TimeoutException;

class StreamingObjectTracking {

  // Perform streaming video object tracking.
  static void streamingObjectTracking(String filePath)
      throws IOException, TimeoutException, StatusRuntimeException {
    // String filePath = "path_to_your_video_file";

    try (StreamingVideoIntelligenceServiceClient client =
        StreamingVideoIntelligenceServiceClient.create()) {

      Path path = Paths.get(filePath);
      byte[] data = Files.readAllBytes(path);
      // Set the chunk size to 5MB (recommended less than 10MB).
      int chunkSize = 5 * 1024 * 1024;
      int numChunks = (int) Math.ceil((double) data.length / chunkSize);

      StreamingLabelDetectionConfig labelConfig =
          StreamingLabelDetectionConfig.newBuilder().setStationaryCamera(false).build();

      StreamingVideoConfig streamingVideoConfig =
          StreamingVideoConfig.newBuilder()
              .setFeature(StreamingFeature.STREAMING_OBJECT_TRACKING)
              .setLabelDetectionConfig(labelConfig)
              .build();

      BidiStream<StreamingAnnotateVideoRequest, StreamingAnnotateVideoResponse> call =
          client.streamingAnnotateVideoCallable().call();

      // The first request must **only** contain the video configuration:
      call.send(
          StreamingAnnotateVideoRequest.newBuilder()
              .setVideoConfig(streamingVideoConfig)
              .build());

      // Subsequent requests must **only** contain the video data.
      // Send the requests in chunks.
      for (int i = 0; i < numChunks; i++) {
        call.send(
            StreamingAnnotateVideoRequest.newBuilder()
                .setInputContent(
                    ByteString.copyFrom(
                        Arrays.copyOfRange(data, i * chunkSize, i * chunkSize + chunkSize)))
                .build());
      }

      // Tell the service you are done sending data.
      call.closeSend();

      for (StreamingAnnotateVideoResponse response : call) {
        StreamingVideoAnnotationResults annotationResults = response.getAnnotationResults();

        for (ObjectTrackingAnnotation objectAnnotations :
            annotationResults.getObjectAnnotationsList()) {

          String entity = objectAnnotations.getEntity().getDescription();
          float confidence = objectAnnotations.getConfidence();
          long trackId = objectAnnotations.getTrackId();
          System.out.format("%s: %f (ID: %d)\n", entity, confidence, trackId);

          // In streaming, there is always one frame.
          ObjectTrackingFrame frame = objectAnnotations.getFrames(0);
          double offset =
              frame.getTimeOffset().getSeconds() + frame.getTimeOffset().getNanos() / 1e9;
          System.out.format("Offset: %f\n", offset);

          System.out.println("Bounding Box:");
          System.out.format("\tLeft: %f\n", frame.getNormalizedBoundingBox().getLeft());
          System.out.format("\tTop: %f\n", frame.getNormalizedBoundingBox().getTop());
          System.out.format("\tRight: %f\n", frame.getNormalizedBoundingBox().getRight());
          System.out.format("\tBottom: %f\n", frame.getNormalizedBoundingBox().getBottom());
        }
      }
    }
  }
}
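The Java sample above only defines the streamingObjectTracking method. One way to run it is a small entry point like the following sketch; the class name and default file path are placeholders, and the class must live in the same package as StreamingObjectTracking because the method is package-private.

public class StreamingObjectTrackingMain {
  public static void main(String[] args) throws Exception {
    // Pass the local video file to analyze as the first argument,
    // or fall back to a placeholder path.
    String filePath = args.length > 0 ? args[0] : "path/to/your/video.mp4";
    StreamingObjectTracking.streamingObjectTracking(filePath);
  }
}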
Node.js
To authenticate to Video Intelligence, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
/**
 * TODO(developer): Uncomment these variables before running the sample.
 */
// const path = 'Local file to analyze, e.g. ./my-file.mp4';

const {StreamingVideoIntelligenceServiceClient} =
  require('@google-cloud/video-intelligence').v1p3beta1;
const fs = require('fs');

// Instantiates a client
const client = new StreamingVideoIntelligenceServiceClient();

// Streaming configuration
const configRequest = {
  videoConfig: {
    feature: 'STREAMING_OBJECT_TRACKING',
  },
};

const readStream = fs.createReadStream(path, {
  highWaterMark: 5 * 1024 * 1024, // chunk size set to 5MB (recommended less than 10MB)
  encoding: 'base64',
});

// Load file content
const chunks = [];
readStream
  .on('data', chunk => {
    const request = {
      inputContent: chunk.toString(),
    };
    chunks.push(request);
  })
  .on('close', () => {
    // configRequest should be the first in the stream of requests
    stream.write(configRequest);
    for (let i = 0; i < chunks.length; i++) {
      stream.write(chunks[i]);
    }
    stream.end();
  });

const options = {timeout: 120000};

// Create a job using a long-running operation
const stream = client.streamingAnnotateVideo(options).on('data', response => {
  // Gets annotations for video
  const annotations = response.annotationResults;
  const objects = annotations.objectAnnotations;

  objects.forEach(object => {
    console.log(`Entity description: ${object.entity.description}`);
    console.log(`Entity id: ${object.entity.entityId}`);
    console.log(`Track id: ${object.trackId}`);
    console.log(`Confidence: ${object.confidence}`);
    console.log(
      `Time offset for the frame: ${object.frames[0].timeOffset.seconds || 0}` +
        `.${(object.frames[0].timeOffset.nanos / 1e6).toFixed(0)}s`
    );
    // Every annotation has only one frame.
    const box = object.frames[0].normalizedBoundingBox;
    console.log('Bounding box position:');
    console.log(` left  : ${box.left}`);
    console.log(` top   : ${box.top}`);
    console.log(` right : ${box.right}`);
    console.log(` bottom: ${box.bottom}`);
  });
});
Python
To authenticate to Video Intelligence, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
import io

from google.cloud import videointelligence_v1p3beta1 as videointelligence

# path = 'path_to_file'

client = videointelligence.StreamingVideoIntelligenceServiceClient()

# Set streaming config.
config = videointelligence.StreamingVideoConfig(
    feature=(videointelligence.StreamingFeature.STREAMING_OBJECT_TRACKING)
)

# config_request should be the first in the stream of requests.
config_request = videointelligence.StreamingAnnotateVideoRequest(video_config=config)

# Set the chunk size to 5MB (recommended less than 10MB).
chunk_size = 5 * 1024 * 1024

# Load file content.
stream = []
with io.open(path, "rb") as video_file:
    while True:
        data = video_file.read(chunk_size)
        if not data:
            break
        stream.append(data)


def stream_generator():
    yield config_request
    for chunk in stream:
        yield videointelligence.StreamingAnnotateVideoRequest(input_content=chunk)


requests = stream_generator()

# streaming_annotate_video returns a generator.
# The default timeout is about 300 seconds.
# To process longer videos it should be set to
# larger than the length (in seconds) of the stream.
responses = client.streaming_annotate_video(requests, timeout=900)

# Each response corresponds to about 1 second of video.
for response in responses:
    # Check for errors.
    if response.error.message:
        print(response.error.message)
        break

    object_annotations = response.annotation_results.object_annotations

    # object_annotations could be empty
    if not object_annotations:
        continue

    for annotation in object_annotations:
        # Each annotation has one frame, which has a time offset.
        frame = annotation.frames[0]
        time_offset = frame.time_offset.seconds + frame.time_offset.microseconds / 1e6

        description = annotation.entity.description
        confidence = annotation.confidence

        # track_id tracks the same object in the video.
        track_id = annotation.track_id

        # description is in Unicode
        print("{}s".format(time_offset))
        print("\tEntity description: {}".format(description))
        print("\tTrack Id: {}".format(track_id))
        if annotation.entity.entity_id:
            print("\tEntity id: {}".format(annotation.entity.entity_id))

        print("\tConfidence: {}".format(confidence))

        # Every annotation has only one frame.
        frame = annotation.frames[0]
        box = frame.normalized_bounding_box
        print("\tBounding box position:")
        print("\tleft  : {}".format(box.left))
        print("\ttop   : {}".format(box.top))
        print("\tright : {}".format(box.right))
        print("\tbottom: {}\n".format(box.bottom))