Object tracking follows multiple objects detected in an input video and reports each object's position over time.
Use the standard model
The following code samples demonstrate how to perform object tracking using the streaming client library.
Java
To authenticate to Video Intelligence, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
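If you want to confirm that Application Default Credentials are visible to your environment before running the sample, a minimal check is sketched below. It assumes the google-auth-library-oauth2-http dependency (pulled in transitively by the Video Intelligence client) is on your classpath; the class name AdcCheck is just a placeholder.

import com.google.auth.oauth2.GoogleCredentials;
import java.io.IOException;

class AdcCheck {
  public static void main(String[] args) throws IOException {
    // getApplicationDefault() throws IOException if no Application Default
    // Credentials can be found (for example, when gcloud ADC is not set up and
    // GOOGLE_APPLICATION_CREDENTIALS is not set).
    GoogleCredentials credentials = GoogleCredentials.getApplicationDefault();
    System.out.println("Found Application Default Credentials: " + credentials.getClass().getSimpleName());
  }
}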
import com.google.api.gax.rpc.BidiStream;
import com.google.cloud.videointelligence.v1p3beta1.ObjectTrackingAnnotation;
import com.google.cloud.videointelligence.v1p3beta1.ObjectTrackingFrame;
import com.google.cloud.videointelligence.v1p3beta1.StreamingAnnotateVideoRequest;
import com.google.cloud.videointelligence.v1p3beta1.StreamingAnnotateVideoResponse;
import com.google.cloud.videointelligence.v1p3beta1.StreamingFeature;
import com.google.cloud.videointelligence.v1p3beta1.StreamingLabelDetectionConfig;
import com.google.cloud.videointelligence.v1p3beta1.StreamingVideoAnnotationResults;
import com.google.cloud.videointelligence.v1p3beta1.StreamingVideoConfig;
import com.google.cloud.videointelligence.v1p3beta1.StreamingVideoIntelligenceServiceClient;
import com.google.protobuf.ByteString;
import io.grpc.StatusRuntimeException;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.concurrent.TimeoutException;

class StreamingObjectTracking {

  // Perform streaming video object tracking.
  static void streamingObjectTracking(String filePath)
      throws IOException, TimeoutException, StatusRuntimeException {
    // String filePath = "path_to_your_video_file";

    try (StreamingVideoIntelligenceServiceClient client =
        StreamingVideoIntelligenceServiceClient.create()) {

      Path path = Paths.get(filePath);
      byte[] data = Files.readAllBytes(path);
      // Set the chunk size to 5MB (recommended less than 10MB).
      int chunkSize = 5 * 1024 * 1024;
      int numChunks = (int) Math.ceil((double) data.length / chunkSize);

      StreamingLabelDetectionConfig labelConfig =
          StreamingLabelDetectionConfig.newBuilder().setStationaryCamera(false).build();

      StreamingVideoConfig streamingVideoConfig =
          StreamingVideoConfig.newBuilder()
              .setFeature(StreamingFeature.STREAMING_OBJECT_TRACKING)
              .setLabelDetectionConfig(labelConfig)
              .build();

      BidiStream<StreamingAnnotateVideoRequest, StreamingAnnotateVideoResponse> call =
          client.streamingAnnotateVideoCallable().call();

      // The first request must **only** contain the video configuration:
      call.send(
          StreamingAnnotateVideoRequest.newBuilder()
              .setVideoConfig(streamingVideoConfig)
              .build());

      // Subsequent requests must **only** contain the video data.
      // Send the requests in chunks.
      for (int i = 0; i < numChunks; i++) {
        call.send(
            StreamingAnnotateVideoRequest.newBuilder()
                .setInputContent(
                    ByteString.copyFrom(
                        Arrays.copyOfRange(data, i * chunkSize, i * chunkSize + chunkSize)))
                .build());
      }

      // Tell the service you are done sending data.
      call.closeSend();

      for (StreamingAnnotateVideoResponse response : call) {
        StreamingVideoAnnotationResults annotationResults = response.getAnnotationResults();

        for (ObjectTrackingAnnotation objectAnnotations :
            annotationResults.getObjectAnnotationsList()) {

          String entity = objectAnnotations.getEntity().getDescription();
          float confidence = objectAnnotations.getConfidence();
          long trackId = objectAnnotations.getTrackId();
          System.out.format("%s: %f (ID: %d)\n", entity, confidence, trackId);

          // In streaming, there is always one frame.
          ObjectTrackingFrame frame = objectAnnotations.getFrames(0);
          double offset =
              frame.getTimeOffset().getSeconds() + frame.getTimeOffset().getNanos() / 1e9;
          System.out.format("Offset: %f\n", offset);

          System.out.println("Bounding Box:");
          System.out.format("\tLeft: %f\n", frame.getNormalizedBoundingBox().getLeft());
          System.out.format("\tTop: %f\n", frame.getNormalizedBoundingBox().getTop());
          System.out.format("\tRight: %f\n", frame.getNormalizedBoundingBox().getRight());
          System.out.format("\tBottom: %f\n", frame.getNormalizedBoundingBox().getBottom());
        }
      }
    }
  }
}
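The Java sample above only defines the streamingObjectTracking method. One way to run it is a small entry point like the following sketch; the class name and default file path are placeholders, and the class must live in the same package as StreamingObjectTracking because the method is package-private.

public class StreamingObjectTrackingMain {
  public static void main(String[] args) throws Exception {
    // Pass the local video file to analyze as the first argument,
    // or fall back to a placeholder path.
    String filePath = args.length > 0 ? args[0] : "path/to/your/video.mp4";
    StreamingObjectTracking.streamingObjectTracking(filePath);
  }
}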
Node.js
To authenticate to Video Intelligence, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
/**
 * TODO(developer): Uncomment these variables before running the sample.
 */
// const path = 'Local file to analyze, e.g. ./my-file.mp4';

const {StreamingVideoIntelligenceServiceClient} =
  require('@google-cloud/video-intelligence').v1p3beta1;
const fs = require('fs');

// Instantiates a client
const client = new StreamingVideoIntelligenceServiceClient();

// Streaming configuration
const configRequest = {
  videoConfig: {
    feature: 'STREAMING_OBJECT_TRACKING',
  },
};

const readStream = fs.createReadStream(path, {
  highWaterMark: 5 * 1024 * 1024, // chunk size set to 5MB (recommended less than 10MB)
  encoding: 'base64',
});

// Load file content
const chunks = [];
readStream
  .on('data', chunk => {
    const request = {
      inputContent: chunk.toString(),
    };
    chunks.push(request);
  })
  .on('close', () => {
    // configRequest should be the first in the stream of requests
    stream.write(configRequest);
    for (let i = 0; i < chunks.length; i++) {
      stream.write(chunks[i]);
    }
    stream.end();
  });

const options = {timeout: 120000};

// Create a job using a long-running operation
const stream = client.streamingAnnotateVideo(options).on('data', response => {
  // Gets annotations for video
  const annotations = response.annotationResults;
  const objects = annotations.objectAnnotations;

  objects.forEach(object => {
    console.log(`Entity description: ${object.entity.description}`);
    console.log(`Entity id: ${object.entity.entityId}`);
    console.log(`Track id: ${object.trackId}`);
    console.log(`Confidence: ${object.confidence}`);
    console.log(
      `Time offset for the frame: ${object.frames[0].timeOffset.seconds || 0}` +
        `.${(object.frames[0].timeOffset.nanos / 1e6).toFixed(0)}s`
    );
    // Every annotation has only one frame.
    const box = object.frames[0].normalizedBoundingBox;
    console.log('Bounding box position:');
    console.log(` left  : ${box.left}`);
    console.log(` top   : ${box.top}`);
    console.log(` right : ${box.right}`);
    console.log(` bottom: ${box.bottom}`);
  });
});
Python
To authenticate to Video Intelligence, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
import io

from google.cloud import videointelligence_v1p3beta1 as videointelligence

# path = 'path_to_file'

client = videointelligence.StreamingVideoIntelligenceServiceClient()

# Set streaming config.
config = videointelligence.StreamingVideoConfig(
    feature=(videointelligence.StreamingFeature.STREAMING_OBJECT_TRACKING)
)

# config_request should be the first in the stream of requests.
config_request = videointelligence.StreamingAnnotateVideoRequest(video_config=config)

# Set the chunk size to 5MB (recommended less than 10MB).
chunk_size = 5 * 1024 * 1024

# Load file content.
stream = []
with io.open(path, "rb") as video_file:
    while True:
        data = video_file.read(chunk_size)
        if not data:
            break
        stream.append(data)


def stream_generator():
    yield config_request
    for chunk in stream:
        yield videointelligence.StreamingAnnotateVideoRequest(input_content=chunk)


requests = stream_generator()

# streaming_annotate_video returns a generator.
# The default timeout is about 300 seconds.
# To process longer videos it should be set to
# larger than the length (in seconds) of the stream.
responses = client.streaming_annotate_video(requests, timeout=900)

# Each response corresponds to about 1 second of video.
for response in responses:
    # Check for errors.
    if response.error.message:
        print(response.error.message)
        break

    object_annotations = response.annotation_results.object_annotations

    # object_annotations could be empty
    if not object_annotations:
        continue

    for annotation in object_annotations:
        # Each annotation has one frame, which has a time offset.
        frame = annotation.frames[0]
        time_offset = frame.time_offset.seconds + frame.time_offset.microseconds / 1e6

        description = annotation.entity.description
        confidence = annotation.confidence

        # track_id tracks the same object in the video.
        track_id = annotation.track_id

        # description is in Unicode
        print("{}s".format(time_offset))
        print("\tEntity description: {}".format(description))
        print("\tTrack Id: {}".format(track_id))
        if annotation.entity.entity_id:
            print("\tEntity id: {}".format(annotation.entity.entity_id))

        print("\tConfidence: {}".format(confidence))

        # Every annotation has only one frame.
        frame = annotation.frames[0]
        box = frame.normalized_bounding_box
        print("\tBounding box position:")
        print("\tleft  : {}".format(box.left))
        print("\ttop   : {}".format(box.top))
        print("\tright : {}".format(box.right))
        print("\tbottom: {}\n".format(box.bottom))