Perform a token counting.
Endpoint
POST https://{service-endpoint}/v1/{endpoint}:countTokens
Where {service-endpoint} is one of the supported service endpoints.
Path parameters
endpoint
string
Required. The name of the Endpoint requested to perform token counting. Format: projects/{project}/locations/{location}/endpoints/{endpoint}
Request body
The request body contains data with the following structure:
model
string
Optional. The name of the publisher model requested to serve the prediction. Format: projects/{project}/locations/{location}/publishers/*/models/*
instances[]
value (Value format)
Optional. The instances that are the input to token counting call. Schema is identical to the prediction schema of the underlying model.
contents[]
object (Content)
Optional. Input content.
tools[]
object (Tool)
Optional. A list of Tools
the model may use to generate the next response.
A Tool
is a piece of code that enables the system to interact with external systems to perform an action, or set of actions, outside of knowledge and scope of the model.
systemInstruction
object (Content)
Optional. The user provided system instructions for the model. Note: only text should be used in parts and content in each part will be in a separate paragraph.
Example request
Text
C#
using Google.Cloud.AIPlatform.V1;
using System;
using System.Threading.Tasks;

public class GetTokenCount
{
    // Counts the tokens in a short text prompt with the Vertex AI
    // LlmUtilityService, prints the result, and returns the token count.
    public async Task<int> CountTokens(
        string projectId = "your-project-id",
        string location = "us-central1",
        string publisher = "google",
        string model = "gemini-1.5-flash-001"
    )
    {
        // The client must target the regional endpoint that matches `location`.
        var client = new LlmUtilityServiceClientBuilder
        {
            Endpoint = $"{location}-aiplatform.googleapis.com"
        }.Build();

        // Fully qualified resource name of the publisher model; used for both
        // the request endpoint and the model field.
        string resourceName =
            $"projects/{projectId}/locations/{location}/publishers/{publisher}/models/{model}";

        var request = new CountTokensRequest
        {
            Endpoint = resourceName,
            Model = resourceName,
            Contents =
            {
                new Content
                {
                    Role = "USER",
                    Parts = { new Part { Text = "Why is the sky blue?" } }
                }
            }
        };

        var response = await client.CountTokensAsync(request);
        int tokenCount = response.TotalTokens;
        Console.WriteLine($"There are {tokenCount} tokens in the prompt.");
        return tokenCount;
    }
}
Go
import
(
" context
"
" fmt
"
" io
"
" cloud
.
google
.
com
/
go
/
vertexai
/
genai
" )
// countTokens returns the number of tokens for this prompt.
func
countTokens
(
w
io
.
Writer
,
projectID
,
location
,
modelName
string
)
error
{
// location := "us-central1
"
// modelName := "gemini-1.5-flash-001
"
ctx
:=
context
.
Background
()
prompt
:=
genai
.
Text
(
" Why
is
the
sky
blue
?
" )
client
,
err
:=
genai
.
NewClient
(
ctx
,
projectID
,
location
)
if
err
!=
nil
{
return
fmt
.
Errorf
(
" unable
to
create
client
:
%
w
" ,
err
)
}
defer
client
.
Close
()
model
:=
client
.
GenerativeModel
(
modelName
)
resp
,
err
:=
model
.
CountTokens
(
ctx
,
prompt
)
if
err
!=
nil
{
return
err
}
fmt
.
Fprintf
(
w
,
" Number
of
tokens
for
the
prompt
:
%
d
\
n
" ,
resp
.
TotalTokens
)
resp2
,
err
:=
model
.
GenerateContent
(
ctx
,
prompt
)
if
err
!=
nil
{
return
err
}
fmt
.
Fprintf
(
w
,
" Number
of
tokens
for
the
prompt
:
%
d
\
n
" ,
resp2
.
UsageMetadata
.
PromptTokenCount
)
fmt
.
Fprintf
(
w
,
" Number
of
tokens
for
the
candidates
:
%
d
\
n
" ,
resp2
.
UsageMetadata
.
CandidatesTokenCount
)
fmt
.
Fprintf
(
w
,
" Total
number
of
tokens
:
%
d
\
n
" ,
resp2
.
UsageMetadata
.
TotalTokenCount
)
return
nil
}
Java
import
com.google.cloud.vertexai.VertexAI
;
import
com.google.cloud.vertexai.api.CountTokensResponse
;
import
com.google.cloud.vertexai.api.GenerateContentResponse
;
import
com.google.cloud.vertexai.generativeai.GenerativeModel
;
import
java.io.IOException
;
public
class
GetTokenCount
{
public
static
void
main
(
String
[]
args
)
throws
IOException
{
// TODO(developer): Replace these variables before running the sample.
String
projectId
=
" your
-
google
-
cloud
-
project
-
id
" ;
String
location
=
" us
-
central1
" ;
String
modelName
=
" gemini
-
1.5
-
flash
-
001
" ;
getTokenCount
(
projectId
,
location
,
modelName
);
}
// Gets the number of tokens for the prompt and the model's response.
public
static
int
getTokenCount
(
String
projectId
,
String
location
,
String
modelName
)
throws
IOException
{
// Initialize client that will be used to send requests.
// This client only needs to be created once, and can be reused for multiple requests.
try
(
VertexAI
vertexAI
=
new
VertexAI
(
projectId
,
location
))
{
GenerativeModel
model
=
new
GenerativeModel
(
modelName
,
vertexAI
);
String
textPrompt
=
" Why
is
the
sky
blue
?
" ;
CountTokensResponse
response
=
model
.
countTokens
(
textPrompt
);
int
promptTokenCount
=
response
.
getTotalTokens
();
int
promptCharCount
=
response
.
getTotalBillableCharacters
();
System
.
out
.
println
(
" Prompt
token
Count
:
"
+
promptTokenCount
);
System
.
out
.
println
(
" Prompt
billable
character
count
:
"
+
promptCharCount
);
GenerateContentResponse
contentResponse
=
model
.
generateContent
(
textPrompt
);
int
tokenCount
=
contentResponse
.
getUsageMetadata
().
getPromptTokenCount
();
int
candidateTokenCount
=
contentResponse
.
getUsageMetadata
().
getCandidatesTokenCount
();
int
totalTokenCount
=
contentResponse
.
getUsageMetadata
().
getTotalTokenCount
();
System
.
out
.
println
(
" Prompt
token
Count
:
"
+
tokenCount
);
System
.
out
.
println
(
" Candidate
Token
Count
:
"
+
candidateTokenCount
);
System
.
out
.
println
(
" Total
token
Count
:
"
+
totalTokenCount
);
return
promptTokenCount
;
}
}
}
Node.js
const {VertexAI} = require('@google-cloud/vertexai');

/**
 * TODO(developer): Update these variables before running the sample.
 */
async function countTokens(
  projectId = 'PROJECT_ID',
  location = 'us-central1',
  model = 'gemini-1.5-flash-001'
) {
  // Initialize Vertex with your Cloud project and location
  const vertexAI = new VertexAI({project: projectId, location: location});

  // Instantiate the model
  const generativeModel = vertexAI.getGenerativeModel({
    model: model,
  });

  const req = {
    contents: [{role: 'user', parts: [{text: 'How are you doing today?'}]}],
  };

  // Count the tokens of the request without calling the model for inference.
  const countTokensResp = await generativeModel.countTokens(req);
  console.log('count tokens response: ', countTokensResp);
}
Python
import vertexai
from vertexai.generative_models import GenerativeModel

# TODO (developer): update project_id
vertexai.init(project=PROJECT_ID, location="us-central1")

model = GenerativeModel("gemini-1.5-flash-001")

prompt = "Why is the sky blue?"

# Prompt tokens count: count the tokens before calling the model for inference.
response = model.count_tokens(prompt)
print(f"Prompt Token Count: {response.total_tokens}")
print(f"Prompt Character Count: {response.total_billable_characters}")

# Send text to Gemini
response = model.generate_content(prompt)

# Response tokens count: the same counts are available in the usage metadata
# returned with the inference response.
usage_metadata = response.usage_metadata
print(f"Prompt Token Count: {usage_metadata.prompt_token_count}")
print(f"Candidates Token Count: {usage_metadata.candidates_token_count}")
print(f"Total Token Count: {usage_metadata.total_token_count}")
Multi-modal
Go
import
(
" context
"
" fmt
"
" io
"
" mime
"
" path
/
filepath
"
" cloud
.
google
.
com
/
go
/
vertexai
/
genai
" )
// countTokensMultimodal finds the number of tokens for a multimodal prompt (video+text), and writes to w. Then,
// it calls the model with the multimodal prompt and writes token counts from the response metadata to w.
//
// video is a Google Cloud Storage path starting with "gs://
" func
countTokensMultimodal
(
w
io
.
Writer
,
projectID
,
location
,
modelName
string
)
error
{
// location := "us-central1
"
// modelName := "gemini-1.5-flash-001
"
prompt
:=
" Provide
a
description
of
the
video
.
"
video
:=
" gs
:
//cloud-samples-data/generative-ai/video/pixel8.mp4
"
ctx
:=
context
.
Background
()
client
,
err
:=
genai
.
NewClient
(
ctx
,
projectID
,
location
)
if
err
!=
nil
{
return
fmt
.
Errorf
(
" unable
to
create
client
:
%
w
" ,
err
)
}
defer
client
.
Close
()
model
:=
client
.
GenerativeModel
(
modelName
)
part1
:=
genai
.
Text
(
prompt
)
// Given a video file URL, prepare video file as genai.Part
part2
:=
genai
.
FileData
{
MIMEType
:
mime
.
TypeByExtension
(
filepath
.
Ext
(
video
)),
FileURI
:
video
,
}
// Finds the total number of tokens for the 2 parts (text, video) of the multimodal prompt,
// before actually calling the model for inference.
resp
,
err
:=
model
.
CountTokens
(
ctx
,
part1
,
part2
)
if
err
!=
nil
{
return
err
}
fmt
.
Fprintf
(
w
,
" Number
of
tokens
for
the
multimodal
video
prompt
:
%
d
\
n
" ,
resp
.
TotalTokens
)
res
,
err
:=
model
.
GenerateContent
(
ctx
,
part1
,
part2
)
if
err
!=
nil
{
return
fmt
.
Errorf
(
" unable
to
generate
contents
:
%
w
" ,
err
)
}
// The token counts are also provided in the model response metadata, after inference.
fmt
.
Fprintln
(
w
,
" \
nModel
response
" )
md
:=
res
.
UsageMetadata
fmt
.
Fprintf
(
w
,
" Prompt
Token
Count
:
%
d
\
n
" ,
md
.
PromptTokenCount
)
fmt
.
Fprintf
(
w
,
" Candidates
Token
Count
:
%
d
\
n
" ,
md
.
CandidatesTokenCount
)
fmt
.
Fprintf
(
w
,
" Total
Token
Count
:
%
d
\
n
" ,
md
.
TotalTokenCount
)
return
nil
}
Python
import vertexai
from vertexai.generative_models import GenerativeModel, Part

# TODO (developer): update project_id
vertexai.init(project=PROJECT_ID, location="us-central1")

model = GenerativeModel("gemini-1.5-flash-001")

# Multimodal prompt: a Cloud Storage video plus a text instruction.
contents = [
    Part.from_uri(
        "gs://cloud-samples-data/generative-ai/video/pixel8.mp4",
        mime_type="video/mp4",
    ),
    "Provide a description of the video.",
]

# Prompt tokens count: count the tokens before calling the model for inference.
response = model.count_tokens(contents)
print(f"Prompt Token Count: {response.total_tokens}")
print(f"Prompt Character Count: {response.total_billable_characters}")

# Send text to Gemini
response = model.generate_content(contents)
usage_metadata = response.usage_metadata

# Response tokens count: the same counts are available in the usage metadata
# returned with the inference response.
print(f"Prompt Token Count: {usage_metadata.prompt_token_count}")
print(f"Candidates Token Count: {usage_metadata.candidates_token_count}")
print(f"Total Token Count: {usage_metadata.total_token_count}")
Response body
If successful, the response body contains an instance of CountTokensResponse.