Method: endpoints.countTokens

Performs token counting.

Endpoint

post https://{service-endpoint}/v1/{endpoint}:countTokens

Where {service-endpoint} is one of the supported service endpoints.
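For example, with the us-central1 regional endpoint and hypothetical project and endpoint IDs, the full request URL looks like this:

POST https://us-central1-aiplatform.googleapis.com/v1/projects/my-project/locations/us-central1/endpoints/1234567890:countTokens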

Path parameters

endpoint string

Required. The name of the Endpoint requested to perform token counting. Format: projects/{project}/locations/{location}/endpoints/{endpoint}

Request body

The request body contains data with the following structure:

Fields
model string

Optional. The name of the publisher model requested to serve the prediction. Format: projects/{project}/locations/{location}/publishers/*/models/*

instances[] value ( Value format)

Optional. The instances that are the input to the token counting call. The schema is identical to the prediction schema of the underlying model.

contents[] object ( Content )

Optional. Input content.

tools[] object ( Tool )

Optional. A list of Tools the model may use to generate the next response.

A Tool is a piece of code that enables the system to interact with external systems to perform an action, or set of actions, outside of the knowledge and scope of the model. A hypothetical tool declaration appears in the request sketch after this field list.

systemInstruction object ( Content )

Optional. The user-provided system instructions for the model. Note: Only text should be used in parts, and the content in each part will be placed in a separate paragraph.
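As a minimal sketch of the request structure, the following assumes the Python requests library and an access token from gcloud auth print-access-token; the project and model IDs are placeholders, the publisher model path stands in for a dedicated endpoint (as in the samples below), and the tools entry is a hypothetical function declaration for illustration.

import requests

# Placeholders -- replace with your own values.
PROJECT_ID = "my-project"
LOCATION = "us-central1"
MODEL = "gemini-1.5-flash-001"
ACCESS_TOKEN = "..."  # e.g. output of `gcloud auth print-access-token`

url = (
    f"https://{LOCATION}-aiplatform.googleapis.com/v1/projects/{PROJECT_ID}"
    f"/locations/{LOCATION}/publishers/google/models/{MODEL}:countTokens"
)

# The body mirrors the fields above: contents uses the same schema as a
# generation request, systemInstruction is counted as text-only parts, and
# tools contributes the (hypothetical) function declaration to the count.
body = {
    "contents": [
        {"role": "user", "parts": [{"text": "Why is the sky blue?"}]}
    ],
    "systemInstruction": {"parts": [{"text": "Answer concisely."}]},
    "tools": [
        {
            "functionDeclarations": [
                {
                    "name": "get_weather",  # hypothetical example tool
                    "description": "Returns current weather for a city.",
                    "parameters": {
                        "type": "OBJECT",
                        "properties": {"city": {"type": "STRING"}},
                    },
                }
            ]
        }
    ],
}

response = requests.post(
    url, headers={"Authorization": f"Bearer {ACCESS_TOKEN}"}, json=body
)
print(response.json())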

Example request

Text

C#

using Google.Cloud.AIPlatform.V1;
using System;
using System.Threading.Tasks;

public class GetTokenCount
{
    public async Task<int> CountTokens(
        string projectId = "your-project-id",
        string location = "us-central1",
        string publisher = "google",
        string model = "gemini-1.5-flash-001"
    )
    {
        // Create a client that targets the regional service endpoint.
        var client = new LlmUtilityServiceClientBuilder
        {
            Endpoint = $"{location}-aiplatform.googleapis.com"
        }.Build();

        // Build the count-tokens request against the publisher model resource.
        var request = new CountTokensRequest
        {
            Endpoint = $"projects/{projectId}/locations/{location}/publishers/{publisher}/models/{model}",
            Model = $"projects/{projectId}/locations/{location}/publishers/{publisher}/models/{model}",
            Contents =
            {
                new Content
                {
                    Role = "USER",
                    Parts = { new Part { Text = "Why is the sky blue?" } }
                }
            }
        };

        // The response reports the total token count for the prompt.
        var response = await client.CountTokensAsync(request);
        int tokenCount = response.TotalTokens;
        Console.WriteLine($"There are {tokenCount} tokens in the prompt.");
        return tokenCount;
    }
}

Go

import (
	"context"
	"fmt"
	"io"

	"cloud.google.com/go/vertexai/genai"
)

// countTokens returns the number of tokens for this prompt.
func countTokens(w io.Writer, projectID, location, modelName string) error {
	// location := "us-central1"
	// modelName := "gemini-1.5-flash-001"
	ctx := context.Background()

	prompt := genai.Text("Why is the sky blue?")

	client, err := genai.NewClient(ctx, projectID, location)
	if err != nil {
		return fmt.Errorf("unable to create client: %w", err)
	}
	defer client.Close()

	model := client.GenerativeModel(modelName)

	resp, err := model.CountTokens(ctx, prompt)
	if err != nil {
		return err
	}
	fmt.Fprintf(w, "Number of tokens for the prompt: %d\n", resp.TotalTokens)

	resp2, err := model.GenerateContent(ctx, prompt)
	if err != nil {
		return err
	}
	fmt.Fprintf(w, "Number of tokens for the prompt: %d\n", resp2.UsageMetadata.PromptTokenCount)
	fmt.Fprintf(w, "Number of tokens for the candidates: %d\n", resp2.UsageMetadata.CandidatesTokenCount)
	fmt.Fprintf(w, "Total number of tokens: %d\n", resp2.UsageMetadata.TotalTokenCount)

	return nil
}

Java

import com.google.cloud.vertexai.VertexAI;
import com.google.cloud.vertexai.api.CountTokensResponse;
import com.google.cloud.vertexai.api.GenerateContentResponse;
import com.google.cloud.vertexai.generativeai.GenerativeModel;
import java.io.IOException;

public class GetTokenCount {

  public static void main(String[] args) throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "your-google-cloud-project-id";
    String location = "us-central1";
    String modelName = "gemini-1.5-flash-001";

    getTokenCount(projectId, location, modelName);
  }

  // Gets the number of tokens for the prompt and the model's response.
  public static int getTokenCount(String projectId, String location, String modelName)
      throws IOException {
    // Initialize client that will be used to send requests.
    // This client only needs to be created once, and can be reused for multiple requests.
    try (VertexAI vertexAI = new VertexAI(projectId, location)) {
      GenerativeModel model = new GenerativeModel(modelName, vertexAI);

      String textPrompt = "Why is the sky blue?";
      CountTokensResponse response = model.countTokens(textPrompt);
      int promptTokenCount = response.getTotalTokens();
      int promptCharCount = response.getTotalBillableCharacters();
      System.out.println("Prompt token Count: " + promptTokenCount);
      System.out.println("Prompt billable character count: " + promptCharCount);

      GenerateContentResponse contentResponse = model.generateContent(textPrompt);
      int tokenCount = contentResponse.getUsageMetadata().getPromptTokenCount();
      int candidateTokenCount = contentResponse.getUsageMetadata().getCandidatesTokenCount();
      int totalTokenCount = contentResponse.getUsageMetadata().getTotalTokenCount();
      System.out.println("Prompt token Count: " + tokenCount);
      System.out.println("Candidate Token Count: " + candidateTokenCount);
      System.out.println("Total token Count: " + totalTokenCount);

      return promptTokenCount;
    }
  }
}

Node.js

const {VertexAI} = require('@google-cloud/vertexai');

/**
 * TODO(developer): Update these variables before running the sample.
 */
async function countTokens(
  projectId = 'PROJECT_ID',
  location = 'us-central1',
  model = 'gemini-1.5-flash-001'
) {
  // Initialize Vertex with your Cloud project and location
  const vertexAI = new VertexAI({project: projectId, location: location});

  // Instantiate the model
  const generativeModel = vertexAI.getGenerativeModel({
    model: model,
  });

  const req = {
    contents: [{role: 'user', parts: [{text: 'How are you doing today?'}]}],
  };

  const countTokensResp = await generativeModel.countTokens(req);
  console.log('count tokens response: ', countTokensResp);
}

Python

import vertexai
from vertexai.generative_models import GenerativeModel

# TODO(developer): update project_id
vertexai.init(project=PROJECT_ID, location="us-central1")

model = GenerativeModel("gemini-1.5-flash-001")

prompt = "Why is the sky blue?"

# Prompt tokens count
response = model.count_tokens(prompt)
print(f"Prompt Token Count: {response.total_tokens}")
print(f"Prompt Character Count: {response.total_billable_characters}")

# Send text to Gemini
response = model.generate_content(prompt)

# Response tokens count
usage_metadata = response.usage_metadata
print(f"Prompt Token Count: {usage_metadata.prompt_token_count}")
print(f"Candidates Token Count: {usage_metadata.candidates_token_count}")
print(f"Total Token Count: {usage_metadata.total_token_count}")

Multi-modal

Go

import (
	"context"
	"fmt"
	"io"
	"mime"
	"path/filepath"

	"cloud.google.com/go/vertexai/genai"
)

// countTokensMultimodal finds the number of tokens for a multimodal prompt (video+text), and writes to w. Then,
// it calls the model with the multimodal prompt and writes token counts from the response metadata to w.
//
// video is a Google Cloud Storage path starting with "gs://"
func countTokensMultimodal(w io.Writer, projectID, location, modelName string) error {
	// location := "us-central1"
	// modelName := "gemini-1.5-flash-001"
	prompt := "Provide a description of the video."
	video := "gs://cloud-samples-data/generative-ai/video/pixel8.mp4"

	ctx := context.Background()
	client, err := genai.NewClient(ctx, projectID, location)
	if err != nil {
		return fmt.Errorf("unable to create client: %w", err)
	}
	defer client.Close()

	model := client.GenerativeModel(modelName)

	part1 := genai.Text(prompt)

	// Given a video file URL, prepare video file as genai.Part
	part2 := genai.FileData{
		MIMEType: mime.TypeByExtension(filepath.Ext(video)),
		FileURI:  video,
	}

	// Finds the total number of tokens for the 2 parts (text, video) of the multimodal prompt,
	// before actually calling the model for inference.
	resp, err := model.CountTokens(ctx, part1, part2)
	if err != nil {
		return err
	}
	fmt.Fprintf(w, "Number of tokens for the multimodal video prompt: %d\n", resp.TotalTokens)

	res, err := model.GenerateContent(ctx, part1, part2)
	if err != nil {
		return fmt.Errorf("unable to generate contents: %w", err)
	}

	// The token counts are also provided in the model response metadata, after inference.
	fmt.Fprintln(w, "\nModel response")
	md := res.UsageMetadata
	fmt.Fprintf(w, "Prompt Token Count: %d\n", md.PromptTokenCount)
	fmt.Fprintf(w, "Candidates Token Count: %d\n", md.CandidatesTokenCount)
	fmt.Fprintf(w, "Total Token Count: %d\n", md.TotalTokenCount)

	return nil
}

Python

import vertexai
from vertexai.generative_models import GenerativeModel, Part

# TODO(developer): update project_id
vertexai.init(project=PROJECT_ID, location="us-central1")

model = GenerativeModel("gemini-1.5-flash-001")

contents = [
    Part.from_uri(
        "gs://cloud-samples-data/generative-ai/video/pixel8.mp4",
        mime_type="video/mp4",
    ),
    "Provide a description of the video.",
]

# Prompt tokens count
response = model.count_tokens(contents)
print(f"Prompt Token Count: {response.total_tokens}")
print(f"Prompt Character Count: {response.total_billable_characters}")

# Send text to Gemini
response = model.generate_content(contents)
usage_metadata = response.usage_metadata

# Response tokens count
print(f"Prompt Token Count: {usage_metadata.prompt_token_count}")
print(f"Candidates Token Count: {usage_metadata.candidates_token_count}")
print(f"Total Token Count: {usage_metadata.total_token_count}")

Response body

If successful, the response body contains an instance of CountTokensResponse.
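For illustration, a successful response carries JSON along these lines; the values here are hypothetical, and the fields correspond to the total_tokens and total_billable_characters values read in the samples above:

{
  "totalTokens": 6,
  "totalBillableCharacters": 16
}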