Summarize a video file with audio using Gemini Multimodal

This sample shows you how to summarize a video file with audio and return chapters with timestamps.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample

Go

Before trying this sample, follow the Go setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Go API reference documentation.

To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

  import 
  
 ( 
  
 "context" 
  
 "fmt" 
  
 "io" 
  
 genai 
  
 "google.golang.org/genai" 
 ) 
 // generateWithVideo shows how to generate text using a video input. 
 func 
  
 generateWithVideo 
 ( 
 w 
  
 io 
 . 
 Writer 
 ) 
  
 error 
  
 { 
  
 ctx 
  
 := 
  
 context 
 . 
 Background 
 () 
  
 client 
 , 
  
 err 
  
 := 
  
 genai 
 . 
 NewClient 
 ( 
 ctx 
 , 
  
& genai 
 . 
 ClientConfig 
 { 
  
 HTTPOptions 
 : 
  
 genai 
 . 
 HTTPOptions 
 { 
 APIVersion 
 : 
  
 "v1" 
 }, 
  
 }) 
  
 if 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 return 
  
 fmt 
 . 
 Errorf 
 ( 
 "failed to create genai client: %w" 
 , 
  
 err 
 ) 
  
 } 
  
 modelName 
  
 := 
  
 "gemini-2.5-flash" 
  
 contents 
  
 := 
  
 [] 
 * 
 genai 
 . 
 Content 
 { 
  
 { 
 Parts 
 : 
  
 [] 
 * 
 genai 
 . 
 Part 
 { 
  
 { 
 Text 
 : 
  
 `Analyze the provided video file, including its audio. 
 Summarize the main points of the video concisely. 
 Create a chapter breakdown with timestamps for key sections or topics discussed.` 
 }, 
  
 { 
 FileData 
 : 
  
& genai 
 . 
 FileData 
 { 
  
 FileURI 
 : 
  
 "gs://cloud-samples-data/generative-ai/video/pixel8.mp4" 
 , 
  
 MIMEType 
 : 
  
 "video/mp4" 
 , 
  
 }}, 
  
 }, 
  
 Role 
 : 
  
 "user" 
 }, 
  
 } 
  
 resp 
 , 
  
 err 
  
 := 
  
 client 
 . 
 Models 
 . 
 GenerateContent 
 ( 
 ctx 
 , 
  
 modelName 
 , 
  
 contents 
 , 
  
 nil 
 ) 
  
 if 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 return 
  
 fmt 
 . 
 Errorf 
 ( 
 "failed to generate content: %w" 
 , 
  
 err 
 ) 
  
 } 
  
 respText 
  
 := 
  
 resp 
 . 
 Text 
 () 
  
 fmt 
 . 
 Fprintln 
 ( 
 w 
 , 
  
 respText 
 ) 
  
 // Example response: 
  
 // Here's an analysis of the provided video file: 
  
 // 
  
 // **Summary** 
  
 // 
  
 // The video features Saeka Shimada, a photographer in Tokyo, who uses the new Pixel phone ... 
  
 // 
  
 // **Chapter Breakdown** 
  
 // 
  
 // *   **0:00-0:05**: Introduction to Saeka Shimada and her work as a photographer in Tokyo. 
  
 // ... 
  
 return 
  
 nil 
 } 
 

Java

Before trying this sample, follow the Java setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Java API reference documentation.

To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

  import 
  
 com.google.genai.Client 
 ; 
 import 
  
 com.google.genai.types.Content 
 ; 
 import 
  
 com.google.genai.types.GenerateContentResponse 
 ; 
 import 
  
 com.google.genai.types.HttpOptions 
 ; 
 import 
  
 com.google.genai.types.Part 
 ; 
 public 
  
 class 
 TextGenerationWithVideo 
  
 { 
  
 public 
  
 static 
  
 void 
  
 main 
 ( 
 String 
 [] 
  
 args 
 ) 
  
 { 
  
 // TODO(developer): Replace these variables before running the sample. 
  
 String 
  
 modelId 
  
 = 
  
 "gemini-2.5-flash" 
 ; 
  
 String 
  
 prompt 
  
 = 
  
 " Analyze the provided video file, including its audio.\n" 
  
 + 
  
 " Summarize the main points of the video concisely.\n" 
  
 + 
  
 " Create a chapter breakdown with timestamps for key sections or topics discussed." 
 ; 
  
 generateContent 
 ( 
 modelId 
 , 
  
 prompt 
 ); 
  
 } 
  
 // Generates text with video input 
  
 public 
  
 static 
  
 String 
  
 generateContent 
 ( 
 String 
  
 modelId 
 , 
  
 String 
  
 prompt 
 ) 
  
 { 
  
 // Initialize client that will be used to send requests. This client only needs to be created 
  
 // once, and can be reused for multiple requests. 
  
 try 
  
 ( 
 Client 
  
 client 
  
 = 
  
 Client 
 . 
 builder 
 () 
  
 . 
 location 
 ( 
 "global" 
 ) 
  
 . 
 vertexAI 
 ( 
 true 
 ) 
  
 . 
 httpOptions 
 ( 
 HttpOptions 
 . 
 builder 
 (). 
 apiVersion 
 ( 
 "v1" 
 ). 
 build 
 ()) 
  
 . 
 build 
 ()) 
  
 { 
  
 GenerateContentResponse 
  
 response 
  
 = 
  
 client 
 . 
 models 
 . 
 generateContent 
 ( 
  
 modelId 
 , 
  
 Content 
 . 
 fromParts 
 ( 
  
 Part 
 . 
 fromText 
 ( 
 prompt 
 ), 
  
 Part 
 . 
 fromUri 
 ( 
  
 "gs://cloud-samples-data/generative-ai/video/pixel8.mp4" 
 , 
  
 "video/mp4" 
 )), 
  
 null 
 ); 
  
 System 
 . 
 out 
 . 
 print 
 ( 
 response 
 . 
 text 
 ()); 
  
 // Example response: 
  
 // Here's a breakdown of the video: 
  
 // 
  
 // **Summary:** 
  
 // 
  
 // Saeka Shimada, a photographer in Tokyo, uses the Google Pixel 8 Pro's "Video Boost" feature 
  
 // to ... 
  
 // 
  
 // **Chapter Breakdown with Timestamps:** 
  
 // 
  
 // * **[00:00-00:12] Introduction & Tokyo at Night:** Saeka Shimada introduces herself ... 
  
 return 
  
 response 
 . 
 text 
 (); 
  
 } 
  
 } 
 } 
 

Node.js

Before trying this sample, follow the Node.js setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Node.js API reference documentation.

To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

  const 
  
 { 
 GoogleGenAI 
 } 
  
 = 
  
 require 
 ( 
 '@google/genai' 
 ); 
 const 
  
 GOOGLE_CLOUD_PROJECT 
  
 = 
  
 process 
 . 
 env 
 . 
 GOOGLE_CLOUD_PROJECT 
 ; 
 const 
  
 GOOGLE_CLOUD_LOCATION 
  
 = 
  
 process 
 . 
 env 
 . 
 GOOGLE_CLOUD_LOCATION 
  
 || 
  
 'global' 
 ; 
 async 
  
 function 
  
 generateContent 
 ( 
  
 projectId 
  
 = 
  
 GOOGLE_CLOUD_PROJECT 
 , 
  
 location 
  
 = 
  
 GOOGLE_CLOUD_LOCATION 
 ) 
  
 { 
  
 const 
  
 ai 
  
 = 
  
 new 
  
 GoogleGenAI 
 ({ 
  
 vertexai 
 : 
  
 true 
 , 
  
 project 
 : 
  
 projectId 
 , 
  
 location 
 : 
  
 location 
 , 
  
 }); 
  
 const 
  
 prompt 
  
 = 
  
 ` 
 Analyze the provided video file, including its audio. 
 Summarize the main points of the video concisely. 
 Create a chapter breakdown with timestamps for key sections or topics discussed. 
 ` 
 ; 
  
 const 
  
 video 
  
 = 
  
 { 
  
 fileData 
 : 
  
 { 
  
 fileUri 
 : 
  
 'gs://cloud-samples-data/generative-ai/video/pixel8.mp4' 
 , 
  
 mimeType 
 : 
  
 'video/mp4' 
 , 
  
 }, 
  
 }; 
  
 const 
  
 response 
  
 = 
  
 await 
  
 ai 
 . 
 models 
 . 
 generateContent 
 ({ 
  
 model 
 : 
  
 'gemini-2.5-flash' 
 , 
  
 contents 
 : 
  
 [ 
 video 
 , 
  
 prompt 
 ], 
  
 }); 
  
 console 
 . 
 log 
 ( 
 response 
 . 
 text 
 ); 
  
 return 
  
 response 
 . 
 text 
 ; 
 } 
 

Python

Before trying this sample, follow the Python setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Python API reference documentation.

To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

  from 
  
 google 
  
 import 
 genai 
 from 
  
 google.genai.types 
  
 import 
 HttpOptions 
 , 
 Part 
 client 
 = 
 genai 
 . 
 Client 
 ( 
 http_options 
 = 
 HttpOptions 
 ( 
 api_version 
 = 
 "v1" 
 )) 
 prompt 
 = 
 """ 
 Analyze the provided video file, including its audio. 
 Summarize the main points of the video concisely. 
 Create a chapter breakdown with timestamps for key sections or topics discussed. 
 """ 
 response 
 = 
 client 
 . 
 models 
 . 
 generate_content 
 ( 
 model 
 = 
 "gemini-2.5-flash" 
 , 
 contents 
 = 
 [ 
 Part 
 . 
 from_uri 
 ( 
 file_uri 
 = 
 "gs://cloud-samples-data/generative-ai/video/pixel8.mp4" 
 , 
 mime_type 
 = 
 "video/mp4" 
 , 
 ), 
 prompt 
 , 
 ], 
 ) 
 print 
 ( 
 response 
 . 
 text 
 ) 
 # Example response: 
 # Here's a breakdown of the video: 
 # 
 # **Summary:** 
 # 
 # Saeka Shimada, a photographer in Tokyo, uses the Google Pixel 8 Pro's "Video Boost" feature to ... 
 # 
 # **Chapter Breakdown with Timestamps:** 
 # 
 # * **[00:00-00:12] Introduction & Tokyo at Night:** Saeka Shimada introduces herself ... 
 # ... 
 

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser .

Create a Mobile Website
View Site in Mobile | Classic
Share by: