Summarize a video file with audio using Gemini Multimodal

This sample shows you how to summarize a video file with audio and return chapters with timestamps.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample

Go

Before trying this sample, follow the Go setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Go API reference documentation.

To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

  import 
  
 ( 
  
 "context" 
  
 "fmt" 
  
 "io" 
  
 genai 
  
 "google.golang.org/genai" 
 ) 
 // generateWithVideo shows how to generate text using a video input. 
 func 
  
 generateWithVideo 
 ( 
 w 
  
 io 
 . 
 Writer 
 ) 
  
 error 
  
 { 
  
 ctx 
  
 := 
  
 context 
 . 
 Background 
 () 
  
 client 
 , 
  
 err 
  
 := 
  
 genai 
 . 
 NewClient 
 ( 
 ctx 
 , 
  
& genai 
 . 
 ClientConfig 
 { 
  
 HTTPOptions 
 : 
  
 genai 
 . 
 HTTPOptions 
 { 
 APIVersion 
 : 
  
 "v1" 
 }, 
  
 }) 
  
 if 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 return 
  
 fmt 
 . 
 Errorf 
 ( 
 "failed to create genai client: %w" 
 , 
  
 err 
 ) 
  
 } 
  
 modelName 
  
 := 
  
 "gemini-2.5-flash" 
  
 contents 
  
 := 
  
 [] 
 * 
 genai 
 . 
 Content 
 { 
  
 { 
 Parts 
 : 
  
 [] 
 * 
 genai 
 . 
 Part 
 { 
  
 { 
 Text 
 : 
  
 `Analyze the provided video file, including its audio. 
 Summarize the main points of the video concisely. 
 Create a chapter breakdown with timestamps for key sections or topics discussed.` 
 }, 
  
 { 
 FileData 
 : 
  
& genai 
 . 
 FileData 
 { 
  
 FileURI 
 : 
  
 "gs://cloud-samples-data/generative-ai/video/pixel8.mp4" 
 , 
  
 MIMEType 
 : 
  
 "video/mp4" 
 , 
  
 }}, 
  
 }, 
  
 Role 
 : 
  
 "user" 
 }, 
  
 } 
  
 resp 
 , 
  
 err 
  
 := 
  
 client 
 . 
 Models 
 . 
 GenerateContent 
 ( 
 ctx 
 , 
  
 modelName 
 , 
  
 contents 
 , 
  
 nil 
 ) 
  
 if 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 return 
  
 fmt 
 . 
 Errorf 
 ( 
 "failed to generate content: %w" 
 , 
  
 err 
 ) 
  
 } 
  
 respText 
  
 := 
  
 resp 
 . 
 Text 
 () 
  
 fmt 
 . 
 Fprintln 
 ( 
 w 
 , 
  
 respText 
 ) 
  
 // Example response: 
  
 // Here's an analysis of the provided video file: 
  
 // 
  
 // **Summary** 
  
 // 
  
 // The video features Saeka Shimada, a photographer in Tokyo, who uses the new Pixel phone ... 
  
 // 
  
 // **Chapter Breakdown** 
  
 // 
  
 // *   **0:00-0:05**: Introduction to Saeka Shimada and her work as a photographer in Tokyo. 
  
 // ... 
  
 return 
  
 nil 
 } 
 

Java

Before trying this sample, follow the Java setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Java API reference documentation.

To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

  import 
  
 com.google.genai.Client 
 ; 
 import 
  
 com.google.genai.types.Content 
 ; 
 import 
  
 com.google.genai.types.GenerateContentResponse 
 ; 
 import 
  
 com.google.genai.types.HttpOptions 
 ; 
 import 
  
 com.google.genai.types.Part 
 ; 
 public 
  
 class 
 TextGenerationWithVideo 
  
 { 
  
 public 
  
 static 
  
 void 
  
 main 
 ( 
 String 
 [] 
  
 args 
 ) 
  
 { 
  
 // TODO(developer): Replace these variables before running the sample. 
  
 String 
  
 modelId 
  
 = 
  
 "gemini-2.5-flash" 
 ; 
  
 String 
  
 prompt 
  
 = 
  
 " Analyze the provided video file, including its audio.\n" 
  
 + 
  
 " Summarize the main points of the video concisely.\n" 
  
 + 
  
 " Create a chapter breakdown with timestamps for key sections or topics discussed." 
 ; 
  
 generateContent 
 ( 
 modelId 
 , 
  
 prompt 
 ); 
  
 } 
  
 // Generates text with video input 
  
 public 
  
 static 
  
 String 
  
 generateContent 
 ( 
 String 
  
 modelId 
 , 
  
 String 
  
 prompt 
 ) 
  
 { 
  
 // Initialize client that will be used to send requests. This client only needs to be created 
  
 // once, and can be reused for multiple requests. 
  
 try 
  
 ( 
 Client 
  
 client 
  
 = 
  
 Client 
 . 
 builder 
 () 
  
 . 
 location 
 ( 
 "global" 
 ) 
  
 . 
 vertexAI 
 ( 
 true 
 ) 
  
 . 
 httpOptions 
 ( 
 HttpOptions 
 . 
 builder 
 (). 
 apiVersion 
 ( 
 "v1" 
 ). 
 build 
 ()) 
  
 . 
 build 
 ()) 
  
 { 
  
 GenerateContentResponse 
  
 response 
  
 = 
  
 client 
 . 
 models 
 . 
 generateContent 
 ( 
  
 modelId 
 , 
  
 Content 
 . 
 fromParts 
 ( 
  
 Part 
 . 
 fromText 
 ( 
 prompt 
 ), 
  
 Part 
 . 
 fromUri 
 ( 
  
 "gs://cloud-samples-data/generative-ai/video/pixel8.mp4" 
 , 
  
 "video/mp4" 
 )), 
  
 null 
 ); 
  
 System 
 . 
 out 
 . 
 print 
 ( 
 response 
 . 
 text 
 ()); 
  
 // Example response: 
  
 // Here's a breakdown of the video: 
  
 // 
  
 // **Summary:** 
  
 // 
  
 // Saeka Shimada, a photographer in Tokyo, uses the Google Pixel 8 Pro's "Video Boost" feature 
  
 // to ... 
  
 // 
  
 // **Chapter Breakdown with Timestamps:** 
  
 // 
  
 // * **[00:00-00:12] Introduction & Tokyo at Night:** Saeka Shimada introduces herself ... 
  
 return 
  
 response 
 . 
 text 
 (); 
  
 } 
  
 } 
 } 
 

Node.js

Before trying this sample, follow the Node.js setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Node.js API reference documentation.

To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

  const 
  
 { 
 GoogleGenAI 
 } 
  
 = 
  
 require 
 ( 
 '@google/genai' 
 ); 
 const 
  
 GOOGLE_CLOUD_PROJECT 
  
 = 
  
 process 
 . 
 env 
 . 
 GOOGLE_CLOUD_PROJECT 
 ; 
 const 
  
 GOOGLE_CLOUD_LOCATION 
  
 = 
  
 process 
 . 
 env 
 . 
 GOOGLE_CLOUD_LOCATION 
  
 || 
  
 'global' 
 ; 
 async 
  
 function 
  
 generateContent 
 ( 
  
 projectId 
  
 = 
  
 GOOGLE_CLOUD_PROJECT 
 , 
  
 location 
  
 = 
  
 GOOGLE_CLOUD_LOCATION 
 ) 
  
 { 
  
 const 
  
 ai 
  
 = 
  
 new 
  
 GoogleGenAI 
 ({ 
  
 vertexai 
 : 
  
 true 
 , 
  
 project 
 : 
  
 projectId 
 , 
  
 location 
 : 
  
 location 
 , 
  
 }); 
  
 const 
  
 prompt 
  
 = 
  
 ` 
 Analyze the provided video file, including its audio. 
 Summarize the main points of the video concisely. 
 Create a chapter breakdown with timestamps for key sections or topics discussed. 
 ` 
 ; 
  
 const 
  
 video 
  
 = 
  
 { 
  
 fileData 
 : 
  
 { 
  
 fileUri 
 : 
  
 'gs://cloud-samples-data/generative-ai/video/pixel8.mp4' 
 , 
  
 mimeType 
 : 
  
 'video/mp4' 
 , 
  
 }, 
  
 }; 
  
 const 
  
 response 
  
 = 
  
 await 
  
 ai 
 . 
 models 
 . 
 generateContent 
 ({ 
  
 model 
 : 
  
 'gemini-2.5-flash' 
 , 
  
 contents 
 : 
  
 [ 
 video 
 , 
  
 prompt 
 ], 
  
 }); 
  
 console 
 . 
 log 
 ( 
 response 
 . 
 text 
 ); 
  
 return 
  
 response 
 . 
 text 
 ; 
 } 
 

Python

Before trying this sample, follow the Python setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Python API reference documentation.

To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

  from 
  
 google 
  
 import 
 genai 
 from 
  
 google.genai.types 
  
 import 
 HttpOptions 
 , 
 Part 
 client 
 = 
 genai 
 . 
 Client 
 ( 
 http_options 
 = 
 HttpOptions 
 ( 
 api_version 
 = 
 "v1" 
 )) 
 prompt 
 = 
 """ 
 Analyze the provided video file, including its audio. 
 Summarize the main points of the video concisely. 
 Create a chapter breakdown with timestamps for key sections or topics discussed. 
 """ 
 response 
 = 
 client 
 . 
 models 
 . 
 generate_content 
 ( 
 model 
 = 
 "gemini-2.5-flash" 
 , 
 contents 
 = 
 [ 
 Part 
 . 
 from_uri 
 ( 
 file_uri 
 = 
 "gs://cloud-samples-data/generative-ai/video/pixel8.mp4" 
 , 
 mime_type 
 = 
 "video/mp4" 
 , 
 ), 
 prompt 
 , 
 ], 
 ) 
 print 
 ( 
 response 
 . 
 text 
 ) 
 # Example response: 
 # Here's a breakdown of the video: 
 # 
 # **Summary:** 
 # 
 # Saeka Shimada, a photographer in Tokyo, uses the Google Pixel 8 Pro's "Video Boost" feature to ... 
 # 
 # **Chapter Breakdown with Timestamps:** 
 # 
 # * **[00:00-00:12] Introduction & Tokyo at Night:** Saeka Shimada introduces herself ... 
 # ... 
 

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser .

Create a Mobile Website
View Site in Mobile | Classic
Share by: