Make an audio transcription request

Transcribe an audio file stored in Cloud Storage synchronously.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample

Go

To learn how to install and use the client library for Speech-to-Text, see Speech-to-Text client libraries. For more information, see the Speech-to-Text Go API reference documentation.

To authenticate to Speech-to-Text, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

  // Sample speech-quickstart uses the Google Cloud Speech API to transcribe 
 // audio. 
 package 
  
 main 
 import 
  
 ( 
  
 "context" 
  
 "fmt" 
  
 "log" 
  
 speech 
  
 "cloud.google.com/go/speech/apiv1" 
  
 "cloud.google.com/go/speech/apiv1/speechpb" 
 ) 
 func 
  
 main 
 () 
  
 { 
  
 ctx 
  
 := 
  
 context 
 . 
 Background 
 () 
  
 // Creates a client. 
  
 client 
 , 
  
 err 
  
 := 
  
 speech 
 . 
  NewClient 
 
 ( 
 ctx 
 ) 
  
 if 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 log 
 . 
 Fatalf 
 ( 
 "Failed to create client: %v" 
 , 
  
 err 
 ) 
  
 } 
  
 defer 
  
 client 
 . 
 Close 
 () 
  
 // The path to the remote audio file to transcribe. 
  
 fileURI 
  
 := 
  
 "gs://cloud-samples-data/speech/brooklyn_bridge.raw" 
  
 // Detects speech in the audio file. 
  
 resp 
 , 
  
 err 
  
 := 
  
 client 
 . 
 Recognize 
 ( 
 ctx 
 , 
  
& speechpb 
 . 
 RecognizeRequest 
 { 
  
 Config 
 : 
  
& speechpb 
 . 
 RecognitionConfig 
 { 
  
 Encoding 
 : 
  
 speechpb 
 . 
  RecognitionConfig_LINEAR16 
 
 , 
  
 SampleRateHertz 
 : 
  
 16000 
 , 
  
 LanguageCode 
 : 
  
 "en-US" 
 , 
  
 }, 
  
 Audio 
 : 
  
& speechpb 
 . 
 RecognitionAudio 
 { 
  
 AudioSource 
 : 
  
& speechpb 
 . 
 RecognitionAudio_Uri 
 { 
 Uri 
 : 
  
 fileURI 
 }, 
  
 }, 
  
 }) 
  
 if 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 log 
 . 
 Fatalf 
 ( 
 "failed to recognize: %v" 
 , 
  
 err 
 ) 
  
 } 
  
 // Prints the results. 
  
 for 
  
 _ 
 , 
  
 result 
  
 := 
  
 range 
  
 resp 
 . 
 Results 
  
 { 
  
 for 
  
 _ 
 , 
  
 alt 
  
 := 
  
 range 
  
 result 
 . 
 Alternatives 
  
 { 
  
 fmt 
 . 
 Printf 
 ( 
 "\"%v\" (confidence=%3f)\n" 
 , 
  
 alt 
 . 
 Transcript 
 , 
  
 alt 
 . 
 Confidence 
 ) 
  
 } 
  
 } 
 }

Java

To learn how to install and use the client library for Speech-to-Text, see Speech-to-Text client libraries. For more information, see the Speech-to-Text Java API reference documentation.

To authenticate to Speech-to-Text, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

  // Imports the Google Cloud client library 
 import 
  
 com.google.cloud.speech.v1. RecognitionAudio 
 
 ; 
 import 
  
 com.google.cloud.speech.v1. RecognitionConfig 
 
 ; 
 import 
  
 com.google.cloud.speech.v1. RecognitionConfig 
. AudioEncoding 
 
 ; 
 import 
  
 com.google.cloud.speech.v1. RecognizeResponse 
 
 ; 
 import 
  
 com.google.cloud.speech.v1. SpeechClient 
 
 ; 
 import 
  
 com.google.cloud.speech.v1. SpeechRecognitionAlternative 
 
 ; 
 import 
  
 com.google.cloud.speech.v1. SpeechRecognitionResult 
 
 ; 
 import 
  
 java.util.List 
 ; 
 public 
  
 class 
 QuickstartSample 
  
 { 
  
 /** Demonstrates using the Speech API to transcribe an audio file. */ 
  
 public 
  
 static 
  
 void 
  
 main 
 ( 
 String 
 ... 
  
 args 
 ) 
  
 throws 
  
 Exception 
  
 { 
  
 // Instantiates a client 
  
 try 
  
 ( 
  SpeechClient 
 
  
 speechClient 
  
 = 
  
  SpeechClient 
 
 . 
 create 
 ()) 
  
 { 
  
 // The path to the audio file to transcribe 
  
 String 
  
 gcsUri 
  
 = 
  
 "gs://cloud-samples-data/speech/brooklyn_bridge.raw" 
 ; 
  
 // Builds the sync recognize request 
  
  RecognitionConfig 
 
  
 config 
  
 = 
  
  RecognitionConfig 
 
 . 
 newBuilder 
 () 
  
 . 
  setEncoding 
 
 ( 
  AudioEncoding 
 
 . 
 LINEAR16 
 ) 
  
 . 
  setSampleRateHertz 
 
 ( 
 16000 
 ) 
  
 . 
 setLanguageCode 
 ( 
 "en-US" 
 ) 
  
 . 
 build 
 (); 
  
  RecognitionAudio 
 
  
 audio 
  
 = 
  
  RecognitionAudio 
 
 . 
 newBuilder 
 (). 
 setUri 
 ( 
 gcsUri 
 ). 
 build 
 (); 
  
 // Performs speech recognition on the audio file 
  
  RecognizeResponse 
 
  
 response 
  
 = 
  
 speechClient 
 . 
 recognize 
 ( 
 config 
 , 
  
 audio 
 ); 
  
 List<SpeechRecognitionResult> 
  
 results 
  
 = 
  
 response 
 . 
  getResultsList 
 
 (); 
  
 for 
  
 ( 
  SpeechRecognitionResult 
 
  
 result 
  
 : 
  
 results 
 ) 
  
 { 
  
 // There can be several alternative transcripts for a given chunk of speech. Just use the 
  
 // first (most likely) one here. 
  
  SpeechRecognitionAlternative 
 
  
 alternative 
  
 = 
  
 result 
 . 
 getAlternativesList 
 (). 
 get 
 ( 
 0 
 ); 
  
 System 
 . 
 out 
 . 
 printf 
 ( 
 "Transcription: %s%n" 
 , 
  
 alternative 
 . 
  getTranscript 
 
 ()); 
  
 } 
  
 } 
  
 } 
 }

Node.js

To learn how to install and use the client library for Speech-to-Text, see Speech-to-Text client libraries. For more information, see the Speech-to-Text Node.js API reference documentation.

To authenticate to Speech-to-Text, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

  // Imports the Google Cloud client library 
 // The module specifier must be exactly '@google-cloud/speech': no padding and
 // no line break inside the quotes.
 const speech = require('@google-cloud/speech');

 // Creates a client
 const client = new speech.SpeechClient();
 /**
  * Sends one recognize request for a sample LINEAR16 file stored in Cloud
  * Storage and logs the first alternative transcript of every result, one per
  * line, prefixed with "Transcription: ".
  *
  * Uses the module-level `client`.
  */
 async function quickstart() {
   const recognizeRequest = {
     // The audio is referenced by its remote URI.
     audio: {
       uri: 'gs://cloud-samples-data/speech/brooklyn_bridge.raw',
     },
     // The audio file's encoding, sample rate in hertz, and BCP-47 language code
     config: {
       encoding: 'LINEAR16',
       sampleRateHertz: 16000,
       languageCode: 'en-US',
     },
   };

   // Detects speech in the audio file; the response is the first element of
   // the resolved value.
   const [response] = await client.recognize(recognizeRequest);

   // Collect the first alternative of each result.
   const transcripts = [];
   for (const segment of response.results) {
     transcripts.push(segment.alternatives[0].transcript);
   }
   const transcription = transcripts.join('\n');

   console.log(`Transcription: ${transcription}`);
 }
 // Run the sample. Handle a rejection explicitly so a failed request is
 // reported and the process exits with a non-zero status instead of leaving an
 // unhandled promise rejection.
 quickstart().catch(err => {
   console.error(err);
   process.exitCode = 1;
 });

PHP

To learn how to install and use the client library for Speech-to-Text, see Speech-to-Text client libraries.

To authenticate to Speech-to-Text, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

  # Includes the autoloader for libraries installed with composer 
 require __DIR__ . '/vendor/autoload.php'; 
 # Imports the Google Cloud client library 
 use Google\Cloud\Speech\V1\SpeechClient; 
 use Google\Cloud\Speech\V1\RecognitionAudio; 
 use Google\Cloud\Speech\V1\RecognitionConfig; 
 use Google\Cloud\Speech\V1\RecognitionConfig\AudioEncoding; 
 # The Cloud Storage URI of the audio file to transcribe
 $gcsURI = 'gs://cloud-samples-data/speech/brooklyn_bridge.raw';

 # Reference the audio by its URI (the audio bytes are not sent inline)
 $audio = (new RecognitionAudio())
     ->setUri($gcsURI);

 # The audio file's encoding, sample rate and language
 $config = new RecognitionConfig([
     'encoding' => AudioEncoding::LINEAR16,
     'sample_rate_hertz' => 16000,
     'language_code' => 'en-US'
 ]);

 # Instantiates a client
 $client = new SpeechClient();

 try {
     # Detects speech in the audio file
     $response = $client->recognize($config, $audio);

     # Print most likely transcription
     foreach ($response->getResults() as $result) {
         $alternatives = $result->getAlternatives();
         $mostLikely = $alternatives[0];
         $transcript = $mostLikely->getTranscript();
         printf('Transcript: %s' . PHP_EOL, $transcript);
     }
 } finally {
     # Close the client even when recognition or printing throws
     $client->close();
 }

Python

To learn how to install and use the client library for Speech-to-Text, see Speech-to-Text client libraries. For more information, see the Speech-to-Text Python API reference documentation.

To authenticate to Speech-to-Text, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

  # Imports the Google Cloud client library 
 from 
  
 google.cloud 
  
 import 
 speech 
 def run_quickstart() -> speech.RecognizeResponse:
     """Transcribe a sample audio file from Cloud Storage and print the result.

     Sends one synchronous recognize request for a LINEAR16, 16000 Hz, en-US
     recording referenced by its ``gs://`` URI, then prints the first
     alternative transcript of every result.

     Returns:
         The response object returned by ``client.recognize``, so callers can
         inspect the results themselves (the annotation already promised this,
         but the function previously returned ``None``).
     """
     # Instantiates a client
     client = speech.SpeechClient()

     # The name of the audio file to transcribe
     gcs_uri = "gs://cloud-samples-data/speech/brooklyn_bridge.raw"

     audio = speech.RecognitionAudio(uri=gcs_uri)

     config = speech.RecognitionConfig(
         encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
         sample_rate_hertz=16000,
         language_code="en-US",
     )

     # Detects speech in the audio file
     response = client.recognize(config=config, audio=audio)

     for result in response.results:
         print(f"Transcript: {result.alternatives[0].transcript}")

     return response

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.

Make an audio transcription request. Stay organized with collections: save and categorize content based on your preferences.

Explore further

Code sample

Go

Java

Node.js

PHP

Python

What's next

Make an audio transcription request