Make an audio transcription request

Transcribe an audio file stored in Cloud Storage synchronously.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample

Go

To learn how to install and use the client library for Speech-to-Text, see Speech-to-Text client libraries. For more information, see the Speech-to-Text Go API reference documentation.

To authenticate to Speech-to-Text, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

  // Sample speech-quickstart uses the Google Cloud Speech API to transcribe 
 // audio. 
 package 
  
 main 
 import 
  
 ( 
  
 "context" 
  
 "fmt" 
  
 "log" 
  
 speech 
  
 "cloud.google.com/go/speech/apiv1" 
  
 "cloud.google.com/go/speech/apiv1/speechpb" 
 ) 
 func 
  
 main 
 () 
  
 { 
  
 ctx 
  
 := 
  
 context 
 . 
 Background 
 () 
  
 // Creates a client. 
  
 client 
 , 
  
 err 
  
 := 
  
 speech 
 . 
  NewClient 
 
 ( 
 ctx 
 ) 
  
 if 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 log 
 . 
 Fatalf 
 ( 
 "Failed to create client: %v" 
 , 
  
 err 
 ) 
  
 } 
  
 defer 
  
 client 
 . 
 Close 
 () 
  
 // The path to the remote audio file to transcribe. 
  
 fileURI 
  
 := 
  
 "gs://cloud-samples-data/speech/brooklyn_bridge.raw" 
  
 // Detects speech in the audio file. 
  
 resp 
 , 
  
 err 
  
 := 
  
 client 
 . 
 Recognize 
 ( 
 ctx 
 , 
  
& speechpb 
 . 
 RecognizeRequest 
 { 
  
 Config 
 : 
  
& speechpb 
 . 
 RecognitionConfig 
 { 
  
 Encoding 
 : 
  
 speechpb 
 . 
  RecognitionConfig_LINEAR16 
 
 , 
  
 SampleRateHertz 
 : 
  
 16000 
 , 
  
 LanguageCode 
 : 
  
 "en-US" 
 , 
  
 }, 
  
 Audio 
 : 
  
& speechpb 
 . 
 RecognitionAudio 
 { 
  
 AudioSource 
 : 
  
& speechpb 
 . 
 RecognitionAudio_Uri 
 { 
 Uri 
 : 
  
 fileURI 
 }, 
  
 }, 
  
 }) 
  
 if 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 log 
 . 
 Fatalf 
 ( 
 "failed to recognize: %v" 
 , 
  
 err 
 ) 
  
 } 
  
 // Prints the results. 
  
 for 
  
 _ 
 , 
  
 result 
  
 := 
  
 range 
  
 resp 
 . 
 Results 
  
 { 
  
 for 
  
 _ 
 , 
  
 alt 
  
 := 
  
 range 
  
 result 
 . 
 Alternatives 
  
 { 
  
 fmt 
 . 
 Printf 
 ( 
 "\"%v\" (confidence=%3f)\n" 
 , 
  
 alt 
 . 
 Transcript 
 , 
  
 alt 
 . 
 Confidence 
 ) 
  
 } 
  
 } 
 } 
 

Java

To learn how to install and use the client library for Speech-to-Text, see Speech-to-Text client libraries. For more information, see the Speech-to-Text Java API reference documentation.

To authenticate to Speech-to-Text, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

  // Imports the Google Cloud client library 
 import 
  
 com.google.cloud.speech.v1. RecognitionAudio 
 
 ; 
 import 
  
 com.google.cloud.speech.v1. RecognitionConfig 
 
 ; 
 import 
  
 com.google.cloud.speech.v1. RecognitionConfig 
. AudioEncoding 
 
 ; 
 import 
  
 com.google.cloud.speech.v1. RecognizeResponse 
 
 ; 
 import 
  
 com.google.cloud.speech.v1. SpeechClient 
 
 ; 
 import 
  
 com.google.cloud.speech.v1. SpeechRecognitionAlternative 
 
 ; 
 import 
  
 com.google.cloud.speech.v1. SpeechRecognitionResult 
 
 ; 
 import 
  
 java.util.List 
 ; 
 public 
  
 class 
 QuickstartSample 
  
 { 
  
 /** Demonstrates using the Speech API to transcribe an audio file. */ 
  
 public 
  
 static 
  
 void 
  
 main 
 ( 
 String 
 ... 
  
 args 
 ) 
  
 throws 
  
 Exception 
  
 { 
  
 // Instantiates a client 
  
 try 
  
 ( 
  SpeechClient 
 
  
 speechClient 
  
 = 
  
  SpeechClient 
 
 . 
 create 
 ()) 
  
 { 
  
 // The path to the audio file to transcribe 
  
 String 
  
 gcsUri 
  
 = 
  
 "gs://cloud-samples-data/speech/brooklyn_bridge.raw" 
 ; 
  
 // Builds the sync recognize request 
  
  RecognitionConfig 
 
  
 config 
  
 = 
  
  RecognitionConfig 
 
 . 
 newBuilder 
 () 
  
 . 
  setEncoding 
 
 ( 
  AudioEncoding 
 
 . 
 LINEAR16 
 ) 
  
 . 
  setSampleRateHertz 
 
 ( 
 16000 
 ) 
  
 . 
 setLanguageCode 
 ( 
 "en-US" 
 ) 
  
 . 
 build 
 (); 
  
  RecognitionAudio 
 
  
 audio 
  
 = 
  
  RecognitionAudio 
 
 . 
 newBuilder 
 (). 
 setUri 
 ( 
 gcsUri 
 ). 
 build 
 (); 
  
 // Performs speech recognition on the audio file 
  
  RecognizeResponse 
 
  
 response 
  
 = 
  
 speechClient 
 . 
 recognize 
 ( 
 config 
 , 
  
 audio 
 ); 
  
 List<SpeechRecognitionResult> 
  
 results 
  
 = 
  
 response 
 . 
  getResultsList 
 
 (); 
  
 for 
  
 ( 
  SpeechRecognitionResult 
 
  
 result 
  
 : 
  
 results 
 ) 
  
 { 
  
 // There can be several alternative transcripts for a given chunk of speech. Just use the 
  
 // first (most likely) one here. 
  
  SpeechRecognitionAlternative 
 
  
 alternative 
  
 = 
  
 result 
 . 
 getAlternativesList 
 (). 
 get 
 ( 
 0 
 ); 
  
 System 
 . 
 out 
 . 
 printf 
 ( 
 "Transcription: %s%n" 
 , 
  
 alternative 
 . 
  getTranscript 
 
 ()); 
  
 } 
  
 } 
  
 } 
 } 
 

Node.js

To learn how to install and use the client library for Speech-to-Text, see Speech-to-Text client libraries. For more information, see the Speech-to-Text Node.js API reference documentation.

To authenticate to Speech-to-Text, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

  // Imports the Google Cloud client library.
// The module specifier must be a single-line string with no surrounding
// spaces; a quoted string broken across lines does not parse.
const speech = require('@google-cloud/speech');

// Creates a client at module scope; quickstart() uses it for its request.
const client = new speech.SpeechClient();
 /**
 * Transcribes a sample LINEAR16 recording stored in Cloud Storage and logs
 * the first alternative transcript of every result, one per line.
 *
 * Uses the module-level `client`.
 *
 * @returns {Promise<void>} Resolves once the transcription has been logged;
 *     rejects if the recognize call fails.
 */
async function quickstart() {
  // The path to the remote LINEAR16 file
  const gcsUri = 'gs://cloud-samples-data/speech/brooklyn_bridge.raw';

  // Reference the audio by its URI
  const audio = {
    uri: gcsUri,
  };

  // The audio file's encoding, sample rate in hertz, and BCP-47 language code
  const config = {
    encoding: 'LINEAR16',
    sampleRateHertz: 16000,
    languageCode: 'en-US',
  };

  const request = {
    audio: audio,
    config: config,
  };

  // Detects speech in the audio file. The result of recognize() is
  // destructured as an array; only its first element is used.
  const [response] = await client.recognize(request);

  // Keep only the first alternative of each result.
  const transcription = response.results
    .map(result => result.alternatives[0].transcript)
    .join('\n');

  console.log(`Transcription: ${transcription}`);
}
 // Run the sample. quickstart() is async, so attach a rejection handler:
// a failure is printed and the process exit code is set to 1 instead of
// being left as an unhandled promise rejection.
quickstart().catch(err => {
  console.error(err);
  process.exitCode = 1;
});
 

PHP

To learn how to install and use the client library for Speech-to-Text, see Speech-to-Text client libraries.

To authenticate to Speech-to-Text, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

  # Includes the autoloader for libraries installed with composer 
 require __DIR__ . '/vendor/autoload.php'; 
 # Imports the Google Cloud client library 
 use Google\Cloud\Speech\V1\SpeechClient; 
 use Google\Cloud\Speech\V1\RecognitionAudio; 
 use Google\Cloud\Speech\V1\RecognitionConfig; 
 use Google\Cloud\Speech\V1\RecognitionConfig\AudioEncoding; 
 # The Cloud Storage URI of the audio file to transcribe
 $gcsURI = 'gs://cloud-samples-data/speech/brooklyn_bridge.raw';
 # Reference the audio by its URI
 $audio = (new RecognitionAudio())
     ->setUri($gcsURI);
 # The audio file's encoding, sample rate and language
 $config = new RecognitionConfig([
     'encoding' => AudioEncoding::LINEAR16,
     'sample_rate_hertz' => 16000,
     'language_code' => 'en-US'
 ]);
 # Instantiates a client
 $client = new SpeechClient();
 try {
     # Detects speech in the audio file
     $response = $client->recognize($config, $audio);
     # Print most likely transcription
     foreach ($response->getResults() as $result) {
         $alternatives = $result->getAlternatives();
         $mostLikely = $alternatives[0];
         $transcript = $mostLikely->getTranscript();
         printf('Transcript: %s' . PHP_EOL, $transcript);
     }
 } finally {
     # Close the client even when recognize() or the printing loop throws
     $client->close();
 }
 

Python

To learn how to install and use the client library for Speech-to-Text, see Speech-to-Text client libraries. For more information, see the Speech-to-Text Python API reference documentation.

To authenticate to Speech-to-Text, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

  # Imports the Google Cloud client library 
 from 
  
 google.cloud 
  
 import 
 speech 
 # Quickstart: synchronous transcription of a sample file in Cloud Storage.
def run_quickstart() -> speech.RecognizeResponse:
    """Transcribe a sample audio file and print the transcripts.

    Sends one recognize request for a recording referenced by its Cloud
    Storage URI and prints the first alternative transcript of each result.

    Returns:
        The response object returned by ``client.recognize``.
    """
    # Instantiates a client
    client = speech.SpeechClient()

    # The URI of the audio file to transcribe
    gcs_uri = "gs://cloud-samples-data/speech/brooklyn_bridge.raw"

    audio = speech.RecognitionAudio(uri=gcs_uri)

    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code="en-US",
    )

    # Detects speech in the audio file
    response = client.recognize(config=config, audio=audio)

    for result in response.results:
        print(f"Transcript: {result.alternatives[0].transcript}")

    # The signature promises a RecognizeResponse, so hand it back to the
    # caller instead of implicitly returning None.
    return response
 

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.

Design a Mobile Site
View Site in Mobile | Classic
Share by: