Transcribe a multi-channel file in Cloud Storage

Transcribe an audio file stored in Cloud Storage that includes more than one channel.

Code sample

Java

To learn how to install and use the client library for Speech-to-Text, see Speech-to-Text client libraries. For more information, see the Speech-to-Text Java API reference documentation.

To authenticate to Speech-to-Text, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

/**
 * Transcribe a remote audio file with multi-channel recognition
 *
 * @param gcsUri the path to the audio file
 */
public static void transcribeMultiChannelGcs(String gcsUri) throws Exception {

  try (SpeechClient speechClient = SpeechClient.create()) {

    // Configure request to enable multiple channels
    RecognitionConfig config =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.LINEAR16)
            .setLanguageCode("en-US")
            .setSampleRateHertz(44100)
            .setAudioChannelCount(2)
            .setEnableSeparateRecognitionPerChannel(true)
            .build();

    // Set the remote path for the audio file
    RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();

    // Use non-blocking call for getting file transcription
    OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
        speechClient.longRunningRecognizeAsync(config, audio);

    while (!response.isDone()) {
      System.out.println("Waiting for response...");
      Thread.sleep(10000);
    }

    // Just print the first result here.
    for (SpeechRecognitionResult result : response.get().getResultsList()) {
      // There can be several alternative transcripts for a given chunk of speech. Just use the
      // first (most likely) one here.
      SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);

      // Print out the result
      System.out.printf("Transcript : %s\n", alternative.getTranscript());
      System.out.printf("Channel Tag : %s\n", result.getChannelTag());
    }
  }
}

Node.js

To learn how to install and use the client library for Speech-to-Text, see Speech-to-Text client libraries. For more information, see the Speech-to-Text Node.js API reference documentation.

To authenticate to Speech-to-Text, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

const speech = require('@google-cloud/speech').v1;

// Creates a client
const client = new speech.SpeechClient();

const config = {
  encoding: 'LINEAR16',
  languageCode: 'en-US',
  audioChannelCount: 2,
  enableSeparateRecognitionPerChannel: true,
};

const audio = {
  uri: gcsUri,
};

const request = {
  config: config,
  audio: audio,
};

const [response] = await client.recognize(request);
const transcription = response.results
  .map(
    result =>
      `Channel Tag: ${result.channelTag} ${result.alternatives[0].transcript}`
  )
  .join('\n');
console.log(`Transcription: \n${transcription}`);

Python

To learn how to install and use the client library for Speech-to-Text, see Speech-to-Text client libraries. For more information, see the Speech-to-Text Python API reference documentation.

To authenticate to Speech-to-Text, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
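The Python client constructor picks up Application Default Credentials automatically. As a minimal sketch of the two common ways to supply credentials (the key-file path below is a placeholder, not part of this sample):

from google.cloud import speech

# Option 1: rely on Application Default Credentials, for example after running
# `gcloud auth application-default login` or setting GOOGLE_APPLICATION_CREDENTIALS.
client = speech.SpeechClient()

# Option 2: load an explicit service-account key file.
# "service-account.json" is a placeholder path, not part of the original sample.
client = speech.SpeechClient.from_service_account_file("service-account.json")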

from google.cloud import speech

client = speech.SpeechClient()

audio = speech.RecognitionAudio(uri=audio_uri)

config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=44100,
    language_code="en-US",
    audio_channel_count=2,
    enable_separate_recognition_per_channel=True,
)

response = client.recognize(config=config, audio=audio)

for i, result in enumerate(response.results):
    alternative = result.alternatives[0]
    print("-" * 20)
    print(f"First alternative of result {i}")
    print(f"Transcript: {alternative.transcript}")
    print(f"Channel Tag: {result.channel_tag}")

return result
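The snippet above references audio_uri and ends with a return statement, so it is meant to run inside a function. A minimal, self-contained sketch of such a wrapper and a call to it (the function name and the gs:// URI are placeholders, not part of the original sample):

from google.cloud import speech


def transcribe_multichannel_gcs(audio_uri):
    # Placeholder wrapper around the sample above; the request settings are the
    # same as in the original snippet.
    client = speech.SpeechClient()
    audio = speech.RecognitionAudio(uri=audio_uri)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=44100,
        language_code="en-US",
        audio_channel_count=2,
        enable_separate_recognition_per_channel=True,
    )
    response = client.recognize(config=config, audio=audio)
    for result in response.results:
        print(f"Channel {result.channel_tag}: {result.alternatives[0].transcript}")
    return response


# Example call; the bucket and object names are placeholders.
transcribe_multichannel_gcs("gs://your-bucket/multichannel-audio.wav")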

Ruby

To learn how to install and use the client library for Speech-to-Text, see Speech-to-Text client libraries.

To authenticate to Speech-to-Text, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

# storage_path = "Path to file in Cloud Storage, eg. gs://bucket/audio.raw"

require "google/cloud/speech"

speech = Google::Cloud::Speech.speech version: :v1

config = {
  encoding:                                :LINEAR16,
  sample_rate_hertz:                       44_100,
  language_code:                           "en-US",
  audio_channel_count:                     2,
  enable_separate_recognition_per_channel: true
}
audio = { uri: storage_path }

response = speech.recognize config: config, audio: audio

results = response.results

results.each_with_index do |result, i|
  alternative = result.alternatives.first
  puts "-" * 20
  puts "First alternative of result #{i}"
  puts "Transcript: #{alternative.transcript}"
  puts "Channel Tag: #{result.channel_tag}"
end

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.
