Streaming speech recognition with punctuation

Performs streaming speech recognition on raw PCM audio data, with automatic punctuation enabled in the transcript.

Code sample

Java

To learn how to install and use the client library for Speech-to-Text, see Speech-to-Text client libraries. For more information, see the Speech-to-Text Java API reference documentation.

To authenticate to Speech-to-Text, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

  /**
   * Performs streaming speech recognition on raw PCM audio data, with automatic punctuation
   * enabled, and prints the top transcript of the first result of every response to stdout.
   *
   * <p>The whole file is read into memory and sent as a single audio request after the initial
   * configuration request. The method then blocks until the response stream completes.
   *
   * @param fileName the path to a PCM audio file to transcribe. The request is configured for
   *     LINEAR16 encoding, a 16000 Hz sample rate and the "en-US" language code.
   * @throws Exception if the file cannot be read, or if the response stream reports an error
   *     (surfaced through the future that collects the responses).
   */
  public static void streamingTranscribeWithAutomaticPunctuation(String fileName)
      throws Exception {
    Path path = Paths.get(fileName);
    byte[] data = Files.readAllBytes(path);

    // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
    try (SpeechClient speech = SpeechClient.create()) {

      // Configure request with local raw PCM audio
      RecognitionConfig recConfig =
          RecognitionConfig.newBuilder()
              .setEncoding(AudioEncoding.LINEAR16)
              .setLanguageCode("en-US")
              .setSampleRateHertz(16000)
              .setEnableAutomaticPunctuation(true)
              .build();

      // Build the streaming config with the audio config
      StreamingRecognitionConfig config =
          StreamingRecognitionConfig.newBuilder().setConfig(recConfig).build();

      // Collects every streamed message and exposes them through a future that completes when
      // the stream ends (successfully or with an error).
      class ResponseApiStreamingObserver<T> implements ApiStreamObserver<T> {
        private final SettableFuture<List<T>> future = SettableFuture.create();
        private final List<T> messages = new java.util.ArrayList<T>();

        @Override
        public void onNext(T message) {
          messages.add(message);
        }

        @Override
        public void onError(Throwable t) {
          future.setException(t);
        }

        @Override
        public void onCompleted() {
          future.set(messages);
        }

        // Returns the SettableFuture object to get received messages / exceptions.
        public SettableFuture<List<T>> future() {
          return future;
        }
      }

      ResponseApiStreamingObserver<StreamingRecognizeResponse> responseObserver =
          new ResponseApiStreamingObserver<>();

      BidiStreamingCallable<StreamingRecognizeRequest, StreamingRecognizeResponse> callable =
          speech.streamingRecognizeCallable();

      ApiStreamObserver<StreamingRecognizeRequest> requestObserver =
          callable.bidiStreamingCall(responseObserver);

      // The first request must **only** contain the audio configuration:
      requestObserver.onNext(
          StreamingRecognizeRequest.newBuilder().setStreamingConfig(config).build());

      // Subsequent requests must **only** contain the audio data.
      requestObserver.onNext(
          StreamingRecognizeRequest.newBuilder()
              .setAudioContent(ByteString.copyFrom(data))
              .build());

      // Mark transmission as completed after sending the data.
      requestObserver.onCompleted();

      // Blocks until onCompleted/onError has been delivered to the response observer.
      List<StreamingRecognizeResponse> responses = responseObserver.future().get();

      for (StreamingRecognizeResponse response : responses) {
        // For streaming recognize, the results list has one is_final result (if available)
        // followed by a number of in-progress results (if interim_results is true) for
        // subsequent utterances. Just print the first result here.
        List<StreamingRecognitionResult> results = response.getResultsList();
        if (results.isEmpty()) {
          // A response without any results has nothing to print; indexing into it would throw
          // IndexOutOfBoundsException.
          continue;
        }
        StreamingRecognitionResult result = results.get(0);

        // There can be several alternative transcripts for a given chunk of speech. Just use
        // the first (most likely) one here.
        List<SpeechRecognitionAlternative> alternatives = result.getAlternativesList();
        if (alternatives.isEmpty()) {
          // Same guard as above: skip results that carry no alternatives.
          continue;
        }
        SpeechRecognitionAlternative alternative = alternatives.get(0);
        System.out.printf("Transcript : %s\n", alternative.getTranscript());
      }
    }
  }
 

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.

Design a Mobile Site
View Site in Mobile | Classic
Share by: