Get automatic punctuation

This page describes how to get automatic punctuation in transcription results from Speech-to-Text. When you enable this feature, Speech-to-Text automatically infers the presence of periods, commas, and question marks in your audio data and adds them to the transcript.

By default, Speech-to-Text does not include punctuation marks in the results from speech recognition. However, you can request that Speech-to-Text automatically detect and insert punctuation in transcription results. When you enable automatic punctuation Speech-to-Text will also automatically capitalize the first letter after each period and question mark.

To enable automatic punctuation, set the enableAutomaticPunctuation field to true in the RecognitionConfig parameters for the request. The Speech-to-Text API supports automatic punctuation for all speech recognition methods: speech:recognize , speech:longrunningrecognize , and Streaming .

The following code samples demonstrate how to get automatic punctuation details in a transcription request.

Protocol

Refer to the speech:recognize API endpoint for complete details.

To perform synchronous speech recognition, make a POST request and provide the appropriate request body. The following shows an example of a POST request using curl . The example uses the Google Cloud CLI to generate an access token. For instructions on installing the gcloud CLI, see the quickstart .

curl -s -H "Content-Type: application/json" \
    -H "Authorization: Bearer "$(gcloud auth print-access-token) \
    https://speech.googleapis.com/v1/speech:recognize \
    --data '{
  "config": {
    "encoding":"FLAC",
    "sampleRateHertz": 16000,
    "languageCode": "en-US", "enableAutomaticPunctuation": true},
  "audio": {
    "uri":"gs://cloud-samples-tests/speech/brooklyn.flac"
  }
}'

See the RecognitionConfig reference documentation for more information on configuring the request body.

If the request is successful, the server returns a 200 OK HTTP status code and the response in JSON format:

{
  "results": [
    {
      "alternatives": [
        {
          "transcript": "How old is the Brooklyn Bridge?",
          "confidence": 0.98360395
        }
      ]
    }
  ]
}

Go

To learn how to install and use the client library for Speech-to-Text, see Speech-to-Text client libraries . For more information, see the Speech-to-Text Go API reference documentation .

To authenticate to Speech-to-Text, set up Application Default Credentials. For more information, see Set up authentication for a local development environment .

  import 
  
 ( 
  
 "context" 
  
 "fmt" 
  
 "io" 
  
 "os" 
  
 "strings" 
  
 speech 
  
 "cloud.google.com/go/speech/apiv1" 
  
 "cloud.google.com/go/speech/apiv1/speechpb" 
 ) 
 func 
  
 autoPunctuation 
 ( 
 w 
  
 io 
 . 
 Writer 
 , 
  
 path 
  
 string 
 ) 
  
 error 
  
 { 
  
 ctx 
  
 := 
  
 context 
 . 
 Background 
 () 
  
 client 
 , 
  
 err 
  
 := 
  
 speech 
 . 
  NewClient 
 
 ( 
 ctx 
 ) 
  
 if 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 return 
  
 fmt 
 . 
 Errorf 
 ( 
 "NewClient: %w" 
 , 
  
 err 
 ) 
  
 } 
  
 defer 
  
 client 
 . 
 Close 
 () 
  
 // path = "../testdata/commercial_mono.wav" 
  
 data 
 , 
  
 err 
  
 := 
  
 os 
 . 
 ReadFile 
 ( 
 path 
 ) 
  
 if 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 return 
  
 fmt 
 . 
 Errorf 
 ( 
 "ReadFile: %w" 
 , 
  
 err 
 ) 
  
 } 
  
 resp 
 , 
  
 err 
  
 := 
  
 client 
 . 
 Recognize 
 ( 
 ctx 
 , 
  
& speechpb 
 . 
 RecognizeRequest 
 { 
  
 Config 
 : 
  
& speechpb 
 . 
 RecognitionConfig 
 { 
  
 Encoding 
 : 
  
 speechpb 
 . 
  RecognitionConfig_LINEAR16 
 
 , 
  
 SampleRateHertz 
 : 
  
 8000 
 , 
  
 LanguageCode 
 : 
  
 "en-US" 
 , 
  
 // Enable automatic punctuation. 
  
 EnableAutomaticPunctuation 
 : 
  
 true 
 , 
  
 }, 
  
 Audio 
 : 
  
& speechpb 
 . 
 RecognitionAudio 
 { 
  
 AudioSource 
 : 
  
& speechpb 
 . 
 RecognitionAudio_Content 
 { 
 Content 
 : 
  
 data 
 }, 
  
 }, 
  
 }) 
  
 if 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 return 
  
 fmt 
 . 
 Errorf 
 ( 
 "Recognize: %w" 
 , 
  
 err 
 ) 
  
 } 
  
 for 
  
 i 
 , 
  
 result 
  
 := 
  
 range 
  
 resp 
 . 
 Results 
  
 { 
  
 fmt 
 . 
 Fprintf 
 ( 
 w 
 , 
  
 "%s\n" 
 , 
  
 strings 
 . 
 Repeat 
 ( 
 "-" 
 , 
  
 20 
 )) 
  
 fmt 
 . 
 Fprintf 
 ( 
 w 
 , 
  
 "Result %d\n" 
 , 
  
 i 
 + 
 1 
 ) 
  
 for 
  
 j 
 , 
  
 alternative 
  
 := 
  
 range 
  
 result 
 . 
 Alternatives 
  
 { 
  
 fmt 
 . 
 Fprintf 
 ( 
 w 
 , 
  
 "Alternative %d: %s\n" 
 , 
  
 j 
 + 
 1 
 , 
  
 alternative 
 . 
 Transcript 
 ) 
  
 } 
  
 } 
  
 return 
  
 nil 
 } 
 

Java

To learn how to install and use the client library for Speech-to-Text, see Speech-to-Text client libraries . For more information, see the Speech-to-Text Java API reference documentation .

To authenticate to Speech-to-Text, set up Application Default Credentials. For more information, see Set up authentication for a local development environment .

  /** 
 * Performs transcription on remote FLAC file and prints the transcription. 
 * 
 * @param gcsUri the path to the remote FLAC audio file to transcribe. 
 */ 
 public 
  
 static 
  
 void 
  
 transcribeGcsWithAutomaticPunctuation 
 ( 
 String 
  
 gcsUri 
 ) 
  
 throws 
  
 Exception 
  
 { 
  
 try 
  
 ( 
 SpeechClient 
  
 speechClient 
  
 = 
  
 SpeechClient 
 . 
 create 
 ()) 
  
 { 
  
 // Configure request with raw PCM audio 
  
 RecognitionConfig 
  
 config 
  
 = 
  
 RecognitionConfig 
 . 
 newBuilder 
 () 
  
 . 
 setEncoding 
 ( 
 AudioEncoding 
 . 
 FLAC 
 ) 
  
 . 
 setLanguageCode 
 ( 
 "en-US" 
 ) 
  
 . 
 setSampleRateHertz 
 ( 
 16000 
 ) 
  
 . 
 setEnableAutomaticPunctuation 
 ( 
 true 
 ) 
  
 . 
 build 
 (); 
  
 // Set the remote path for the audio file 
  
 RecognitionAudio 
  
 audio 
  
 = 
  
 RecognitionAudio 
 . 
 newBuilder 
 (). 
 setUri 
 ( 
 gcsUri 
 ). 
 build 
 (); 
  
 // Use non-blocking call for getting file transcription 
  
 OperationFuture<LongRunningRecognizeResponse 
 , 
  
 LongRunningRecognizeMetadata 
>  
 response 
  
 = 
  
 speechClient 
 . 
 longRunningRecognizeAsync 
 ( 
 config 
 , 
  
 audio 
 ); 
  
 while 
  
 ( 
 ! 
 response 
 . 
 isDone 
 ()) 
  
 { 
  
 System 
 . 
 out 
 . 
 println 
 ( 
 "Waiting for response..." 
 ); 
  
 Thread 
 . 
 sleep 
 ( 
 10000 
 ); 
  
 } 
  
 // Just print the first result here. 
  
 SpeechRecognitionResult 
  
 result 
  
 = 
  
 response 
 . 
 get 
 (). 
 getResultsList 
 (). 
 get 
 ( 
 0 
 ); 
  
 // There can be several alternative transcripts for a given chunk of speech. Just use the 
  
 // first (most likely) one here. 
  
 SpeechRecognitionAlternative 
  
 alternative 
  
 = 
  
 result 
 . 
 getAlternativesList 
 (). 
 get 
 ( 
 0 
 ); 
  
 // Print out the result 
  
 System 
 . 
 out 
 . 
 printf 
 ( 
 "Transcript : %s\n" 
 , 
  
 alternative 
 . 
 getTranscript 
 ()); 
  
 } 
 } 
 

Node.js

To learn how to install and use the client library for Speech-to-Text, see Speech-to-Text client libraries . For more information, see the Speech-to-Text Node.js API reference documentation .

To authenticate to Speech-to-Text, set up Application Default Credentials. For more information, see Set up authentication for a local development environment .

  // Imports the Google Cloud client library for API 
 /** 
 * TODO(developer): Update client library import to use new 
 * version of API when desired features become available 
 */ 
 const 
  
 speech 
  
 = 
  
 require 
 ( 
 ' @google-cloud/speech 
' 
 ); 
 const 
  
 fs 
  
 = 
  
 require 
 ( 
 'fs' 
 ); 
 // Creates a client 
 const 
  
 client 
  
 = 
  
 new 
  
 speech 
 . 
  SpeechClient 
 
 (); 
 /** 
 * TODO(developer): Uncomment the following lines before running the sample. 
 * Include the sampleRateHertz field in the config object. 
 */ 
 // const filename = 'Local path to audio file, e.g. /path/to/audio.raw'; 
 // const encoding = 'Encoding of the audio file, e.g. LINEAR16'; 
 // const sampleRateHertz = 16000; 
 // const languageCode = 'BCP-47 language code, e.g. en-US'; 
 const 
  
 config 
  
 = 
  
 { 
  
 encoding 
 : 
  
 encoding 
 , 
  
 languageCode 
 : 
  
 languageCode 
 , 
  
 enableAutomaticPunctuation 
 : 
  
 true 
 , 
 }; 
 const 
  
 audio 
  
 = 
  
 { 
  
 content 
 : 
  
 fs 
 . 
 readFileSync 
 ( 
 filename 
 ). 
 toString 
 ( 
 'base64' 
 ), 
 }; 
 const 
  
 request 
  
 = 
  
 { 
  
 config 
 : 
  
 config 
 , 
  
 audio 
 : 
  
 audio 
 , 
 }; 
 // Detects speech in the audio file 
 const 
  
 [ 
 response 
 ] 
  
 = 
  
 await 
  
 client 
 . 
 recognize 
 ( 
 request 
 ); 
 const 
  
 transcription 
  
 = 
  
 response 
 . 
 results 
  
 . 
 map 
 ( 
 result 
  
 = 
>  
 result 
 . 
 alternatives 
 [ 
 0 
 ]. 
 transcript 
 ) 
  
 . 
 join 
 ( 
 '\n' 
 ); 
 console 
 . 
 log 
 ( 
 'Transcription: ' 
 , 
  
 transcription 
 ); 
 

Python

To learn how to install and use the client library for Speech-to-Text, see Speech-to-Text client libraries . For more information, see the Speech-to-Text Python API reference documentation .

To authenticate to Speech-to-Text, set up Application Default Credentials. For more information, see Set up authentication for a local development environment .

  from 
  
 google.cloud 
  
 import 
 speech 
 def 
  
 transcribe_file_with_auto_punctuation 
 ( 
 audio_file 
 : 
 str 
 ) 
 - 
> speech 
 . 
 RecognizeResponse 
 : 
  
 """Transcribe the given audio file with auto punctuation enabled. 
 Args: 
 audio_file (str): Path to the local audio file to be transcribed. 
 Returns: 
 speech.RecognizeResponse: The response containing the transcription results. 
 """ 
 client 
 = 
 speech 
 . 
 SpeechClient 
 () 
 with 
 open 
 ( 
 audio_file 
 , 
 "rb" 
 ) 
 as 
 f 
 : 
 audio_content 
 = 
 f 
 . 
 read 
 () 
 audio 
 = 
 speech 
 . 
 RecognitionAudio 
 ( 
 content 
 = 
 audio_content 
 ) 
 config 
 = 
 speech 
 . 
 RecognitionConfig 
 ( 
 encoding 
 = 
 speech 
 . 
 RecognitionConfig 
 . 
 AudioEncoding 
 . 
 LINEAR16 
 , 
 sample_rate_hertz 
 = 
 8000 
 , 
 language_code 
 = 
 "en-US" 
 , 
 # Enable automatic punctuation 
 enable_automatic_punctuation 
 = 
 True 
 , 
 ) 
 response 
 = 
 client 
 . 
 recognize 
 ( 
 config 
 = 
 config 
 , 
 audio 
 = 
 audio 
 ) 
 for 
 i 
 , 
 result 
 in 
 enumerate 
 ( 
 response 
 . 
 results 
 ): 
 alternative 
 = 
 result 
 . 
 alternatives 
 [ 
 0 
 ] 
 print 
 ( 
 "-" 
 * 
 20 
 ) 
 print 
 ( 
 f 
 "First alternative of result 
 { 
 i 
 } 
 " 
 ) 
 print 
 ( 
 f 
 "Transcript: 
 { 
 alternative 
 . 
 transcript 
 } 
 " 
 ) 
 return 
 response 
 

Additional languages

C#: Please follow the C# setup instructions on the client libraries page and then visit the Speech-to-Text reference documentation for .NET.

PHP: Please follow the PHP setup instructions on the client libraries page and then visit the Speech-to-Text reference documentation for PHP.

Ruby: Please follow the Ruby setup instructions on the client libraries page and then visit the Speech-to-Text reference documentation for Ruby.

What's next

Review how to make synchronous transcription requests .

Create a Mobile Website
View Site in Mobile | Classic
Share by: