Performs streaming speech recognition on raw PCM audio data.
Code sample
Java
To learn how to install and use the client library for Speech-to-Text, see Speech-to-Text client libraries. For more information, see the Speech-to-Text Java API reference documentation.
To authenticate to Speech-to-Text, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
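The sample below depends on the following imports; this is a sketch assuming the v1 client library (com.google.cloud.speech.v1) together with its GAX, Guava, and protobuf dependencies. Adjust the package names if you use a different API version.

import com.google.api.gax.rpc.ApiStreamObserver;
import com.google.api.gax.rpc.BidiStreamingCallable;
import com.google.cloud.speech.v1.RecognitionConfig;
import com.google.cloud.speech.v1.RecognitionConfig.AudioEncoding;
import com.google.cloud.speech.v1.SpeechClient;
import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
import com.google.cloud.speech.v1.StreamingRecognitionConfig;
import com.google.cloud.speech.v1.StreamingRecognitionResult;
import com.google.cloud.speech.v1.StreamingRecognizeRequest;
import com.google.cloud.speech.v1.StreamingRecognizeResponse;
import com.google.common.util.concurrent.SettableFuture;
import com.google.protobuf.ByteString;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;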
/**
 * Performs streaming speech recognition on raw PCM audio data.
 *
 * @param fileName the path to a PCM audio file to transcribe.
 */
public static void streamingTranscribeWithAutomaticPunctuation(String fileName) throws Exception {
  Path path = Paths.get(fileName);
  byte[] data = Files.readAllBytes(path);

  // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
  try (SpeechClient speech = SpeechClient.create()) {
    // Configure request with local raw PCM audio
    RecognitionConfig recConfig =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.LINEAR16)
            .setLanguageCode("en-US")
            .setSampleRateHertz(16000)
            .setEnableAutomaticPunctuation(true)
            .build();

    // Build the streaming config with the audio config
    StreamingRecognitionConfig config =
        StreamingRecognitionConfig.newBuilder().setConfig(recConfig).build();

    class ResponseApiStreamingObserver<T> implements ApiStreamObserver<T> {
      private final SettableFuture<List<T>> future = SettableFuture.create();
      private final List<T> messages = new java.util.ArrayList<T>();

      @Override
      public void onNext(T message) {
        messages.add(message);
      }

      @Override
      public void onError(Throwable t) {
        future.setException(t);
      }

      @Override
      public void onCompleted() {
        future.set(messages);
      }

      // Returns the SettableFuture object to get received messages / exceptions.
      public SettableFuture<List<T>> future() {
        return future;
      }
    }

    ResponseApiStreamingObserver<StreamingRecognizeResponse> responseObserver =
        new ResponseApiStreamingObserver<>();

    BidiStreamingCallable<StreamingRecognizeRequest, StreamingRecognizeResponse> callable =
        speech.streamingRecognizeCallable();

    ApiStreamObserver<StreamingRecognizeRequest> requestObserver =
        callable.bidiStreamingCall(responseObserver);

    // The first request must **only** contain the audio configuration:
    requestObserver.onNext(
        StreamingRecognizeRequest.newBuilder().setStreamingConfig(config).build());

    // Subsequent requests must **only** contain the audio data.
    requestObserver.onNext(
        StreamingRecognizeRequest.newBuilder()
            .setAudioContent(ByteString.copyFrom(data))
            .build());

    // Mark transmission as completed after sending the data.
    requestObserver.onCompleted();

    List<StreamingRecognizeResponse> responses = responseObserver.future().get();

    for (StreamingRecognizeResponse response : responses) {
      // For streaming recognize, the results list has one is_final result (if available) followed
      // by a number of in-progress results (if interim_results is true) for subsequent utterances.
      // Just print the first result here.
      StreamingRecognitionResult result = response.getResultsList().get(0);

      // There can be several alternative transcripts for a given chunk of speech. Just use the
      // first (most likely) one here.
      SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
      System.out.printf("Transcript : %s\n", alternative.getTranscript());
    }
  }
}
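To run the sample end to end, you can wrap the method in a small entry point. A minimal sketch, assuming the method lives in the same class as main and the path to a 16 kHz LINEAR16 PCM file is passed as the first argument (the default file name audio.raw is hypothetical):

public static void main(String[] args) throws Exception {
  // The fallback path "audio.raw" is hypothetical; pass your own PCM file.
  String fileName = args.length > 0 ? args[0] : "audio.raw";
  streamingTranscribeWithAutomaticPunctuation(fileName);
}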
What's next
To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.