Recognize multiple speakers in an audio file stored in Cloud Storage.
Code sample
Java
To learn how to install and use the client library for Speech-to-Text, see Speech-to-Text client libraries. For more information, see the Speech-to-Text Java API reference documentation.
To authenticate to Speech-to-Text, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
import
com.google.api.gax.longrunning. OperationFuture
;
import
com.google.cloud.speech.v1. LongRunningRecognizeMetadata
;
import
com.google.cloud.speech.v1. LongRunningRecognizeResponse
;
import
com.google.cloud.speech.v1. RecognitionAudio
;
import
com.google.cloud.speech.v1. RecognitionConfig
;
import
com.google.cloud.speech.v1. SpeakerDiarizationConfig
;
import
com.google.cloud.speech.v1. SpeechClient
;
import
com.google.cloud.speech.v1. SpeechRecognitionAlternative
;
import
com.google.cloud.speech.v1. WordInfo
;
import
java.io.IOException
;
import
java.util.concurrent.ExecutionException
;
public
class
TranscribeDiarizationGcs
{
static
void
transcribeDiarizationGcs
()
throws
IOException
,
ExecutionException
,
InterruptedException
{
// TODO(developer): Replace these variables before running the sample.
String
gcsUri
=
"gs://cloud-samples-data/speech/commercial_mono.wav"
;
transcribeDiarizationGcs
(
gcsUri
);
}
// Transcribe the give gcs file using speaker diarization
public
static
void
transcribeDiarizationGcs
(
String
gcsUri
)
throws
IOException
,
ExecutionException
,
InterruptedException
{
// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests. After completing all of your requests, call
// the "close" method on the client to safely clean up any remaining background resources.
try
(
SpeechClient
speechClient
=
SpeechClient
.
create
())
{
SpeakerDiarizationConfig
speakerDiarizationConfig
=
SpeakerDiarizationConfig
.
newBuilder
()
.
setEnableSpeakerDiarization
(
true
)
.
setMinSpeakerCount
(
2
)
.
setMaxSpeakerCount
(
2
)
.
build
();
// Configure request to enable Speaker diarization
RecognitionConfig
config
=
RecognitionConfig
.
newBuilder
()
.
setEncoding
(
RecognitionConfig
.
AudioEncoding
.
LINEAR16
)
.
setLanguageCode
(
"en-US"
)
.
setSampleRateHertz
(
8000
)
.
setDiarizationConfig
(
speakerDiarizationConfig
)
.
build
();
// Set the remote path for the audio file
RecognitionAudio
audio
=
RecognitionAudio
.
newBuilder
().
setUri
(
gcsUri
).
build
();
// Use non-blocking call for getting file transcription
OperationFuture<LongRunningRecognizeResponse
,
LongRunningRecognizeMetadata
>
future
=
speechClient
.
longRunningRecognizeAsync
(
config
,
audio
);
System
.
out
.
println
(
"Waiting for response..."
);
// Speaker Tags are only included in the last result object, which has only one alternative.
LongRunningRecognizeResponse
response
=
future
.
get
();
SpeechRecognitionAlternative
alternative
=
response
.
getResults
(
response
.
getResults
Count ()
-
1
).
getAlternatives
(
0
);
// The alternative is made up of WordInfo objects that contain the speaker_tag.
WordInfo
wordInfo
=
alternative
.
getWords
(
0
);
int
currentSpeakerTag
=
wordInfo
.
getSpeakerTag
();
// For each word, get all the words associated with one speaker, once the speaker changes,
// add a new line with the new speaker and their spoken words.
StringBuilder
speakerWords
=
new
StringBuilder
(
String
.
format
(
"Speaker %d: %s"
,
wordInfo
.
getSpeakerTag
(),
wordInfo
.
getWord
()));
for
(
int
i
=
1
;
i
<
alternative
.
getWordsCount
();
i
++
)
{
wordInfo
=
alternative
.
getWords
(
i
);
if
(
currentSpeakerTag
==
wordInfo
.
getSpeakerTag
())
{
speakerWords
.
append
(
" "
);
speakerWords
.
append
(
wordInfo
.
getWord
());
}
else
{
speakerWords
.
append
(
String
.
format
(
"\nSpeaker %d: %s"
,
wordInfo
.
getSpeakerTag
(),
wordInfo
.
getWord
()));
currentSpeakerTag
=
wordInfo
.
getSpeakerTag
();
}
}
System
.
out
.
println
(
speakerWords
.
toString
());
}
}
}
What's next
To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.