Skip to content

Commit 02bdaff

Browse files
chore: Simplified speech samples (set-4) (GoogleCloudPlatform#12316)
* Simplify next(4th) set of Speech samples * Minor doc changes * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent a102e82 commit 02bdaff

12 files changed

+148
-159
lines changed

speech/snippets/transcribe.py

Lines changed: 26 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -13,29 +13,28 @@
1313
# limitations under the License.
1414

1515
"""Google Cloud Speech API sample application using the REST API for batch
16-
processing.
17-
18-
Example usage:
19-
python transcribe.py resources/audio.raw
20-
python transcribe.py gs://cloud-samples-tests/speech/brooklyn.flac
21-
"""
16+
processing."""
2217

2318
# [START speech_transcribe_sync]
24-
import argparse
25-
2619
from google.cloud import speech
2720

2821

29-
def transcribe_file(speech_file: str) -> speech.RecognizeResponse:
30-
"""Transcribe the given audio file."""
22+
def transcribe_file(audio_file: str) -> speech.RecognizeResponse:
23+
"""Transcribe the given audio file.
24+
Args:
25+
audio_file (str): Path to the local audio file to be transcribed.
26+
Example: "resources/audio.wav"
27+
Returns:
28+
speech.RecognizeResponse: The response containing the transcription results.
29+
"""
3130
client = speech.SpeechClient()
3231

3332
# [START speech_python_migration_sync_request]
3433
# [START speech_python_migration_config]
35-
with open(speech_file, "rb") as audio_file:
36-
content = audio_file.read()
34+
with open(audio_file, "rb") as f:
35+
audio_content = f.read()
3736

38-
audio = speech.RecognitionAudio(content=content)
37+
audio = speech.RecognitionAudio(content=audio_content)
3938
config = speech.RecognitionConfig(
4039
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
4140
sample_rate_hertz=16000,
@@ -61,14 +60,20 @@ def transcribe_file(speech_file: str) -> speech.RecognizeResponse:
6160

6261

6362
# [START speech_transcribe_sync_gcs]
64-
def transcribe_gcs(gcs_uri: str) -> speech.RecognizeResponse:
65-
"""Transcribes the audio file specified by the gcs_uri."""
63+
def transcribe_gcs(audio_uri: str) -> speech.RecognizeResponse:
64+
"""Transcribes the audio file specified by the audio_uri.
65+
Args:
66+
audio_uri (str): The Google Cloud Storage URI of the input audio file.
67+
E.g., gs://cloud-samples-data/speech/audio.flac
68+
Returns:
69+
speech.RecognizeResponse: The response containing the transcription results.
70+
"""
6671
from google.cloud import speech
6772

6873
client = speech.SpeechClient()
6974

7075
# [START speech_python_migration_config_gcs]
71-
audio = speech.RecognitionAudio(uri=gcs_uri)
76+
audio = speech.RecognitionAudio(uri=audio_uri)
7277
config = speech.RecognitionConfig(
7378
encoding=speech.RecognitionConfig.AudioEncoding.FLAC,
7479
sample_rate_hertz=16000,
@@ -91,12 +96,9 @@ def transcribe_gcs(gcs_uri: str) -> speech.RecognizeResponse:
9196

9297

9398
if __name__ == "__main__":
94-
parser = argparse.ArgumentParser(
95-
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
96-
)
97-
parser.add_argument("path", help="File or GCS path for audio file to be recognized")
98-
args = parser.parse_args()
99-
if args.path.startswith("gs://"):
100-
transcribe_gcs(args.path)
99+
# It could be a local path like: path_to_file = "resources/audio.raw"
100+
path_to_file = "gs://cloud-samples-data/speech/audio.flac"
101+
if path_to_file.startswith("gs://"):
102+
transcribe_gcs(path_to_file)
101103
else:
102-
transcribe_file(args.path)
104+
transcribe_file(path_to_file)

speech/snippets/transcribe_multichannel.py

Lines changed: 19 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -13,35 +13,27 @@
1313
# limitations under the License.
1414

1515
"""Google Cloud Speech API sample that demonstrates multichannel recognition.
16-
17-
Example usage:
18-
python transcribe_multichannel.py resources/multi.wav
19-
python transcribe_multichannel.py \
20-
gs://cloud-samples-tests/speech/multi.wav
2116
"""
2217

2318
# [START speech_transcribe_multichannel]
24-
import argparse
2519

2620
from google.cloud import speech
2721

2822

29-
def transcribe_file_with_multichannel(speech_file: str) -> speech.RecognizeResponse:
30-
"""Transcribe the given audio file synchronously with
31-
multi channel.
32-
23+
def transcribe_file_with_multichannel(audio_file: str) -> speech.RecognizeResponse:
24+
"""Transcribe the given audio file synchronously with multichannel recognition.
3325
Args:
34-
speech_file: A path to audio file to be recognized.
35-
26+
audio_file (str): Path to the local audio file to be transcribed.
27+
Example: "resources/multi.wav"
3628
Returns:
37-
The RecognizeResponse results.
29+
speech.RecognizeResponse: The full response object which includes the transcription results.
3830
"""
3931
client = speech.SpeechClient()
4032

41-
with open(speech_file, "rb") as audio_file:
42-
content = audio_file.read()
33+
with open(audio_file, "rb") as f:
34+
audio_content = f.read()
4335

44-
audio = speech.RecognitionAudio(content=content)
36+
audio = speech.RecognitionAudio(content=audio_content)
4537

4638
config = speech.RecognitionConfig(
4739
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
@@ -64,22 +56,20 @@ def transcribe_file_with_multichannel(speech_file: str) -> speech.RecognizeRespo
6456
# [END speech_transcribe_multichannel]
6557

6658

67-
def transcribe_gcs_with_multichannel(gcs_uri: str) -> speech.RecognizeResponse:
68-
"""Transcribe the given audio file on GCS with
69-
multi channel.
70-
59+
def transcribe_gcs_with_multichannel(audio_uri: str) -> speech.RecognizeResponse:
60+
"""Transcribe the given audio file from Google Cloud Storage synchronously with multichannel.
7161
Args:
72-
gcs_uri: A path to audio file to be recognized.
73-
62+
audio_uri (str): The Cloud Storage URI of the input audio.
63+
E.g., gs://cloud-samples-data/speech/multi.wav
7464
Returns:
75-
The RecognizeResponse results.
65+
speech.RecognizeResponse: The full response object which includes the transcription results.
7666
"""
7767
# [START speech_transcribe_multichannel_gcs]
7868
from google.cloud import speech
7969

8070
client = speech.SpeechClient()
8171

82-
audio = speech.RecognitionAudio(uri=gcs_uri)
72+
audio = speech.RecognitionAudio(uri=audio_uri)
8373

8474
config = speech.RecognitionConfig(
8575
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
@@ -103,12 +93,9 @@ def transcribe_gcs_with_multichannel(gcs_uri: str) -> speech.RecognizeResponse:
10393

10494

10595
if __name__ == "__main__":
106-
parser = argparse.ArgumentParser(
107-
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
108-
)
109-
parser.add_argument("path", help="File or GCS path for audio file to be recognized")
110-
args = parser.parse_args()
111-
if args.path.startswith("gs://"):
112-
transcribe_gcs_with_multichannel(args.path)
96+
# It could be a local path like: path_to_file = "resources/multi.wav"
97+
path_to_file = "gs://cloud-samples-data/speech/multi.wav"
98+
if path_to_file.startswith("gs://"):
99+
transcribe_gcs_with_multichannel(path_to_file)
113100
else:
114-
transcribe_file_with_multichannel(args.path)
101+
transcribe_file_with_multichannel(path_to_file)

speech/snippets/transcribe_multichannel_v2.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,25 +12,31 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
16-
import argparse
17-
1815
# [START speech_transcribe_multichannel_v2]
16+
import os
17+
1918
from google.cloud.speech_v2 import SpeechClient
2019
from google.cloud.speech_v2.types import cloud_speech
2120

21+
PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]
22+
2223

2324
def transcribe_multichannel_v2(
24-
project_id: str,
2525
audio_file: str,
2626
) -> cloud_speech.RecognizeResponse:
27-
"""Transcribe a multi-channel audio file."""
27+
"""Transcribe the given audio file synchronously with multichannel.
28+
Args:
29+
audio_file (str): Path to the local audio file to be transcribed.
30+
Example: "resources/two_channel_16k.wav"
31+
Returns:
32+
cloud_speech.RecognizeResponse: The full response object which includes the transcription results.
33+
"""
2834
# Instantiates a client
2935
client = SpeechClient()
3036

3137
# Reads a file as bytes
3238
with open(audio_file, "rb") as f:
33-
content = f.read()
39+
audio_content = f.read()
3440

3541
config = cloud_speech.RecognitionConfig(
3642
auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
@@ -42,9 +48,9 @@ def transcribe_multichannel_v2(
4248
)
4349

4450
request = cloud_speech.RecognizeRequest(
45-
recognizer=f"projects/{project_id}/locations/global/recognizers/_",
51+
recognizer=f"projects/{PROJECT_ID}/locations/global/recognizers/_",
4652
config=config,
47-
content=content,
53+
content=audio_content,
4854
)
4955

5056
# Transcribes the audio into text
@@ -61,10 +67,4 @@ def transcribe_multichannel_v2(
6167

6268

6369
if __name__ == "__main__":
64-
parser = argparse.ArgumentParser(
65-
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
66-
)
67-
parser.add_argument("project_id", help="GCP Project ID")
68-
parser.add_argument("audio_file", help="Audio file to stream")
69-
args = parser.parse_args()
70-
transcribe_multichannel_v2(args.project_id, args.audio_file)
70+
transcribe_multichannel_v2("resources/two_channel_16k.wav")

speech/snippets/transcribe_multichannel_v2_test.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,8 @@
2424

2525
@Retry()
2626
def test_transcribe_multichannel_v2() -> None:
27-
project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
28-
2927
response = transcribe_multichannel_v2.transcribe_multichannel_v2(
30-
project_id, os.path.join(_RESOURCES, "two_channel_16k.wav")
28+
os.path.join(_RESOURCES, "two_channel_16k.wav")
3129
)
3230

3331
assert re.search(

speech/snippets/transcribe_multiple_languages_v2.py

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -12,28 +12,36 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
16-
import argparse
17-
1815
# [START speech_transcribe_multiple_languages_v2]
16+
import os
17+
1918
from typing import List
2019

2120
from google.cloud.speech_v2 import SpeechClient
2221
from google.cloud.speech_v2.types import cloud_speech
2322

23+
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
24+
2425

2526
def transcribe_multiple_languages_v2(
26-
project_id: str,
27-
language_codes: List[str],
2827
audio_file: str,
28+
language_codes: List[str],
2929
) -> cloud_speech.RecognizeResponse:
30-
"""Transcribe an audio file."""
31-
# Instantiates a client
30+
"""Transcribe an audio file using Google Cloud Speech-to-Text API with support for multiple languages.
31+
Args:
32+
audio_file (str): Path to the local audio file to be transcribed.
33+
Example: "resources/audio.wav"
34+
language_codes (List[str]): A list of BCP-47 language codes to be used for transcription.
35+
Example: ["en-US", "fr-FR"]
36+
Returns:
37+
cloud_speech.RecognizeResponse: The response from the Speech-to-Text API containing the
38+
transcription results.
39+
"""
3240
client = SpeechClient()
3341

3442
# Reads a file as bytes
3543
with open(audio_file, "rb") as f:
36-
content = f.read()
44+
audio_content = f.read()
3745

3846
config = cloud_speech.RecognitionConfig(
3947
auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
@@ -42,14 +50,14 @@ def transcribe_multiple_languages_v2(
4250
)
4351

4452
request = cloud_speech.RecognizeRequest(
45-
recognizer=f"projects/{project_id}/locations/global/recognizers/_",
53+
recognizer=f"projects/{PROJECT_ID}/locations/global/recognizers/_",
4654
config=config,
47-
content=content,
55+
content=audio_content,
4856
)
4957

5058
# Transcribes the audio into text
5159
response = client.recognize(request=request)
52-
60+
# Prints the transcription results
5361
for result in response.results:
5462
print(f"Transcript: {result.alternatives[0].transcript}")
5563

@@ -60,15 +68,7 @@ def transcribe_multiple_languages_v2(
6068

6169

6270
if __name__ == "__main__":
63-
parser = argparse.ArgumentParser(
64-
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
65-
)
66-
parser.add_argument("project_id", help="GCP Project ID")
67-
parser.add_argument(
68-
"language_codes", nargs="+", help="Language codes to transcribe"
69-
)
70-
parser.add_argument("audio_file", help="Audio file to stream")
71-
args = parser.parse_args()
71+
# Language codes to transcribe
7272
transcribe_multiple_languages_v2(
73-
args.project_id, args.language_codes, args.audio_file
73+
audio_file="resources/audio.wav", language_codes=["en-US", "fr-FR"]
7474
)

speech/snippets/transcribe_multiple_languages_v2_test.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,9 @@
2424

2525
@Retry()
2626
def test_transcribe_multiple_languages_v2() -> None:
27-
project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
28-
2927
response = transcribe_multiple_languages_v2.transcribe_multiple_languages_v2(
30-
project_id, ["en-US", "fr-FR"], os.path.join(RESOURCES, "audio.wav")
28+
os.path.join(RESOURCES, "audio.wav"),
29+
["en-US", "fr-FR"],
3130
)
3231

3332
assert re.search(

0 commit comments

Comments
 (0)