Skip to content

Commit c894d73

Browse files
authored
speech gapic migration (GoogleCloudPlatform#801)
* initial commit * fix tests * add tests * update readme * address comments * update quickstart * lint * fixes test for speech words sample * tame test
1 parent 53fcce5 commit c894d73

16 files changed

+283
-376
lines changed

speech/README.md

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,15 +57,14 @@ To run the Speech Samples:
5757
transcribe-model Transcribe an audio file, with selected model, using Google Cloud Speech API
5858
transcribe-punctuation Transcribe an audio file with proper punctuation, using Google Cloud Speech API
5959
transcribe-stream Transcribe a stream of audio using Google Cloud Speech API
60-
transcribe-words Transcribe an audio file and print word time offsets using Google Cloud Speech API
6160

6261
Once you have a speech sample in the proper format, send it through the speech
6362
API using the transcribe command:
6463

6564
```sh
66-
php speech.php transcribe test/data/audio32KHz.raw --encoding LINEAR16 --sample-rate 32000
67-
php speech.php transcribe-async test/data/audio32KHz.flac --encoding FLAC --sample-rate 32000
68-
php speech.php transcribe-words test/data/audio32KHz.flac --encoding FLAC --sample-rate 32000
65+
php speech.php transcribe test/data/audio32KHz.raw
66+
php speech.php transcribe-async test/data/audio32KHz.raw
67+
php speech.php transcribe-async-words test/data/audio32KHz.raw
6968

7069
```
7170
## Troubleshooting

speech/composer.json

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,7 @@
2121
"src/transcribe_enhanced_model.php",
2222
"src/transcribe_model_selection.php",
2323
"src/transcribe_sync.php",
24-
"src/transcribe_sync_gcs.php",
25-
"src/transcribe_sync_words.php"
24+
"src/transcribe_sync_gcs.php"
2625
]
2726
}
2827
}

speech/quickstart.php

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -20,32 +20,42 @@
2020
require __DIR__ . '/vendor/autoload.php';
2121

2222
# Imports the Google Cloud client library
23-
use Google\Cloud\Speech\SpeechClient;
23+
use Google\Cloud\Speech\V1\SpeechClient;
24+
use Google\Cloud\Speech\V1\RecognitionAudio;
25+
use Google\Cloud\Speech\V1\RecognitionConfig;
26+
use Google\Cloud\Speech\V1\RecognitionConfig\AudioEncoding;
2427

25-
# Your Google Cloud Platform project ID
26-
$projectId = 'YOUR_PROJECT_ID';
28+
# The name of the audio file to transcribe
29+
$audioFile = __DIR__ . '/test/data/audio32KHz.raw';
2730

28-
# Instantiates a client
29-
$speech = new SpeechClient([
30-
'projectId' => $projectId,
31-
'languageCode' => 'en-US',
32-
]);
31+
# get contents of a file into a string
32+
$content = file_get_contents($audioFile);
3333

34-
# The name of the audio file to transcribe
35-
$fileName = __DIR__ . '/resources/audio.raw';
34+
# set string as audio content
35+
$audio = (new RecognitionAudio())
36+
->setContent($content);
3637

37-
# The audio file's encoding and sample rate
38-
$options = [
39-
'encoding' => 'LINEAR16',
40-
'sampleRateHertz' => 16000,
41-
];
38+
# The audio file's encoding, sample rate and language
39+
$config = new RecognitionConfig([
40+
'encoding' => AudioEncoding::LINEAR16,
41+
'sample_rate_hertz' => 32000,
42+
'language_code' => 'en-US'
43+
]);
4244

43-
# Detects speech in the audio file
44-
$results = $speech->recognize(fopen($fileName, 'r'), $options);
45+
# Instantiates a client
46+
$client = new SpeechClient();
4547

46-
foreach ($results as $result) {
47-
echo 'Transcription: ' . $result->alternatives()[0]['transcript'] . PHP_EOL;
48+
# Detects speech in the audio file
49+
$response = $client->recognize($config, $audio);
50+
51+
# Print most likely transcription
52+
foreach ($response->getResults() as $result) {
53+
$alternatives = $result->getAlternatives();
54+
$mostLikely = $alternatives[0];
55+
$transcript = $mostLikely->getTranscript();
56+
printf('Transcript: %s' . PHP_EOL, $transcript);
4857
}
4958

59+
$client->close();
60+
5061
# [END speech_quickstart]
51-
return $results;

speech/speech.php

Lines changed: 7 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -32,19 +32,7 @@
3232
new InputOption('encoding', null, InputOption::VALUE_REQUIRED,
3333
'The encoding of the audio file. This is required if the encoding is ' .
3434
'unable to be determined. '
35-
),
36-
new InputOption('language-code', null, InputOption::VALUE_REQUIRED,
37-
'The language code for the language used in the source file. ',
38-
'en-US'
39-
),
40-
new InputOption('sample-rate', null, InputOption::VALUE_REQUIRED,
41-
'The sample rate of the audio file in hertz. This is required ' .
42-
'if the sample rate is unable to be determined. '
43-
),
44-
new InputOption('sample-rate', null, InputOption::VALUE_REQUIRED,
45-
'The sample rate of the audio file in hertz. This is required ' .
46-
'if the sample rate is unable to be determined. '
47-
),
35+
)
4836
]);
4937

5038
$application = new Application('Cloud Speech');
@@ -61,11 +49,7 @@
6149
)
6250
->setCode(function (InputInterface $input, OutputInterface $output) {
6351
$audioFile = $input->getArgument('audio-file');
64-
$languageCode = $input->getOption('language-code');
65-
transcribe_sync($audioFile, $languageCode, [
66-
'encoding' => $input->getOption('encoding'),
67-
'sampleRateHertz' => $input->getOption('sample-rate'),
68-
]);
52+
transcribe_sync($audioFile);
6953
});
7054

7155
$application->add(new Command('transcribe-gcs'))
@@ -81,35 +65,10 @@
8165
)
8266
->setCode(function (InputInterface $input, OutputInterface $output) {
8367
$audioFile = $input->getArgument('audio-file');
84-
$languageCode = $input->getOption('language-code');
8568
if (!preg_match('/^gs:\/\/([a-z0-9\._\-]+)\/(\S+)$/', $audioFile, $matches)) {
8669
throw new \Exception('Invalid file name. Must be gs://[bucket]/[audiofile]');
8770
}
88-
list($bucketName, $objectName) = array_slice($matches, 1);
89-
transcribe_sync_gcs($bucketName, $objectName, $languageCode, [
90-
'encoding' => $input->getOption('encoding'),
91-
'sampleRateHertz' => $input->getOption('sample-rate'),
92-
]);
93-
});
94-
95-
$application->add(new Command('transcribe-words'))
96-
->setDefinition($inputDefinition)
97-
->setDescription('Transcribe an audio file and print word time offsets using Google Cloud Speech API')
98-
->setHelp(<<<EOF
99-
The <info>%command.name%</info> command transcribes audio from a file using the
100-
Google Cloud Speech API and prints word time offsets.
101-
102-
<info>php %command.full_name% audio_file.wav</info>
103-
104-
EOF
105-
)
106-
->setCode(function (InputInterface $input, OutputInterface $output) {
107-
$audioFile = $input->getArgument('audio-file');
108-
$languageCode = $input->getOption('language-code');
109-
transcribe_sync_words($audioFile, $languageCode, [
110-
'encoding' => $input->getOption('encoding'),
111-
'sampleRateHertz' => $input->getOption('sample-rate'),
112-
]);
71+
transcribe_sync_gcs($audioFile);
11372
});
11473

11574
$application->add(new Command('transcribe-model'))
@@ -174,11 +133,7 @@
174133
)
175134
->setCode(function (InputInterface $input, OutputInterface $output) {
176135
$audioFile = $input->getArgument('audio-file');
177-
$languageCode = $input->getOption('language-code');
178-
transcribe_async($audioFile, $languageCode, [
179-
'encoding' => $input->getOption('encoding'),
180-
'sampleRateHertz' => $input->getOption('sample-rate'),
181-
]);
136+
transcribe_async($audioFile);
182137
});
183138

184139
$application->add(new Command('transcribe-async-gcs'))
@@ -194,15 +149,10 @@
194149
)
195150
->setCode(function (InputInterface $input, OutputInterface $output) {
196151
$audioFile = $input->getArgument('audio-file');
197-
$languageCode = $input->getOption('language-code');
198152
if (!preg_match('/^gs:\/\/([a-z0-9\._\-]+)\/(\S+)$/', $audioFile, $matches)) {
199153
throw new \Exception('Invalid file name. Must be gs://[bucket]/[audiofile]');
200154
}
201-
list($bucketName, $objectName) = array_slice($matches, 1);
202-
transcribe_async_gcs($bucketName, $objectName, $languageCode, [
203-
'encoding' => $input->getOption('encoding'),
204-
'sampleRateHertz' => $input->getOption('sample-rate'),
205-
]);
155+
transcribe_async_gcs($audioFile);
206156
});
207157

208158
$application->add(new Command('transcribe-async-words'))
@@ -218,11 +168,7 @@
218168
)
219169
->setCode(function (InputInterface $input, OutputInterface $output) {
220170
$audioFile = $input->getArgument('audio-file');
221-
$languageCode = $input->getOption('language-code');
222-
transcribe_async_words($audioFile, $languageCode, [
223-
'encoding' => $input->getOption('encoding'),
224-
'sampleRateHertz' => $input->getOption('sample-rate'),
225-
]);
171+
transcribe_async_words($audioFile);
226172
});
227173

228174
$application->add(new Command('transcribe-stream'))
@@ -238,10 +184,7 @@
238184
)
239185
->setCode(function (InputInterface $input, OutputInterface $output) {
240186
streaming_recognize(
241-
$input->getArgument('audio-file'),
242-
$input->getOption('language-code'),
243-
$input->getOption('encoding'),
244-
$input->getOption('sample-rate')
187+
$input->getArgument('audio-file')
245188
);
246189
});
247190

speech/src/streaming_recognize.php

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -46,21 +46,24 @@
4646
*
4747
* @return string the text transcription
4848
*/
49-
function streaming_recognize($audioFile, $languageCode, $encoding, $sampleRateHertz)
49+
function streaming_recognize($audioFile)
5050
{
51-
if (!defined('Grpc\STATUS_OK')) {
51+
// change these variables
52+
$encoding = AudioEncoding::LINEAR16;
53+
$sampleRateHertz = 32000;
54+
$languageCode = 'en-US';
55+
56+
if (!extension_loaded('grpc')) {
5257
throw new \Exception('Install the grpc extension ' .
5358
'(pecl install grpc)');
5459
}
5560

5661
$speechClient = new SpeechClient();
5762
try {
58-
$config = new RecognitionConfig();
59-
$config->setLanguageCode($languageCode);
60-
$config->setSampleRateHertz($sampleRateHertz);
61-
// encoding must be an enum, convert from string
62-
$encodingEnum = constant(AudioEncoding::class . '::' . $encoding);
63-
$config->setEncoding($encodingEnum);
63+
$config = (new RecognitionConfig())
64+
->setEncoding($encoding)
65+
->setSampleRateHertz($sampleRateHertz)
66+
->setLanguageCode($languageCode);
6467

6568
$strmConfig = new StreamingRecognitionConfig();
6669
$strmConfig->setConfig($config);
@@ -72,10 +75,8 @@ function streaming_recognize($audioFile, $languageCode, $encoding, $sampleRateHe
7275
$strm->write($strmReq);
7376

7477
$strmReq = new StreamingRecognizeRequest();
75-
$f = fopen($audioFile, "rb");
76-
$fsize = filesize($audioFile);
77-
$bytes = fread($f, $fsize);
78-
$strmReq->setAudioContent($bytes);
78+
$content = file_get_contents($audioFile);
79+
$strmReq->setAudioContent($content);
7980
$strm->write($strmReq);
8081

8182
foreach ($strm->closeWriteAndReadAll() as $response) {

speech/src/transcribe_async.php

Lines changed: 43 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,11 @@
2323

2424
namespace Google\Cloud\Samples\Speech;
2525

26-
use Exception;
2726
# [START speech_transcribe_async]
28-
use Google\Cloud\Speech\SpeechClient;
29-
use Google\Cloud\Core\ExponentialBackoff;
27+
use Google\Cloud\Speech\V1\SpeechClient;
28+
use Google\Cloud\Speech\V1\RecognitionAudio;
29+
use Google\Cloud\Speech\V1\RecognitionConfig;
30+
use Google\Cloud\Speech\V1\RecognitionConfig\AudioEncoding;
3031

3132
/**
3233
* Transcribe an audio file using Google Cloud Speech API
@@ -36,43 +37,53 @@
3637
* ```.
3738
*
3839
* @param string $audioFile path to an audio file.
39-
* @param string $languageCode The language of the content to
40-
* be recognized. Accepts BCP-47 (e.g., `"en-US"`, `"es-ES"`).
41-
* @param array $options configuration options.
4240
*
4341
* @return void the transcription is printed, not returned
4442
*/
45-
function transcribe_async($audioFile, $languageCode = 'en-US', $options = [])
43+
function transcribe_async($audioFile)
4644
{
47-
// Create the speech client
48-
$speech = new SpeechClient([
49-
'languageCode' => $languageCode,
50-
]);
45+
// change these variables
46+
$encoding = AudioEncoding::LINEAR16;
47+
$sampleRateHertz = 32000;
48+
$languageCode = 'en-US';
5149

52-
// Create the asynchronous recognize operation
53-
$operation = $speech->beginRecognizeOperation(
54-
fopen($audioFile, 'r'),
55-
$options
56-
);
50+
// get contents of a file into a string
51+
$content = file_get_contents($audioFile);
5752

58-
// Wait for the operation to complete
59-
$backoff = new ExponentialBackoff(10);
60-
$backoff->execute(function () use ($operation) {
61-
print('Waiting for operation to complete' . PHP_EOL);
62-
$operation->reload();
63-
if (!$operation->isComplete()) {
64-
throw new Exception('Job has not yet completed', 500);
65-
}
66-
});
53+
// set string as audio content
54+
$audio = (new RecognitionAudio())
55+
->setContent($content);
56+
57+
// set config
58+
$config = (new RecognitionConfig())
59+
->setEncoding($encoding)
60+
->setSampleRateHertz($sampleRateHertz)
61+
->setLanguageCode($languageCode);
62+
63+
// create the speech client
64+
$client = new SpeechClient();
6765

68-
// Print the results
69-
if ($operation->isComplete()) {
70-
$results = $operation->results();
71-
foreach ($results as $result) {
72-
$alternative = $result->alternatives()[0];
73-
printf('Transcript: %s' . PHP_EOL, $alternative['transcript']);
74-
printf('Confidence: %s' . PHP_EOL, $alternative['confidence']);
66+
// create the asynchronous recognize operation
67+
$operation = $client->longRunningRecognize($config, $audio);
68+
$operation->pollUntilComplete();
69+
70+
if ($operation->operationSucceeded()) {
71+
$response = $operation->getResult();
72+
73+
// each result is for a consecutive portion of the audio. iterate
74+
// through them to get the transcripts for the entire audio file.
75+
foreach ($response->getResults() as $result) {
76+
$alternatives = $result->getAlternatives();
77+
$mostLikely = $alternatives[0];
78+
$transcript = $mostLikely->getTranscript();
79+
$confidence = $mostLikely->getConfidence();
80+
printf('Transcript: %s' . PHP_EOL, $transcript);
81+
printf('Confidence: %s' . PHP_EOL, $confidence);
7582
}
83+
} else {
84+
print_r($operation->getError());
7685
}
86+
87+
$client->close();
7788
}
7889
# [END speech_transcribe_async]

0 commit comments

Comments
 (0)