1+ # importing libraries
2+ import speech_recognition as sr
3+ import os
4+ from pydub import AudioSegment
5+ from pydub .silence import split_on_silence
6+
# Module-level speech recognizer; get_large_audio_transcription() uses it
# both to read each exported chunk (r.record) and to transcribe it
# (r.recognize_google).
r = sr.Recognizer()
9+
# Split a long recording on silence and apply speech recognition to every
# resulting chunk.
def get_large_audio_transcription(
    path,
    *,
    min_silence_len=500,
    silence_thresh_offset=14,
    keep_silence=500,
    folder_name="audio-chunks",
):
    """Transcribe a large WAV file by recognizing it chunk by chunk.

    The recording is split wherever pydub detects silence, each chunk is
    exported as ``chunk<N>.wav`` into ``folder_name``, and every chunk is
    sent to the Google recognizer through the module-level ``r``.

    Args:
        path: Path of the WAV file to transcribe.
        min_silence_len: Shortest silence, in milliseconds, that splits
            the audio. Experiment with this value for the target file.
        silence_thresh_offset: How far below the recording's average
            loudness (``sound.dBFS``) a segment must be to count as
            silence. Adjust per requirement.
        keep_silence: Amount of silence, in milliseconds, that pydub
            leaves around each chunk.
        folder_name: Directory that receives the exported chunk files;
            created if it does not exist yet.

    Returns:
        The concatenated transcript. Each recognized chunk is capitalized
        and followed by ". ". Chunks the recognizer cannot understand are
        reported on stdout and contribute nothing, so the result is an
        empty string when no chunk is recognized.

    Raises:
        Anything other than ``sr.UnknownValueError`` raised while loading,
        exporting or recognizing (for example a failed request to the
        recognition service) propagates to the caller.
    """
    # Open the audio file using pydub.
    sound = AudioSegment.from_wav(path)
    chunks = split_on_silence(
        sound,
        min_silence_len=min_silence_len,
        # The threshold is relative to this recording's own loudness.
        silence_thresh=sound.dBFS - silence_thresh_offset,
        keep_silence=keep_silence,
    )

    # exist_ok avoids the check-then-create race of isdir() + mkdir().
    os.makedirs(folder_name, exist_ok=True)

    pieces = []
    for i, audio_chunk in enumerate(chunks, start=1):
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        # export() hands back the open output file; close it explicitly
        # instead of relying on garbage collection.
        audio_chunk.export(chunk_filename, format="wav").close()

        # Read the chunk back, then release the file before the
        # (potentially slow) network call.
        with sr.AudioFile(chunk_filename) as source:
            audio_listened = r.record(source)

        try:
            text = r.recognize_google(audio_listened)
        except sr.UnknownValueError as e:
            # Unintelligible chunk: report it and keep going (best effort).
            print("Error:", str(e))
            continue

        text = f"{text.capitalize()}. "
        print(chunk_filename, ":", text)
        pieces.append(text)

    # Return the text for all chunks detected.
    return "".join(pieces)
53+
54+
if __name__ == '__main__':
    import sys

    # The WAV file to transcribe is the first command-line argument,
    # e.g. "30-4447-0004.wav" or "7601-291468-0006.wav".
    if len(sys.argv) < 2:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} <path-to-wav-file>")
    path = sys.argv[1]
    print("\n Full text:", get_large_audio_transcription(path))
0 commit comments