77# create a speech recognition object
88r = sr .Recognizer ()
99
10- # a function that splits the audio file into chunks
10+ # a function to recognize speech in the audio file
11+ # so that we don't repeat ourselves in other functions
def transcribe_audio(path):
    """Return the text recognized in the audio file at `path`.

    The whole file is read with the module-level recognizer `r` and sent to
    `r.recognize_google`. Recognition errors are not handled here; the
    chunking functions in this file catch `sr.UnknownValueError` themselves.
    """
    # open the file as the recognizer's audio source
    with sr.AudioFile(path) as audio_file:
        # load the full recording, then convert it to text
        recording = r.record(audio_file)
        return r.recognize_google(recording)
19+
20+ # a function that splits the audio file into chunks on silence
1121# and applies speech recognition
12- def get_large_audio_transcription (path ):
13- """
14- Splitting the large audio file into chunks
15- and apply speech recognition on each of these chunks
16- """
22+ def get_large_audio_transcription_on_silence (path ):
23+ """Splitting the large audio file into chunks
24+ and apply speech recognition on each of these chunks"""
1725 # open the audio file using pydub
18- sound = AudioSegment .from_wav (path )
19- # split audio sound where silence is 700 miliseconds or more and get chunks
26+ sound = AudioSegment .from_file (path )
27+ # split audio sound where silence is 500 milliseconds or more and get chunks
2028 chunks = split_on_silence (sound ,
2129 # experiment with this value for your target audio file
2230 min_silence_len = 500 ,
@@ -37,24 +45,59 @@ def get_large_audio_transcription(path):
3745 chunk_filename = os .path .join (folder_name , f"chunk{ i } .wav" )
3846 audio_chunk .export (chunk_filename , format = "wav" )
3947 # recognize the chunk
40- with sr .AudioFile (chunk_filename ) as source :
41- audio_listened = r .record (source )
42- # try converting it to text
43- try :
44- text = r .recognize_google (audio_listened )
45- except sr .UnknownValueError as e :
46- print ("Error:" , str (e ))
47- else :
48- text = f"{ text .capitalize ()} . "
49- print (chunk_filename , ":" , text )
50- whole_text += text
48+ try :
49+ text = transcribe_audio (chunk_filename )
50+ except sr .UnknownValueError as e :
51+ print ("Error:" , str (e ))
52+ else :
53+ text = f"{ text .capitalize ()} . "
54+ print (chunk_filename , ":" , text )
55+ whole_text += text
5156 # return the text for all chunks detected
5257 return whole_text
5358
5459
60+ # a function that splits the audio file into fixed interval chunks
61+ # and applies speech recognition
def get_large_audio_transcription_fixed_interval(path, minutes=5):
    """Transcribe a large audio file by splitting it into fixed-length chunks.

    The audio is cut into consecutive slices of `minutes` minutes (the last
    slice may be shorter). Each slice is exported as a WAV file into the
    `audio-fixed-chunks` directory and passed to `transcribe_audio`.

    Args:
        path: Path of the audio file to open with pydub.
        minutes: Length of each chunk in minutes. May be fractional,
            e.g. ``1 / 6`` for 10-second chunks.

    Returns:
        The concatenated text of every chunk that was recognized. Each
        chunk's text is capitalized and followed by ". ". Chunks that raise
        `sr.UnknownValueError` are reported on stdout and skipped.

    Raises:
        ValueError: If `minutes` does not yield a chunk of at least one
            millisecond (zero, negative, or too small a fraction).
    """
    # convert to milliseconds, the unit used when slicing the audio
    chunk_length_ms = int(1000 * 60 * minutes)
    # Validate before decoding the audio: a zero step would make range()
    # fail with an unrelated message, and a negative one would silently
    # produce no chunks at all.
    if chunk_length_ms <= 0:
        raise ValueError(
            f"minutes must give a chunk of at least 1 ms, got {minutes!r}"
        )
    # open the audio file using pydub
    sound = AudioSegment.from_file(path)
    # split the audio file into chunks
    chunks = [
        sound[start:start + chunk_length_ms]
        for start in range(0, len(sound), chunk_length_ms)
    ]
    folder_name = "audio-fixed-chunks"
    # create a directory to store the audio chunks (no-op if it exists)
    os.makedirs(folder_name, exist_ok=True)
    transcripts = []
    # process each chunk
    for i, audio_chunk in enumerate(chunks, start=1):
        # export audio chunk and save it in
        # the `folder_name` directory.
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav")
        # recognize the chunk
        try:
            text = transcribe_audio(chunk_filename)
        except sr.UnknownValueError as e:
            # nothing intelligible in this chunk: report it and move on
            print("Error:", str(e))
        else:
            text = f"{text.capitalize()}. "
            print(chunk_filename, ":", text)
            transcripts.append(text)
    # return the text for all chunks detected
    return "".join(transcripts)
92+
93+
94+
if __name__ == '__main__':
    import sys
    # The audio file to transcribe is the first command-line argument,
    # e.g. "30-4447-0004.wav" or "7601-291468-0006.wav".
    if len(sys.argv) < 2:
        # exit with a usage hint instead of an IndexError traceback
        sys.exit(f"Usage: python {sys.argv[0]} <audio-file>")
    path = sys.argv[1]
    # first pass: chunks split on silence
    print("\nFull text:", get_large_audio_transcription_on_silence(path))
    print("=" * 50)
    # second pass: fixed chunks; 1 / 6 of a minute is 10 seconds per chunk
    print("\nFull text:", get_large_audio_transcription_fixed_interval(path, minutes=1 / 6))
103+
0 commit comments