From 18493f2b434e463bb64e3a05e40c9ff6c6b2bbfc Mon Sep 17 00:00:00 2001 From: akriti-github <82258844+akriti-github@users.noreply.github.com> Date: Thu, 2 Nov 2023 16:15:46 +0530 Subject: [PATCH 1/7] Create mfcc.h --- inference-app/src/mfcc.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 inference-app/src/mfcc.h diff --git a/inference-app/src/mfcc.h b/inference-app/src/mfcc.h new file mode 100644 index 0000000..32cb20a --- /dev/null +++ b/inference-app/src/mfcc.h @@ -0,0 +1,29 @@ +#ifndef MFCC_H +#define MFCC_H + +#include +#include + +class MFCC { +public: + MFCC(int num_mfcc_coeffs, int frame_size, int num_fft_points); + ~MFCC(); + + int init(); + void extract_mfcc(const int16_t* input, float32_t* output); + +private: + int _num_mfcc_coeffs; + int _frame_size; + int _num_fft_points; + int16_t* _hanning_window; + int16_t* _mel_filterbank; + + // Other private member variables and functions specific to the implementation + + // Define any private functions used internally by the class + void apply_mel_filterbank(const int16_t* spectrum, float32_t* mel_energies); + void compute_mfcc(float32_t* mfcc_output, const float32_t* mel_energies); +}; + +#endif // MFCC_H From bd7a29a8b563168351fc49100bd0a76170d25332 Mon Sep 17 00:00:00 2001 From: akriti-github <82258844+akriti-github@users.noreply.github.com> Date: Thu, 2 Nov 2023 16:17:59 +0530 Subject: [PATCH 2/7] Create mfcc.cpp --- inference-app/src/mfcc.cpp | 100 +++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 inference-app/src/mfcc.cpp diff --git a/inference-app/src/mfcc.cpp b/inference-app/src/mfcc.cpp new file mode 100644 index 0000000..ea3ebbd --- /dev/null +++ b/inference-app/src/mfcc.cpp @@ -0,0 +1,100 @@ +#include "mfcc.h" + +MFCC::MFCC(int num_mfcc_coeffs, int frame_size, int num_fft_points) : + _num_mfcc_coeffs(num_mfcc_coeffs), + _frame_size(frame_size), + _num_fft_points(num_fft_points), + _hanning_window(NULL), + _mel_filterbank(NULL) +{ +} + +MFCC::~MFCC() +{ + if (_hanning_window != NULL) { + delete [] _hanning_window; + _hanning_window = NULL; + } + if (_mel_filterbank != NULL) { + delete [] _mel_filterbank; + _mel_filterbank = NULL; + } +} + +int MFCC::init() +{ + // Initialize the hanning window and Mel filterbank, similar to DSPPipeline::init + + return 1; +} + +void MFCC::extract_mfcc(const int16_t* input, float32_t* output) +{ + int16_t windowed_input[_frame_size]; + int16_t fft_q15[_frame_size * 2]; + + // Apply the MFCC pipeline: Hanning Window + FFT + arm_mult_q15(_hanning_window, input, windowed_input, _frame_size); + arm_rfft_q15(&_S_q15, windowed_input, fft_q15); + + // Calculate the magnitude spectrum (similar to DSPPipeline::calculate_spectrum) + // Compute the power spectrum + arm_cmplx_mag_q15(fft_q15, fft_mag_q15, _frame_size / 2 + 1); + + // Apply Mel filterbank to the power spectrum (specific to MFCC) + float32_t mel_energies[_num_mfcc_coeffs]; + apply_mel_filterbank(fft_mag_q15, mel_energies); + + // Compute the logarithm of the mel energies + for (int i = 0; i < _num_mfcc_coeffs; i++) { + mel_energies[i] = logf(mel_energies[i]); + } + + // Apply DCT (Discrete Cosine Transform) to obtain MFCC coefficients + compute_mfcc(output, mel_energies); +} + +void MFCC::apply_mel_filterbank(const int16_t* spectrum, float32_t* mel_energies) +{ + // Define the filter bank parameters + int num_filter_banks = 13; + int filter_bank_size = _frame_size / 2 + 1; // Half of the FFT size + + // Initialize the mel filterbank + if (_mel_filterbank == NULL) { + _mel_filterbank = new float32_t[num_filter_banks * filter_bank_size]; + + // Initialize the mel filterbank with appropriate filter shapes + // You can use equations like Triangular, Hanning, or other shapes for filters + // Fill _mel_filterbank with filter coefficients based on filter bank parameters + // Ensure that the coefficients sum to 1 for each filter + // This step is essential and depends on your specific filterbank design. + } + + // Apply the mel filter bank to the spectrum + for (int i = 0; i < num_filter_banks; i++) { + mel_energies[i] = 0.0; + for (int j = 0; j < filter_bank_size; j++) { + mel_energies[i] += _mel_filterbank[i * filter_bank_size + j] * spectrum[j]; + } + } +} + +void MFCC::compute_mfcc(float32_t* mfcc_output, const float32_t* mel_energies) +{ + // Define the number of MFCC coefficients + int num_mfcc_coeffs = 9; + + // Initialize the DCT matrix (you can precompute it) + // It's a matrix of size num_mfcc_coeffs x 13 (number of filter banks) + // You can find precomputed DCT matrices in DSP libraries or compute it manually + + // Compute the DCT of the mel energies to obtain MFCC coefficients + for (int i = 0; i < num_mfcc_coeffs; i++) { + mfcc_output[i] = 0.0; + for (int j = 0; j < 13; j++) { + mfcc_output[i] += mel_energies[j] * dct_matrix[i * 13 + j]; + } + } +} + From 45209746df4ad094f75ad07482c3c2f1fa55afd6 Mon Sep 17 00:00:00 2001 From: akriti-github <82258844+akriti-github@users.noreply.github.com> Date: Thu, 2 Nov 2023 18:10:39 +0530 Subject: [PATCH 3/7] Update mfcc.cpp --- inference-app/src/mfcc.cpp | 141 +++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) diff --git a/inference-app/src/mfcc.cpp b/inference-app/src/mfcc.cpp index ea3ebbd..b4ca894 100644 --- a/inference-app/src/mfcc.cpp +++ b/inference-app/src/mfcc.cpp @@ -1,4 +1,145 @@ #include "mfcc.h" +def preemphasis(xn, alpha = 0.97): + # Using pre-empaphases with a certain alpha + pre_e_xn = np.zeros((xn.shape)) + + pre_e_xn[0] = xn[0] + + pre_e_xn[1:] = xn[1:] - alpha * xn[:-1] + + return pre_e_xn + +def get_mel_from_hertz(hertz): + return 2595 * np.log10(1 + (hertz/ 700)) + +def get_hertz_from_mel(mel): + return 700 * (10**(mel / 2595) - 1) + +def get_power_spectrum(xn_mag, fft_size=2048): + return (1/fft_size) * np.power(xn_mag, 2) + +def get_triangle_function(prev_freq, cur_freq, nex_freq, filter_banks, bin_fb): + + # Ascending Triangle + + for freq in range(int(prev_freq), int(cur_freq)): + + filter_banks[bin_fb-1,freq] = (freq - prev_freq)/(cur_freq-prev_freq) + + # Descending Triangle + + for freq in range(int(cur_freq+1), int(nex_freq)): + + filter_banks[bin_fb-1, freq] = (nex_freq-freq)/(nex_freq-cur_freq) + + # Triangle Tip + + filter_banks[bin_fb-1, int(cur_freq)] = 1 + + return filter_banks + +def mel_filter_banks(xn_pow, sr, number_filters, fft_size=2048): + min_mel = 0 + max_mel = get_mel_from_hertz(sr/2) + + mel_freq_points = np.linspace(min_mel, max_mel, num=number_filters+2) + hertz_freq_points = get_hertz_from_mel(mel_freq_points) + + corresponding_bins_hertz_points = np.floor((fft_size + 1) * hertz_freq_points / sr) + + # Filter banks have to be of shape number_filters * (fft_size/2) + 1 + filter_banks = np.zeros((number_filters, int(fft_size/2)+1)) + + for bin_fb in range(1, number_filters+1): + + prev_bin = corresponding_bins_hertz_points[bin_fb-1] + current_bin = corresponding_bins_hertz_points[bin_fb] + next_bin = corresponding_bins_hertz_points[bin_fb+1] + + # Use the triangle function to get the values of the banks + + filter_banks = get_triangle_function(prev_bin, current_bin, next_bin, filter_banks, bin_fb) + + return filter_banks + +def get_delta_values(x): + delta_x = np.zeros(shape=x.shape) + for i in range(1,x.shape[1]-1): + prev_val = x[:,i-1] + next_val = x[:,i+1] + + delta_x[:,i] = (next_val - prev_val)/2 + + return delta_x + +def mfcc(xn, sr, number_filters, window_size = 500, hopsize=int(500/4), fft_size=512): + + # Pre-emphasis + + xn = preemphasis(xn) + + # Getting the STFT + + xn_stft = stft(xn, window_size= window_size, hopsize=hopsize, fft_size=fft_size) + + # Getting the Magnitude of the STFT + + xn_mag = np.abs(xn_stft) + + # Evaluating the Power spectrum for the magnitude + + xn_pow = get_power_spectrum(xn_mag, fft_size=fft_size) + + # To get the mel filter banks + + filter_banks = mel_filter_banks(xn_pow, sr, number_filters, fft_size=fft_size) + + machine_epsilon = 2.22044604925e-16 + + filter_banks[filter_banks==0] = machine_epsilon + + + # Multiply the filter_banks with the power spectrum + + filter_banks_res = np.dot(filter_banks, xn_pow.T) + + # Taking the log and the inverse DFT + + filter_banks_res = filter_banks_res + machine_epsilon + + log_filter_bank = np.log(filter_banks_res) + + idft = sp.fftpack.dct(log_filter_bank) + + # First 12 MFCC Values + + first_12 = idft[:12,:] + + # delta and delta-delta coefficients + + delta = get_delta_values(idft) + + delta_delta = get_delta_values(delta) + + # Getting Energy values of delta and delta-delta coefficients + + first_12_delta = delta[:12,:] + + first_12_delta_delta = delta_delta[:12,:] + + + # Energy of the Cepstrum frame. Read from - http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.596.8754&rep=rep1&type=pdf + + energy = np.sqrt(np.sum(np.power(first_12,2),axis=0)).reshape(1,-1) + + energy_delta = np.sqrt(np.sum(np.power(first_12_delta,2),axis=0)).reshape(1,-1) + + energy_delta_delta = np.sqrt(np.sum(np.power(first_12_delta_delta,2),axis=0)).reshape(1,-1) + + return np.vstack((energy, energy_delta, energy_delta_delta, first_12, first_12_delta, first_12_delta_delta)), filter_banks + +mfcc_xn, filter_banks = mfcc(xn, sr, 40) + MFCC::MFCC(int num_mfcc_coeffs, int frame_size, int num_fft_points) : _num_mfcc_coeffs(num_mfcc_coeffs), From 440dc7cfd44727b4afc046a2314460e8611c1624 Mon Sep 17 00:00:00 2001 From: akriti-github <82258844+akriti-github@users.noreply.github.com> Date: Thu, 2 Nov 2023 20:38:26 +0530 Subject: [PATCH 4/7] Create mfcc_final.cpp --- inference-app/src/mfcc_final.cpp | 191 +++++++++++++++++++++++++++++++ 1 file changed, 191 insertions(+) create mode 100644 inference-app/src/mfcc_final.cpp diff --git a/inference-app/src/mfcc_final.cpp b/inference-app/src/mfcc_final.cpp new file mode 100644 index 0000000..6796371 --- /dev/null +++ b/inference-app/src/mfcc_final.cpp @@ -0,0 +1,191 @@ +#include + +#include "mfcc.h" +#include "float.h" + +MFCC::MFCC(int num_mfcc_features, int frame_len, int mfcc_dec_bits) +:num_mfcc_features(num_mfcc_features), + frame_len(frame_len), + mfcc_dec_bits(mfcc_dec_bits) +{ + + // Round-up to nearest power of 2. + frame_len_padded = pow(2,ceil((log(frame_len)/log(2)))); + + frame = new float[frame_len_padded]; + buffer = new float[frame_len_padded]; + mel_energies = new float[NUM_FBANK_BINS]; + + //create window function + window_func = new float[frame_len]; + for (int i = 0; i < frame_len; i++) + window_func[i] = 0.5 - 0.5*cos(M_2PI * ((float)i) / (frame_len)); + + //create mel filterbank + fbank_filter_first = new int32_t[NUM_FBANK_BINS]; + fbank_filter_last = new int32_t[NUM_FBANK_BINS];; + mel_fbank = create_mel_fbank(); + + //create DCT matrix + dct_matrix = create_dct_matrix(NUM_FBANK_BINS, num_mfcc_features); + + //initialize FFT + rfft = new arm_rfft_fast_instance_f32; + arm_rfft_fast_init_f32(rfft, frame_len_padded); + +} + +MFCC::~MFCC() { + delete []frame; + delete [] buffer; + delete []mel_energies; + delete []window_func; + delete []fbank_filter_first; + delete []fbank_filter_last; + delete []dct_matrix; + delete rfft; + for(int i=0;i left_mel && mel < right_mel) { + float weight; + if (mel <= center_mel) { + weight = (mel - left_mel) / (center_mel - left_mel); + } else { + weight = (right_mel-mel) / (right_mel-center_mel); + } + this_bin[i] = weight; + if (first_index == -1) + first_index = i; + last_index = i; + } + } + + fbank_filter_first[bin] = first_index; + fbank_filter_last[bin] = last_index; + mel_fbank[bin] = new float[last_index-first_index+1]; + + int32_t j = 0; + //copy the part we care about + for (i = first_index; i <= last_index; i++) { + mel_fbank[bin][j++] = this_bin[i]; + } + } + delete []this_bin; + return mel_fbank; +} + +void MFCC::mfcc_compute(const int16_t * audio_data, q7_t* mfcc_out) { + + int32_t i, j, bin; + + //TensorFlow way of normalizing .wav data to (-1,1) + for (i = 0; i < frame_len; i++) { + frame[i] = (float)audio_data[i]/(1<<15); + } + //Fill up remaining with zeros + memset(&frame[frame_len], 0, sizeof(float) * (frame_len_padded-frame_len)); + + for (i = 0; i < frame_len; i++) { + frame[i] *= window_func[i]; + } + + //Compute FFT + arm_rfft_fast_f32(rfft, frame, buffer, 0); + + //Convert to power spectrum + //frame is stored as [real0, realN/2-1, real1, im1, real2, im2, ...] + int32_t half_dim = frame_len_padded/2; + float first_energy = buffer[0] * buffer[0], + last_energy = buffer[1] * buffer[1]; // handle this special case + for (i = 1; i < half_dim; i++) { + float real = buffer[i*2], im = buffer[i*2 + 1]; + buffer[i] = real*real + im*im; + } + buffer[0] = first_energy; + buffer[half_dim] = last_energy; + + float sqrt_data; + //Apply mel filterbanks + for (bin = 0; bin < NUM_FBANK_BINS; bin++) { + j = 0; + float mel_energy = 0; + int32_t first_index = fbank_filter_first[bin]; + int32_t last_index = fbank_filter_last[bin]; + for (i = first_index; i <= last_index; i++) { + arm_sqrt_f32(buffer[i],&sqrt_data); + mel_energy += (sqrt_data) * mel_fbank[bin][j++]; + } + mel_energies[bin] = mel_energy; + + //avoid log of zero + if (mel_energy == 0.0) + mel_energies[bin] = FLT_MIN; + } + + //Take log + for (bin = 0; bin < NUM_FBANK_BINS; bin++) + mel_energies[bin] = logf(mel_energies[bin]); + + //Take DCT. Uses matrix mul. + for (i = 0; i < num_mfcc_features; i++) { + float sum = 0.0; + for (j = 0; j < NUM_FBANK_BINS; j++) { + sum += dct_matrix[i*NUM_FBANK_BINS+j] * mel_energies[j]; + } + + //Input is Qx.mfcc_dec_bits (from quantization step) + sum *= (0x1<= 127) + mfcc_out[i] = 127; + else if(sum <= -128) + mfcc_out[i] = -128; + else + mfcc_out[i] = sum; + } + +} From de4a9e731f1d74157d94eef9309ca69b18a3830c Mon Sep 17 00:00:00 2001 From: akriti-github <82258844+akriti-github@users.noreply.github.com> Date: Thu, 2 Nov 2023 20:39:07 +0530 Subject: [PATCH 5/7] Create mfcc_final.h --- inference-app/src/mfcc_final.h | 47 ++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 inference-app/src/mfcc_final.h diff --git a/inference-app/src/mfcc_final.h b/inference-app/src/mfcc_final.h new file mode 100644 index 0000000..3c5a2d1 --- /dev/null +++ b/inference-app/src/mfcc_final.h @@ -0,0 +1,47 @@ + +#ifndef __KWS_MFCC_H__ +#define __KWS_MFCC_H__ + +#include "arm_math.h" +#include "string.h" + +#define SAMP_FREQ 16000 +#define NUM_FBANK_BINS 40 +#define MEL_LOW_FREQ 20 +#define MEL_HIGH_FREQ 4000 + +#define M_2PI 6.283185307179586476925286766559005 + +class MFCC{ + private: + int num_mfcc_features; + int frame_len; + int frame_len_padded; + int mfcc_dec_bits; + float * frame; + float * buffer; + float * mel_energies; + float * window_func; + int32_t * fbank_filter_first; + int32_t * fbank_filter_last; + float ** mel_fbank; + float * dct_matrix; + arm_rfft_fast_instance_f32 * rfft; + float * create_dct_matrix(int32_t input_length, int32_t coefficient_count); + float ** create_mel_fbank(); + + static inline float InverseMelScale(float mel_freq) { + return 700.0f * (expf (mel_freq / 1127.0f) - 1.0f); + } + + static inline float MelScale(float freq) { + return 1127.0f * logf (1.0f + freq / 700.0f); + } + + public: + MFCC(int num_mfcc_features, int frame_len, int mfcc_dec_bits); + ~MFCC(); + void mfcc_compute(const int16_t* data, q7_t* mfcc_out); +}; + +#endif From e9475114f7520a25aee36591ad370aeeacb5bab2 Mon Sep 17 00:00:00 2001 From: akriti-github <82258844+akriti-github@users.noreply.github.com> Date: Fri, 3 Nov 2023 11:23:50 +0530 Subject: [PATCH 6/7] Update mfcc_final.cpp --- inference-app/src/mfcc_final.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inference-app/src/mfcc_final.cpp b/inference-app/src/mfcc_final.cpp index 6796371..cd7e6c4 100644 --- a/inference-app/src/mfcc_final.cpp +++ b/inference-app/src/mfcc_final.cpp @@ -1,6 +1,6 @@ #include -#include "mfcc.h" +#include "mfccfinal.h" #include "float.h" MFCC::MFCC(int num_mfcc_features, int frame_len, int mfcc_dec_bits) From eab539defc6db4117b540d7f2c91fb3dcd3cec53 Mon Sep 17 00:00:00 2001 From: akriti-github <82258844+akriti-github@users.noreply.github.com> Date: Fri, 3 Nov 2023 11:24:14 +0530 Subject: [PATCH 7/7] Rename mfcc_final.h to mfccfinal.h --- inference-app/src/{mfcc_final.h => mfccfinal.h} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename inference-app/src/{mfcc_final.h => mfccfinal.h} (100%) diff --git a/inference-app/src/mfcc_final.h b/inference-app/src/mfccfinal.h similarity index 100% rename from inference-app/src/mfcc_final.h rename to inference-app/src/mfccfinal.h