From 18493f2b434e463bb64e3a05e40c9ff6c6b2bbfc Mon Sep 17 00:00:00 2001
From: akriti-github <82258844+akriti-github@users.noreply.github.com>
Date: Thu, 2 Nov 2023 16:15:46 +0530
Subject: [PATCH 1/7] Create mfcc.h

---
 inference-app/src/mfcc.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 inference-app/src/mfcc.h

diff --git a/inference-app/src/mfcc.h b/inference-app/src/mfcc.h
new file mode 100644
index 0000000..32cb20a
--- /dev/null
+++ b/inference-app/src/mfcc.h
@@ -0,0 +1,29 @@
+#ifndef MFCC_H
+#define MFCC_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+// MFCC feature extractor skeleton. NOTE(review): float32_t is used below but this
+// header only includes <stdint.h>/<stddef.h>; presumably it comes from arm_math.h.
+class MFCC {
+public:
+    MFCC(int num_mfcc_coeffs, int frame_size, int num_fft_points);
+    ~MFCC();  // releases _hanning_window and _mel_filterbank
+
+    int init();
+    void extract_mfcc(const int16_t* input, float32_t* output);
+
+private:
+    int _num_mfcc_coeffs;
+    int _frame_size;
+    int _num_fft_points;
+    int16_t* _hanning_window;    // window passed to arm_mult_q15, _frame_size entries
+    float32_t* _mel_filterbank;  // float weights: mfcc.cpp allocates it with new float32_t[]
+
+    // Internal pipeline stages called from extract_mfcc().
+    void apply_mel_filterbank(const int16_t* spectrum, float32_t* mel_energies);
+    void compute_mfcc(float32_t* mfcc_output, const float32_t* mel_energies);
+};
+
+#endif  // MFCC_H

From bd7a29a8b563168351fc49100bd0a76170d25332 Mon Sep 17 00:00:00 2001
From: akriti-github <82258844+akriti-github@users.noreply.github.com>
Date: Thu, 2 Nov 2023 16:17:59 +0530
Subject: [PATCH 2/7] Create mfcc.cpp

---
 inference-app/src/mfcc.cpp | 100 +++++++++++++++++++++++++++++++++++++
 1 file changed, 100 insertions(+)
 create mode 100644 inference-app/src/mfcc.cpp

diff --git a/inference-app/src/mfcc.cpp b/inference-app/src/mfcc.cpp
new file mode 100644
index 0000000..ea3ebbd
--- /dev/null
+++ b/inference-app/src/mfcc.cpp
@@ -0,0 +1,100 @@
+#include "mfcc.h"
+
+MFCC::MFCC(int num_mfcc_coeffs, int frame_size, int num_fft_points) :
+    _num_mfcc_coeffs(num_mfcc_coeffs),
+    _frame_size(frame_size),
+    _num_fft_points(num_fft_points),
+    _hanning_window(NULL),   // not allocated here; the destructor delete[]s it
+    _mel_filterbank(NULL)    // allocated lazily in apply_mel_filterbank()
+{
+}   // Stores the configuration only; no buffers are created by the constructor.
+
+// Frees the window and filterbank buffers owned by this object.
+// delete[] on a null pointer does nothing, so the buffers are released
+// unconditionally and the pointers are reset afterwards.
+MFCC::~MFCC()
+{
+    delete [] _hanning_window;
+    delete [] _mel_filterbank;
+
+    _hanning_window = NULL;
+    _mel_filterbank = NULL;
+}
+
+int MFCC::init()
+{
+    // TODO: stub. Meant to build the hanning window and Mel filterbank (similar to
+    // DSPPipeline::init) but allocates nothing yet; it always returns 1.
+    return 1;
+}
+
+void MFCC::extract_mfcc(const int16_t* input, float32_t* output)
+{
+    // One frame: Hanning window -> real FFT -> magnitudes -> mel bands -> log -> DCT.
+    const int num_filter_banks = 13;  // bands written by apply_mel_filterbank()
+    const int num_bins = _frame_size / 2 + 1;
+    int16_t windowed_input[_frame_size];
+    int16_t fft_q15[_frame_size * 2];
+    int16_t fft_mag_q15[num_bins];             // was used without being declared
+    float32_t mel_energies[num_filter_banks];  // was sized by _num_mfcc_coeffs (overflow)
+
+    // The FFT instance `_S_q15` was never declared, so one is set up locally.
+    arm_rfft_instance_q15 rfft;
+    if (_hanning_window == NULL) return;  // init() has not built the window
+    if (arm_rfft_init_q15(&rfft, _frame_size, 0, 1) != ARM_MATH_SUCCESS) return;
+
+    arm_mult_q15(_hanning_window, input, windowed_input, _frame_size);
+    arm_rfft_q15(&rfft, windowed_input, fft_q15);
+    arm_cmplx_mag_q15(fft_q15, fft_mag_q15, num_bins);
+    apply_mel_filterbank(fft_mag_q15, mel_energies);
+
+    // Log of every band, floored so log(0) cannot push -inf into the DCT.
+    for (int i = 0; i < num_filter_banks; i++)
+        mel_energies[i] = logf(mel_energies[i] > 0.0f ? mel_energies[i] : 1e-12f);
+    compute_mfcc(output, mel_energies);
+}
+
+void MFCC::apply_mel_filterbank(const int16_t* spectrum, float32_t* mel_energies)
+{
+    // Projects the spectrum onto the mel bands:
+    //   mel_energies[i] = sum_j filterbank[i][j] * spectrum[j]
+    const int num_filter_banks = 13;
+    const int filter_bank_size = _frame_size / 2 + 1;  // number of spectrum bins read
+
+    if (_mel_filterbank == NULL) {
+        // Lazily allocate the row-major [num_filter_banks x filter_bank_size] table.
+        // The trailing () value-initializes it to 0.0f: previously the array was
+        // left uninitialized and the loop below read indeterminate values.
+        _mel_filterbank = new float32_t[num_filter_banks * filter_bank_size]();
+        // TODO: fill in the real filter shapes (e.g. triangular, each filter's
+        // coefficients summing to 1). Until then every band energy is 0.
+    }
+
+    for (int i = 0; i < num_filter_banks; i++) {
+        const float32_t* weights = &_mel_filterbank[i * filter_bank_size];
+        float32_t energy = 0.0f;
+        for (int j = 0; j < filter_bank_size; j++) {
+            energy += weights[j] * spectrum[j];
+        }
+        mel_energies[i] = energy;
+    }
+}
+
+void MFCC::compute_mfcc(float32_t* mfcc_output, const float32_t* mel_energies)
+{
+    // DCT step: mfcc_output[i] = sum_j mel_energies[j] * dct_matrix[i][j].
+    // The coefficient count now follows the constructor argument instead of a
+    // hard-coded 9, so `mfcc_output` must hold _num_mfcc_coeffs floats.
+    const int num_filter_banks = 13;  // entries read from mel_energies
+
+    // NOTE(review): `dct_matrix` is not declared in mfcc.h or in this file; it
+    // has to be supplied as a row-major [_num_mfcc_coeffs x 13] float table.
+    for (int i = 0; i < _num_mfcc_coeffs; i++) {
+        float32_t acc = 0.0f;
+        for (int j = 0; j < num_filter_banks; j++) {
+            acc += mel_energies[j] * dct_matrix[i * num_filter_banks + j];
+        }
+        mfcc_output[i] = acc;
+    }
+}
+

From 45209746df4ad094f75ad07482c3c2f1fa55afd6 Mon Sep 17 00:00:00 2001
From: akriti-github <82258844+akriti-github@users.noreply.github.com>
Date: Thu, 2 Nov 2023 18:10:39 +0530
Subject: [PATCH 3/7] Update mfcc.cpp

---
 inference-app/src/mfcc.cpp | 141 +++++++++++++++++++++++++++++++++++++
 1 file changed, 141 insertions(+)

diff --git a/inference-app/src/mfcc.cpp b/inference-app/src/mfcc.cpp
index ea3ebbd..b4ca894 100644
--- a/inference-app/src/mfcc.cpp
+++ b/inference-app/src/mfcc.cpp
@@ -1,4 +1,145 @@
 #include "mfcc.h"
+def preemphasis(xn, alpha = 0.97):
+    # Pre-emphasis filter with coefficient alpha: y[0] = x[0], y[n] = x[n] - alpha * x[n-1]
+    pre_e_xn = np.zeros((xn.shape))
+    
+    pre_e_xn[0] = xn[0]
+    
+    pre_e_xn[1:] = xn[1:] - alpha * xn[:-1]
+    
+    return pre_e_xn
+
+def get_mel_from_hertz(hertz):  # Hz -> mel: 2595 * log10(1 + hertz / 700)
+    return 2595 * np.log10(1 + (hertz/ 700))
+
+def get_hertz_from_mel(mel):  # mel -> Hz: inverse of get_mel_from_hertz
+    return 700 * (10**(mel / 2595) - 1)
+
+def get_power_spectrum(xn_mag, fft_size=2048):  # squared magnitude scaled by 1 / fft_size
+    return (1/fft_size) * np.power(xn_mag, 2)
+
+def get_triangle_function(prev_freq, cur_freq, nex_freq, filter_banks, bin_fb):
+    # Writes one triangular filter into row bin_fb-1 of filter_banks (in place) and returns it.
+    # Ascending side: columns int(prev_freq)..int(cur_freq)-1 rise linearly from 0
+    
+    for freq in range(int(prev_freq), int(cur_freq)):
+        
+        filter_banks[bin_fb-1,freq] = (freq - prev_freq)/(cur_freq-prev_freq)
+        
+    # Descending side: columns int(cur_freq)+1..int(nex_freq)-1 fall linearly towards 0
+    
+    for freq in range(int(cur_freq+1), int(nex_freq)):
+        
+        filter_banks[bin_fb-1, freq] = (nex_freq-freq)/(nex_freq-cur_freq)
+        
+    # Triangle tip: the centre column gets weight 1
+    
+    filter_banks[bin_fb-1, int(cur_freq)] = 1
+    
+    return filter_banks
+
+def mel_filter_banks(xn_pow, sr, number_filters, fft_size=2048):
+    min_mel = 0  # NOTE(review): the xn_pow argument is never used in this function
+    max_mel = get_mel_from_hertz(sr/2)
+    
+    mel_freq_points = np.linspace(min_mel, max_mel, num=number_filters+2)  # evenly spaced in mel
+    hertz_freq_points = get_hertz_from_mel(mel_freq_points)
+    
+    corresponding_bins_hertz_points = np.floor((fft_size + 1) * hertz_freq_points / sr)
+    
+    # Filter bank matrix has shape (number_filters, fft_size/2 + 1): one row per filter
+    filter_banks = np.zeros((number_filters, int(fft_size/2)+1))
+    
+    for bin_fb in range(1, number_filters+1):
+        
+        prev_bin = corresponding_bins_hertz_points[bin_fb-1]
+        current_bin = corresponding_bins_hertz_points[bin_fb]
+        next_bin = corresponding_bins_hertz_points[bin_fb+1]
+        
+        # Three consecutive points (left edge, peak, right edge) define filter bin_fb
+        
+        filter_banks = get_triangle_function(prev_bin, current_bin, next_bin, filter_banks, bin_fb)
+        
+    return filter_banks
+
+def get_delta_values(x):  # central difference along axis 1 of a 2-D array
+    delta_x = np.zeros(shape=x.shape)
+    for i in range(1,x.shape[1]-1):  # first and last columns are left at zero
+        prev_val = x[:,i-1]
+        next_val = x[:,i+1]
+        
+        delta_x[:,i]  = (next_val - prev_val)/2
+    
+    return delta_x
+
+def mfcc(xn, sr, number_filters, window_size = 500, hopsize=int(500/4), fft_size=512):
+    
+    # Pre-emphasis
+    
+    xn = preemphasis(xn)
+    
+    # Getting the STFT
+        
+    xn_stft = stft(xn, window_size= window_size, hopsize=hopsize, fft_size=fft_size)
+    
+    # Getting the Magnitude of the STFT
+    
+    xn_mag = np.abs(xn_stft)
+    
+    # Evaluating the Power spectrum for the magnitude
+    
+    xn_pow = get_power_spectrum(xn_mag, fft_size=fft_size)
+    
+    # To get the mel filter banks
+    
+    filter_banks = mel_filter_banks(xn_pow, sr, number_filters, fft_size=fft_size)
+    
+    machine_epsilon =  2.22044604925e-16
+    
+    filter_banks[filter_banks==0] = machine_epsilon
+    
+    
+    # Multiply the filter_banks with the power spectrum
+    
+    filter_banks_res = np.dot(filter_banks, xn_pow.T)
+    
+    # Taking the log and the inverse DFT
+    
+    filter_banks_res = filter_banks_res + machine_epsilon
+    
+    log_filter_bank = np.log(filter_banks_res)
+    
+    idft = sp.fftpack.dct(log_filter_bank)
+    
+    # First 12 MFCC Values
+    
+    first_12 = idft[:12,:]
+    
+    # delta and delta-delta coefficients
+    
+    delta = get_delta_values(idft)
+    
+    delta_delta = get_delta_values(delta)
+    
+    # Getting Energy values of delta and delta-delta coefficients
+    
+    first_12_delta = delta[:12,:]
+    
+    first_12_delta_delta = delta_delta[:12,:]
+    
+    
+    # Energy of the Cepstrum frame. Read from - http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.596.8754&rep=rep1&type=pdf
+    
+    energy = np.sqrt(np.sum(np.power(first_12,2),axis=0)).reshape(1,-1)
+    
+    energy_delta = np.sqrt(np.sum(np.power(first_12_delta,2),axis=0)).reshape(1,-1)
+    
+    energy_delta_delta = np.sqrt(np.sum(np.power(first_12_delta_delta,2),axis=0)).reshape(1,-1)
+    
+    return np.vstack((energy, energy_delta, energy_delta_delta, first_12, first_12_delta, first_12_delta_delta)), filter_banks
+
+mfcc_xn, filter_banks = mfcc(xn, sr, 40)  # NOTE(review): xn and sr are not defined in this file
+
 
 MFCC::MFCC(int num_mfcc_coeffs, int frame_size, int num_fft_points) :
     _num_mfcc_coeffs(num_mfcc_coeffs),

From 440dc7cfd44727b4afc046a2314460e8611c1624 Mon Sep 17 00:00:00 2001
From: akriti-github <82258844+akriti-github@users.noreply.github.com>
Date: Thu, 2 Nov 2023 20:38:26 +0530
Subject: [PATCH 4/7] Create mfcc_final.cpp

---
 inference-app/src/mfcc_final.cpp | 191 +++++++++++++++++++++++++++++++
 1 file changed, 191 insertions(+)
 create mode 100644 inference-app/src/mfcc_final.cpp

diff --git a/inference-app/src/mfcc_final.cpp b/inference-app/src/mfcc_final.cpp
new file mode 100644
index 0000000..6796371
--- /dev/null
+++ b/inference-app/src/mfcc_final.cpp
@@ -0,0 +1,191 @@
+#include <string.h>
+
+#include "mfcc.h"
+#include "float.h"
+
+MFCC::MFCC(int num_mfcc_features, int frame_len, int mfcc_dec_bits) 
+:num_mfcc_features(num_mfcc_features), 
+ frame_len(frame_len), 
+ mfcc_dec_bits(mfcc_dec_bits)
+{
+  // Round the FFT length up to the next power of two with integer arithmetic;
+  // pow(2, ceil(log(n)/log(2))) depended on exact floating-point rounding.
+  frame_len_padded = 1;
+  while (frame_len_padded < frame_len)
+    frame_len_padded <<= 1;
+
+  // Work buffers: zero-padded time-domain frame, FFT output, per-band energies.
+  frame = new float[frame_len_padded];
+  buffer = new float[frame_len_padded];
+  mel_energies = new float[NUM_FBANK_BINS];
+
+  // Hann window over the unpadded frame: w[i] = 0.5 - 0.5*cos(2*pi*i/frame_len).
+  window_func = new float[frame_len];
+  for (int i = 0; i < frame_len; i++)
+    window_func[i] = 0.5 - 0.5*cos(M_2PI * ((float)i) / (frame_len));
+
+  // Mel filterbank; create_mel_fbank() fills the first/last index tables too.
+  fbank_filter_first = new int32_t[NUM_FBANK_BINS];
+  fbank_filter_last = new int32_t[NUM_FBANK_BINS];
+  mel_fbank = create_mel_fbank();
+
+  // DCT matrix and FFT instance used by mfcc_compute().
+  dct_matrix = create_dct_matrix(NUM_FBANK_BINS, num_mfcc_features);
+  rfft = new arm_rfft_fast_instance_f32;
+  arm_rfft_fast_init_f32(rfft, frame_len_padded);
+}
+
+MFCC::~MFCC() {
+  delete[] frame;
+  delete[] buffer;
+  delete[] mel_energies;
+  delete[] window_func;
+  delete[] fbank_filter_first;
+  delete[] fbank_filter_last;
+  delete[] dct_matrix;
+  delete rfft;  // single object (plain new), so scalar delete is correct here
+  // Each row and the row table come from new[] and therefore need delete[].
+  for (int i = 0; i < NUM_FBANK_BINS; i++) delete[] mel_fbank[i];
+  delete[] mel_fbank;
+}
+
+float * MFCC::create_dct_matrix(int32_t input_length, int32_t coefficient_count) {
+  // Row-major [coefficient_count x input_length] DCT-II table:
+  //   M[k][n] = sqrt(2/input_length) * cos(pi/input_length * (n + 0.5) * k)
+  float scale;
+  arm_sqrt_f32(2.0/(float)input_length, &scale);
+  const double step = ((double)M_PI)/input_length;
+  float * table = new float[input_length*coefficient_count];
+  for (int32_t row = 0; row < coefficient_count; row++)
+    for (int32_t col = 0; col < input_length; col++)
+      table[row*input_length+col] = scale * cos( step * (col + 0.5) * row );
+  return table;  // caller owns the array (the destructor delete[]s dct_matrix)
+}
+
+float ** MFCC::create_mel_fbank() {
+  // Builds NUM_FBANK_BINS triangular filters spaced evenly on the mel scale
+  // between MEL_LOW_FREQ and MEL_HIGH_FREQ. Only the non-zero span of each
+  // filter is stored; fbank_filter_first/last record the FFT bin range it covers.
+  int32_t bin, i;
+
+  int32_t num_fft_bins = frame_len_padded/2;
+  float fft_bin_width = ((float)SAMP_FREQ) / frame_len_padded;
+  float mel_low_freq = MelScale(MEL_LOW_FREQ);
+  float mel_high_freq = MelScale(MEL_HIGH_FREQ); 
+  float mel_freq_delta = (mel_high_freq - mel_low_freq) / (NUM_FBANK_BINS+1);
+
+  float *this_bin = new float[num_fft_bins];  // scratch: full-width weights of one filter
+  float ** mel_fbank =  new float*[NUM_FBANK_BINS];
+
+  for (bin = 0; bin < NUM_FBANK_BINS; bin++) {
+    float left_mel = mel_low_freq + bin * mel_freq_delta;
+    float center_mel = mel_low_freq + (bin + 1) * mel_freq_delta;
+    float right_mel = mel_low_freq + (bin + 2) * mel_freq_delta;
+
+    int32_t first_index = -1, last_index = -1;
+
+    for (i = 0; i < num_fft_bins; i++) {
+      float freq = (fft_bin_width * i);  // center freq of this fft bin.
+      float mel = MelScale(freq);
+      this_bin[i] = 0.0;
+
+      if (mel > left_mel && mel < right_mel) {
+        float weight;
+        if (mel <= center_mel) {
+          weight = (mel - left_mel) / (center_mel - left_mel);
+        } else {
+          weight = (right_mel-mel) / (right_mel-center_mel);
+        }
+        this_bin[i] = weight;
+        if (first_index == -1)
+          first_index = i;
+        last_index = i;
+      }
+    }
+
+    // A band narrower than the FFT bin spacing can contain no bin at all. The
+    // -1/-1 sentinels would then make the copy below (and mfcc_compute) index
+    // element -1, so store an empty range [0, -1] instead.
+    if (first_index == -1) { first_index = 0; last_index = -1; }
+
+    fbank_filter_first[bin] = first_index;
+    fbank_filter_last[bin] = last_index;
+    mel_fbank[bin] = new float[last_index-first_index+1]; 
+    //copy the part we care about
+    for (i = first_index; i <= last_index; i++)
+      mel_fbank[bin][i - first_index] = this_bin[i];
+  }
+  delete []this_bin;
+  return mel_fbank;
+}
+
+void MFCC::mfcc_compute(const int16_t * audio_data, q7_t* mfcc_out) {
+  // Computes num_mfcc_features quantized MFCCs for one frame of frame_len
+  // 16-bit samples: window -> FFT -> magnitude spectrum -> mel filterbank ->
+  // log -> DCT -> saturate to q7 with mfcc_dec_bits fractional bits.
+  int32_t i, j, bin;
+
+  //TensorFlow way of normalizing .wav data to (-1,1)
+  for (i = 0; i < frame_len; i++) {
+    frame[i] = (float)audio_data[i]/(1<<15); 
+  }
+  //Fill up remaining with zeros
+  memset(&frame[frame_len], 0, sizeof(float) * (frame_len_padded-frame_len));
+
+  //Apply the window to the unpadded part of the frame
+  for (i = 0; i < frame_len; i++) {
+    frame[i] *= window_func[i];
+  }
+
+  //Compute FFT
+  arm_rfft_fast_f32(rfft, frame, buffer, 0);
+
+  //Convert to magnitude spectrum, in place.
+  //buffer is packed as [real0, realN/2, real1, im1, real2, im2, ...], so the
+  //DC and Nyquist terms (both purely real) are saved before being overwritten.
+  //sqrt is taken once per bin here, not once per (filter, bin) pair below.
+  int32_t half_dim = frame_len_padded/2;
+  float first_energy = buffer[0] * buffer[0],
+        last_energy =  buffer[1] * buffer[1];  // handle this special case
+  for (i = 1; i < half_dim; i++) {
+    float real = buffer[i*2], im = buffer[i*2 + 1];
+    arm_sqrt_f32(real*real + im*im, &buffer[i]);
+  }
+  arm_sqrt_f32(first_energy, &buffer[0]);
+  arm_sqrt_f32(last_energy, &buffer[half_dim]);
+ 
+  //Apply mel filterbanks
+  for (bin = 0; bin < NUM_FBANK_BINS; bin++) {
+    j = 0;
+    float mel_energy = 0;
+    int32_t first_index = fbank_filter_first[bin];
+    int32_t last_index = fbank_filter_last[bin];
+    for (i = first_index; i <= last_index; i++) {
+      mel_energy += buffer[i] * mel_fbank[bin][j++];
+    }
+    mel_energies[bin] = mel_energy;
+    //avoid log of zero
+    if (mel_energy == 0.0)
+      mel_energies[bin] = FLT_MIN;
+  }
+
+  //Take log
+  for (bin = 0; bin < NUM_FBANK_BINS; bin++)
+    mel_energies[bin] = logf(mel_energies[bin]);
+
+  //Take DCT. Uses matrix mul.
+  for (i = 0; i < num_mfcc_features; i++) {
+    float sum = 0.0;
+    for (j = 0; j < NUM_FBANK_BINS; j++) {
+      sum += dct_matrix[i*NUM_FBANK_BINS+j] * mel_energies[j];
+    }
+    //Input is Qx.mfcc_dec_bits (from quantization step)
+    sum *= (0x1<<mfcc_dec_bits);
+    sum = round(sum); 
+    if(sum >= 127)
+      mfcc_out[i] = 127;
+    else if(sum <= -128)
+      mfcc_out[i] = -128;
+    else
+      mfcc_out[i] = sum; 
+  }
+}

From de4a9e731f1d74157d94eef9309ca69b18a3830c Mon Sep 17 00:00:00 2001
From: akriti-github <82258844+akriti-github@users.noreply.github.com>
Date: Thu, 2 Nov 2023 20:39:07 +0530
Subject: [PATCH 5/7] Create mfcc_final.h

---
 inference-app/src/mfcc_final.h | 47 ++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100644 inference-app/src/mfcc_final.h

diff --git a/inference-app/src/mfcc_final.h b/inference-app/src/mfcc_final.h
new file mode 100644
index 0000000..3c5a2d1
--- /dev/null
+++ b/inference-app/src/mfcc_final.h
@@ -0,0 +1,47 @@
+
+#ifndef __KWS_MFCC_H__
+#define __KWS_MFCC_H__
+
+#include "arm_math.h"
+#include "string.h"
+
+#define SAMP_FREQ 16000
+#define NUM_FBANK_BINS 40
+#define MEL_LOW_FREQ 20
+#define MEL_HIGH_FREQ 4000
+
+#define M_2PI 6.283185307179586476925286766559005
+
+// MFCC front end: turns one frame of 16-bit audio into quantized (q7) MFCCs.
+// NOTE(review): the class owns raw heap buffers but does not disable copying.
+class MFCC{
+  public:
+    MFCC(int num_mfcc_features, int frame_len, int mfcc_dec_bits);
+    ~MFCC();
+    void mfcc_compute(const int16_t* data, q7_t* mfcc_out);
+
+  private:
+    // Mel <-> Hz conversions.
+    static inline float MelScale(float freq) { return 1127.0f * logf (1.0f + freq / 700.0f); }
+    static inline float InverseMelScale(float mel_freq) { return 700.0f * (expf (mel_freq / 1127.0f) - 1.0f); }
+
+    // Table builders called from the constructor.
+    float * create_dct_matrix(int32_t input_length, int32_t coefficient_count); 
+    float ** create_mel_fbank();
+
+    int num_mfcc_features;        // number of DCT coefficients written per frame
+    int frame_len;                // samples read from each input frame
+    int frame_len_padded;         // frame_len rounded up to a power of two (FFT length)
+    int mfcc_dec_bits;            // fractional bits used when quantizing the output
+    float * frame;                // windowed, zero-padded input (frame_len_padded floats)
+    float * buffer;               // FFT output / spectrum scratch
+    float * mel_energies;         // NUM_FBANK_BINS band energies
+    float * window_func;          // window of frame_len coefficients
+    int32_t * fbank_filter_first; // first FFT bin covered by each mel filter
+    int32_t * fbank_filter_last;  // last FFT bin covered by each mel filter
+    float ** mel_fbank;           // per-filter weights for bins first..last
+    float * dct_matrix;           // row-major [num_mfcc_features x NUM_FBANK_BINS]
+    arm_rfft_fast_instance_f32 * rfft;
+};
+
+#endif

From e9475114f7520a25aee36591ad370aeeacb5bab2 Mon Sep 17 00:00:00 2001
From: akriti-github <82258844+akriti-github@users.noreply.github.com>
Date: Fri, 3 Nov 2023 11:23:50 +0530
Subject: [PATCH 6/7] Update mfcc_final.cpp

---
 inference-app/src/mfcc_final.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/inference-app/src/mfcc_final.cpp b/inference-app/src/mfcc_final.cpp
index 6796371..cd7e6c4 100644
--- a/inference-app/src/mfcc_final.cpp
+++ b/inference-app/src/mfcc_final.cpp
@@ -1,6 +1,6 @@
 #include <string.h>
 
-#include "mfcc.h"
+#include "mfccfinal.h"
 #include "float.h"
 
 MFCC::MFCC(int num_mfcc_features, int frame_len, int mfcc_dec_bits) 

From eab539defc6db4117b540d7f2c91fb3dcd3cec53 Mon Sep 17 00:00:00 2001
From: akriti-github <82258844+akriti-github@users.noreply.github.com>
Date: Fri, 3 Nov 2023 11:24:14 +0530
Subject: [PATCH 7/7] Rename mfcc_final.h to mfccfinal.h

---
 inference-app/src/{mfcc_final.h => mfccfinal.h} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename inference-app/src/{mfcc_final.h => mfccfinal.h} (100%)

diff --git a/inference-app/src/mfcc_final.h b/inference-app/src/mfccfinal.h
similarity index 100%
rename from inference-app/src/mfcc_final.h
rename to inference-app/src/mfccfinal.h