
Commit 3c0f9f2

feat(AVAudioFileExtensions): first approach implementation to generate metering levels
1 parent d019e6a commit 3c0f9f2

2 files changed: +214 -11 lines changed


SoundWave/Classes/AVAudioFileExtensions.swift

Lines changed: 54 additions & 0 deletions
@@ -29,6 +29,60 @@ extension AVAudioFile {
                 result[channel][sampleIndex] = dbPower
             }
         }
+        print(result)
         return result
     }
 }
+
+/// Holds audio information used for building waveforms
+final class AudioContext {
+    /// The audio asset URL used to load the context
+    public let audioURL: URL
+
+    /// Total number of samples in loaded asset
+    public let totalSamples: Int
+
+    /// Loaded asset
+    public let asset: AVAsset
+
+    // Loaded assetTrack
+    public let assetTrack: AVAssetTrack
+
+    private init(audioURL: URL, totalSamples: Int, asset: AVAsset, assetTrack: AVAssetTrack) {
+        self.audioURL = audioURL
+        self.totalSamples = totalSamples
+        self.asset = asset
+        self.assetTrack = assetTrack
+    }
+
+    public static func load(fromAudioURL audioURL: URL, completionHandler: @escaping (_ audioContext: AudioContext?) -> ()) {
+        let asset = AVURLAsset(url: audioURL, options: [AVURLAssetPreferPreciseDurationAndTimingKey: NSNumber(value: true as Bool)])
+
+        guard let assetTrack = asset.tracks(withMediaType: AVMediaType.audio).first else {
+            fatalError("Couldn't load AVAssetTrack")
+        }
+
+        asset.loadValuesAsynchronously(forKeys: ["duration"]) {
+            var error: NSError?
+            let status = asset.statusOfValue(forKey: "duration", error: &error)
+            switch status {
+            case .loaded:
+                guard
+                    let formatDescriptions = assetTrack.formatDescriptions as? [CMAudioFormatDescription],
+                    let audioFormatDesc = formatDescriptions.first,
+                    let asbd = CMAudioFormatDescriptionGetStreamBasicDescription(audioFormatDesc)
+                    else { break }
+
+                let totalSamples = Int(asbd.pointee.mSampleRate * Float64(asset.duration.value) / Float64(asset.duration.timescale))
+                let audioContext = AudioContext(audioURL: audioURL, totalSamples: totalSamples, asset: asset, assetTrack: assetTrack)
+                completionHandler(audioContext)
+                return
+
+            case .failed, .cancelled, .loading, .unknown:
+                print("Couldn't load asset: \(error?.localizedDescription ?? "Unknown error")")
+            }
+
+            completionHandler(nil)
+        }
+    }
+}
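
For orientation, here is a minimal sketch of how the new AudioContext.load(fromAudioURL:completionHandler:) entry point might be called from client code. The resource name is hypothetical, and the main-queue hop is an assumption: the completion handler fires on the background queue used by loadValuesAsynchronously(forKeys:), not on the caller's queue.

    // Hypothetical call site; "recording.m4a" is an assumed bundle resource.
    let url = Bundle.main.url(forResource: "recording", withExtension: "m4a")!
    AudioContext.load(fromAudioURL: url) { audioContext in
        guard let audioContext = audioContext else { return }
        // totalSamples was derived above as sampleRate * duration
        DispatchQueue.main.async {
            print("Loaded \(audioContext.totalSamples) samples")
        }
    }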

SoundWave/Classes/AudioVisualizationView.swift

Lines changed: 160 additions & 11 deletions
@@ -5,6 +5,7 @@
 //  Created by Bastien Falcou on 12/6/16.
 //
 
+import Accelerate
 import AVFoundation
 import UIKit
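
The new Accelerate import is what the rendering code in the next hunk relies on: vDSP_vflt16, vDSP_vabs, vDSP_vdbcon, vDSP_vclip, and vDSP_desamp. As a self-contained illustration of the first of those calls (the sample values are made up):

    import Accelerate

    let pcm: [Int16] = [0, 16_384, -16_384, 32_767]   // hypothetical 16-bit samples
    var floats = [Float](repeating: 0, count: pcm.count)
    // Convert Int16 samples to Float; stride 1 on both input and output
    vDSP_vflt16(pcm, 1, &floats, 1, vDSP_Length(pcm.count))
    // floats is now [0.0, 16384.0, -16384.0, 32767.0]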

@@ -150,6 +151,154 @@ public class AudioVisualizationView: BaseNibView {
         return self.meteringLevelsClusteredArray
     }
 
+    func render(audioContext: AudioContext?, targetSamples: Int = 100) -> [Float] {
+        guard let audioContext = audioContext else {
+            fatalError("Couldn't create the audioContext")
+        }
+
+        let sampleRange: CountableRange<Int> = 0..<audioContext.totalSamples / 3
+
+        guard let reader = try? AVAssetReader(asset: audioContext.asset) else {
+            fatalError("Couldn't initialize the AVAssetReader")
+        }
+
+        reader.timeRange = CMTimeRange(start: CMTime(value: Int64(sampleRange.lowerBound), timescale: audioContext.asset.duration.timescale),
+                                       duration: CMTime(value: Int64(sampleRange.count), timescale: audioContext.asset.duration.timescale))
+
+        let outputSettingsDict: [String: Any] = [
+            AVFormatIDKey: Int(kAudioFormatLinearPCM),
+            AVLinearPCMBitDepthKey: 16,
+            AVLinearPCMIsBigEndianKey: false,
+            AVLinearPCMIsFloatKey: false,
+            AVLinearPCMIsNonInterleaved: false
+        ]
+
+        let readerOutput = AVAssetReaderTrackOutput(track: audioContext.assetTrack,
+                                                    outputSettings: outputSettingsDict)
+        readerOutput.alwaysCopiesSampleData = false
+        reader.add(readerOutput)
+
+        var channelCount = 1
+        let formatDescriptions = audioContext.assetTrack.formatDescriptions as! [CMAudioFormatDescription]
+        for item in formatDescriptions {
+            guard let fmtDesc = CMAudioFormatDescriptionGetStreamBasicDescription(item) else {
+                fatalError("Couldn't get the format description")
+            }
+            channelCount = Int(fmtDesc.pointee.mChannelsPerFrame)
+        }
+
+        let samplesPerPixel = max(1, channelCount * sampleRange.count / targetSamples)
+        let filter = [Float](repeating: 1.0 / Float(samplesPerPixel), count: samplesPerPixel)
+
+        var outputSamples = [Float]()
+        var sampleBuffer = Data()
+
+        // 16-bit samples
+        reader.startReading()
+        defer { reader.cancelReading() }
+
+        while reader.status == .reading {
+            guard let readSampleBuffer = readerOutput.copyNextSampleBuffer(),
+                let readBuffer = CMSampleBufferGetDataBuffer(readSampleBuffer) else {
+                    break
+            }
+            // Append audio sample buffer into our current sample buffer
+            var readBufferLength = 0
+            var readBufferPointer: UnsafeMutablePointer<Int8>?
+            CMBlockBufferGetDataPointer(readBuffer,
+                                        atOffset: 0,
+                                        lengthAtOffsetOut: &readBufferLength,
+                                        totalLengthOut: nil,
+                                        dataPointerOut: &readBufferPointer)
+            sampleBuffer.append(UnsafeBufferPointer(start: readBufferPointer, count: readBufferLength))
+            CMSampleBufferInvalidate(readSampleBuffer)
+
+            let totalSamples = sampleBuffer.count / MemoryLayout<Int16>.size
+            let downSampledLength = totalSamples / samplesPerPixel
+            let samplesToProcess = downSampledLength * samplesPerPixel
+
+            guard samplesToProcess > 0 else { continue }
+
+            processSamples(fromData: &sampleBuffer,
+                           outputSamples: &outputSamples,
+                           samplesToProcess: samplesToProcess,
+                           downSampledLength: downSampledLength,
+                           samplesPerPixel: samplesPerPixel,
+                           filter: filter)
+            //print("Status: \(reader.status)")
+        }
+
+        // Process the remaining samples at the end which didn't fit into samplesPerPixel
+        let samplesToProcess = sampleBuffer.count / MemoryLayout<Int16>.size
+        if samplesToProcess > 0 {
+            let downSampledLength = 1
+            let samplesPerPixel = samplesToProcess
+            let filter = [Float](repeating: 1.0 / Float(samplesPerPixel), count: samplesPerPixel)
+
+            processSamples(fromData: &sampleBuffer,
+                           outputSamples: &outputSamples,
+                           samplesToProcess: samplesToProcess,
+                           downSampledLength: downSampledLength,
+                           samplesPerPixel: samplesPerPixel,
+                           filter: filter)
+            //print("Status: \(reader.status)")
+        }
+
+        guard reader.status == .completed else {
+            fatalError("Couldn't read the audio file")
+        }
+
+        return outputSamples
+    }
+
+    func processSamples(fromData sampleBuffer: inout Data,
+                        outputSamples: inout [Float],
+                        samplesToProcess: Int,
+                        downSampledLength: Int,
+                        samplesPerPixel: Int,
+                        filter: [Float]) {
+        sampleBuffer.withUnsafeBytes { (samples: UnsafePointer<Int16>) in
+            var processingBuffer = [Float](repeating: 0.0, count: samplesToProcess)
+
+            let sampleCount = vDSP_Length(samplesToProcess)
+
+            // Convert 16-bit int samples to floats
+            vDSP_vflt16(samples, 1, &processingBuffer, 1, sampleCount)
+
+            // Take the absolute values to get amplitude
+            vDSP_vabs(processingBuffer, 1, &processingBuffer, 1, sampleCount)
+
+            // Get the corresponding dB values, and clip the results
+            getdB(from: &processingBuffer)
+
+            // Downsample and average
+            var downSampledData = [Float](repeating: 0.0, count: downSampledLength)
+            vDSP_desamp(processingBuffer,
+                        vDSP_Stride(samplesPerPixel),
+                        filter, &downSampledData,
+                        vDSP_Length(downSampledLength),
+                        vDSP_Length(samplesPerPixel))
+
+            outputSamples += downSampledData
+        }
+
+        // Remove the processed samples (done outside withUnsafeBytes, since
+        // mutating the Data while its bytes are exposed is unsafe)
+        sampleBuffer.removeFirst(samplesToProcess * MemoryLayout<Int16>.size)
+    }
+
+    func getdB(from normalizedSamples: inout [Float]) {
+        // Convert samples to a log scale
+        var zero: Float = 32768.0
+        vDSP_vdbcon(normalizedSamples, 1, &zero, &normalizedSamples, 1, vDSP_Length(normalizedSamples.count), 1)
+
+        // Clip to [noiseFloor, 0]
+        var ceil: Float = 0.0
+        var noiseFloorMutable: Float = -80.0 // TODO: CHANGE THIS VALUE
+        vDSP_vclip(normalizedSamples, 1, &noiseFloorMutable, &ceil, &normalizedSamples, 1, vDSP_Length(normalizedSamples.count))
+    }
 
     // PRAGMA: - Play Mode Handling
 
     public func play(for duration: TimeInterval) {
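
Two notes on the arithmetic in this hunk. First, samplesPerPixel = max(1, channelCount * sampleRange.count / targetSamples) makes each output value the average of one fixed-size bucket of interleaved samples, so vDSP_desamp with a uniform filter of 1/samplesPerPixel is a plain box average. Second, getdB(from:) passes 32768.0 as the reference with flag 1, so vDSP_vdbcon computes 20 * log10(amplitude / 32768), i.e. dBFS for 16-bit audio, which vDSP_vclip then pins to [-80, 0]. A small sketch checking that reading (the sample values are invented):

    import Accelerate
    import Foundation

    var amplitudes: [Float] = [32_768, 16_384, 1]     // hypothetical rectified samples
    var reference: Float = 32_768
    // Flag 1 selects the amplitude (20 * log10) form of the conversion
    vDSP_vdbcon(amplitudes, 1, &reference, &amplitudes, 1, vDSP_Length(amplitudes.count), 1)
    // amplitudes is now approximately [0.0, -6.02, -90.31]
    let check = 20 * log10(Float(16_384) / 32_768)    // -6.02, matches amplitudes[1]
    // After vDSP_vclip with bounds (-80, 0), the -90.31 entry would become -80.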
@@ -207,22 +356,22 @@
             fatalError("trying to read audio visualization in write mode")
         }
 
-        let track: AVAudioFile
-        do {
-            track = try AVAudioFile(forReading: url)
-            self.meteringLevels = try track.buffer().first
-        } catch {
-            fatalError("failed to create file from url")
-        }
+        var outputArray: [Float] = []
+        AudioContext.load(fromAudioURL: url, completionHandler: { audioContext in
+            guard let audioContext = audioContext else {
+                fatalError("Couldn't create the audioContext")
+            }
+            outputArray = self.render(audioContext: audioContext, targetSamples: 300)
+        })
+
+        self.meteringLevels = outputArray
 
+        print(self.meteringLevels)
         guard self.meteringLevels != nil else {
            fatalError("trying to read audio visualization of non initialized sound record")
         }
 
-        let audioNodeFileLength = AVAudioFrameCount(track.length)
-        let duration = Double(audioNodeFileLength) / 44100.0 // Divide by the AVSampleRateKey in the recorder settings
-
-        self.play(for: duration)
+        self.play(for: 10)
     }
 
     // MARK: - Mask + Gradient
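
One caveat on this last hunk: AudioContext.load invokes its completion handler asynchronously (inside loadValuesAsynchronously), so outputArray is very likely still empty when self.meteringLevels = outputArray executes, and play(for: 10) hard-codes the duration the removed code used to derive from the file length. A hedged sketch of a callback-shaped alternative, written as a method on the same class; readLevels(from:completion:) is an invented name, not part of this commit:

    // Hypothetical variant that defers the assignment until rendering is done.
    func readLevels(from url: URL, completion: @escaping ([Float]) -> Void) {
        AudioContext.load(fromAudioURL: url) { audioContext in
            guard let audioContext = audioContext else {
                fatalError("Couldn't create the audioContext")
            }
            let levels = self.render(audioContext: audioContext, targetSamples: 300)
            DispatchQueue.main.async {
                // Assign meteringLevels and start playback here instead.
                completion(levels)
            }
        }
    }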
