FastLED 3.9.15
Loading...
Searching...
No Matches
vocal.h
Go to the documentation of this file.
1// Vocal - Human voice detection using spectral characteristics
2// Part of FastLED Audio System v2.0 - Phase 3 (Differentiators)
3//
4// Detects human voice in audio using spectral centroid, spectral rolloff,
5// and formant ratio analysis. Provides confidence-based detection with
6// hysteresis for stable vocal/non-vocal classification.
7
8#pragma once
9
12#include "fl/math/math.h"
13#include "fl/stl/function.h"
14#include "fl/stl/shared_ptr.h"
15#include "fl/stl/noexcept.h"
16
17namespace fl {
18namespace audio {
19namespace detector {
20
// Detector that reports whether the current audio contains a human voice.
// Per the file header it combines spectral features into a confidence value
// and applies hysteresis so the vocal/non-vocal state does not flicker.
21class Vocal : public Detector {
22public:
24 ~Vocal() FL_NOEXCEPT override;
25
26 // Detector interface
27 void update(shared_ptr<Context> context) override;
28 void fireCallbacks() override;
 // This detector always requests FFT data.
29 bool needsFFT() const override { return true; }
30 const char* getName() const override { return "Vocal"; }
31 void reset() override;
 // Stores the sample rate in mSampleRate; nothing else is recomputed here.
32 void setSampleRate(int sampleRate) override { mSampleRate = sampleRate; }
33
34 // Callbacks (multiple listeners supported)
 // NOTE(review): the firing conditions live in the implementation file;
 // presumably onVocal reports the current state and onVocalStart/onVocalEnd
 // fire on transitions - confirm against fireCallbacks().
35 function_list<void(u8 active)> onVocal;
36 function_list<void()> onVocalStart;
37 function_list<void()> onVocalEnd;
38
39 // State access
 // Current classification flag (mVocalActive).
40 bool isVocal() const { return mVocalActive; }
 // Smoothed confidence: the current value of mConfidenceSmoother, not the
 // raw per-frame confidence.
41 float getConfidence() const { return mConfidenceSmoother.value(); }
 // Sets the "on" threshold and derives the "off" threshold as
 // threshold - 0.13, clamped so it never goes below 0. The 0.13 gap matches
 // the spacing of the default values (0.65 / 0.52).
42 void setThreshold(float threshold) { mOnThreshold = threshold; mOffThreshold = fl::max(0.0f, threshold - 0.13f); }
 // NOTE: despite the "Alpha" in the name, the argument is forwarded to
 // mConfidenceSmoother.setTau(), i.e. it is a time constant (tau), not a
 // per-frame blend factor.
43 void setSmoothingAlpha(float tau) { mConfidenceSmoother.setTau(tau); }
44
46
47private:
50 bool mStateChanged = false;
 // Threshold pair with mOnThreshold > mOffThreshold by default; presumably the
 // enter/leave levels of the hysteresis described in the file header.
52 float mOnThreshold = 0.65f;
53 float mOffThreshold = 0.52f;
54 // Time-aware confidence smoothing (tau=0.05s ≈ old alpha=0.7 at 43fps)
56 int mFramesInState = 0; // Debounce counter
57 static constexpr int MIN_FRAMES_TO_TRANSITION = 3; // Debounce: require N frames before state change
 // Most recently stored feature values; all start at 0 and are exposed to
 // tests through the diagnostics accessors.
61 float mSpectralFlatness = 0.0f;
62 float mHarmonicDensity = 0.0f;
63 float mVocalPresenceRatio = 0.0f;
64 float mSpectralFlux = 0.0f;
65 float mSpectralVariance = 0.0f;
66 float mEnvelopeJitter = 0.0f;
68 float mZeroCrossingCV = 0.0f;
73 fl::vector<float> mFluxNormBins; // Reusable buffer for spectral flux (avoids per-frame allocation)
 // Defaults to 44100; overwritten by setSampleRate().
75 int mSampleRate = 44100;
 // Bin count of the broad FFT (16 by default, matching the "16 bins" noted on
 // computeBroadSpectralFeatures below).
77 int mBroadNumBins = 16;
78 // Cached formant bin indices for the narrow formant FFT
79 int mFormantCachedBinCount = -1; // Invalidation sentinel
82
85
86 // Formant ratio from high-res narrow FFT (64 bins, 200-3500 Hz)
87 void computeFormantRatio(const fft::Bins& formantFft);
88 // Broad spectral features from low-res wide FFT (16 bins, 174.6-4698.3 Hz)
89 void computeBroadSpectralFeatures(const fft::Bins& broadFft);
90 // Vocal presence ratio from broad FFT linear bins
91 float calculateVocalPresenceRatio(const fft::Bins& broadFft);
92 // Fused PCM pass: computes envelope jitter + shimmer AND zero-crossing CV
93 // in a single traversal. Saves one full PCM pass (~2-3 us).
 // Combines the listed feature values into a single raw confidence value.
 // NOTE(review): weighting and output range are defined in the implementation
 // file - confirm there before relying on a specific range.
96 float calculateRawConfidence(float formantRatio,
97 float spectralFlatness, float harmonicDensity,
98 float vocalPresenceRatio, float spectralFlux,
99 float spectralVariance);
100};
101
102// Test-only accessor for internal diagnostic state
// Each static getter below takes a Vocal by const reference and returns one of
// its private members by value; none of them modifies the detector.
 // Bin counts: getNumBins() reads the formant bin count (mFormantNumBins),
 // getBroadNumBins() reads the broad bin count (mBroadNumBins).
104 static int getNumBins(const Vocal& d) { return d.mFormantNumBins; }
105 static int getBroadNumBins(const Vocal& d) { return d.mBroadNumBins; }
 // Feature values as last stored on the detector.
106 static float getSpectralFlatness(const Vocal& d) { return d.mSpectralFlatness; }
107 static float getHarmonicDensity(const Vocal& d) { return d.mHarmonicDensity; }
108 static float getSpectralCentroid(const Vocal& d) { return d.mSpectralCentroid; }
109 static float getSpectralRolloff(const Vocal& d) { return d.mSpectralRolloff; }
110 static float getFormantRatio(const Vocal& d) { return d.mFormantRatio; }
111 static float getVocalPresenceRatio(const Vocal& d) { return d.mVocalPresenceRatio; }
112 static float getSpectralFlux(const Vocal& d) { return d.mSpectralFlux; }
113 static float getSpectralVariance(const Vocal& d) { return d.mSpectralVariance; }
114 static float getEnvelopeJitter(const Vocal& d) { return d.mEnvelopeJitter; }
116 static float getZeroCrossingCV(const Vocal& d) { return d.mZeroCrossingCV; }
 // Returns mConfidence directly, whereas Vocal::getConfidence() returns the
 // value held by mConfidenceSmoother.
117 static float getRawConfidence(const Vocal& d) { return d.mConfidence; }
118};
119
120} // namespace detector
121} // namespace audio
122} // namespace fl
ExponentialSmoother< float > mZcCVSmoother
Definition vocal.h:71
function_list< void(u8 active)> onVocal
Definition vocal.h:35
void setSmoothingAlpha(float tau)
Definition vocal.h:43
float calculateVocalPresenceRatio(const fft::Bins &broadFft)
ExponentialSmoother< float > mEnvelopeJitterSmoother
Definition vocal.h:69
static constexpr int MIN_FRAMES_TO_TRANSITION
Definition vocal.h:57
fl::vector< float > mFluxNormBins
Definition vocal.h:73
shared_ptr< const fft::Bins > mRetainedFormantFFT
Definition vocal.h:83
friend struct VocalDetectorDiagnostics
Definition vocal.h:45
ExponentialSmoother< float > mConfidenceSmoother
Definition vocal.h:55
void computePCMTimeDomainFeatures(span< const i16 > pcm)
SpectralVariance< float > mSpectralVarianceFilter
Definition vocal.h:74
fl::vector< float > mPrevBins
Definition vocal.h:72
void setSampleRate(int sampleRate) override
Definition vocal.h:32
bool needsFFT() const override
Definition vocal.h:29
ExponentialSmoother< float > mAcfIrregularitySmoother
Definition vocal.h:70
float getConfidence() const
Definition vocal.h:41
float calculateAutocorrelationIrregularity(span< const i16 > pcm)
float mAutocorrelationIrregularity
Definition vocal.h:67
void setThreshold(float threshold)
Definition vocal.h:42
void computeFormantRatio(const fft::Bins &formantFft)
const char * getName() const override
Definition vocal.h:30
function_list< void()> onVocalStart
Definition vocal.h:36
bool isVocal() const
Definition vocal.h:40
function_list< void()> onVocalEnd
Definition vocal.h:37
void update(shared_ptr< Context > context) override
Definition vocal.cpp.hpp:26
shared_ptr< const fft::Bins > mRetainedBroadFFT
Definition vocal.h:84
void computeBroadSpectralFeatures(const fft::Bins &broadFft)
void fireCallbacks() override
Definition vocal.cpp.hpp:88
float calculateRawConfidence(float formantRatio, float spectralFlatness, float harmonicDensity, float vocalPresenceRatio, float spectralFlux, float spectralVariance)
~Vocal() FL_NOEXCEPT override
unsigned char u8
Definition stdint.h:131
constexpr common_type_t< T, U > max(T a, U b) FL_NOEXCEPT
Definition math.h:75
Base definition for an LED controller.
Definition crgb.hpp:179
#define FL_NOEXCEPT
static float getSpectralVariance(const Vocal &d)
Definition vocal.h:113
static float getSpectralFlatness(const Vocal &d)
Definition vocal.h:106
static float getHarmonicDensity(const Vocal &d)
Definition vocal.h:107
static float getAutocorrelationIrregularity(const Vocal &d)
Definition vocal.h:115
static float getSpectralCentroid(const Vocal &d)
Definition vocal.h:108
static float getSpectralFlux(const Vocal &d)
Definition vocal.h:112
static float getRawConfidence(const Vocal &d)
Definition vocal.h:117
static float getFormantRatio(const Vocal &d)
Definition vocal.h:110
static int getNumBins(const Vocal &d)
Definition vocal.h:104
static int getBroadNumBins(const Vocal &d)
Definition vocal.h:105
static float getSpectralRolloff(const Vocal &d)
Definition vocal.h:109
static float getVocalPresenceRatio(const Vocal &d)
Definition vocal.h:111
static float getEnvelopeJitter(const Vocal &d)
Definition vocal.h:114
static float getZeroCrossingCV(const Vocal &d)
Definition vocal.h:116