FastLED 3.9.15
Loading...
Searching...
No Matches
vocal.cpp.hpp
Go to the documentation of this file.
1// Vocal - Human voice detection implementation
2// Part of FastLED Audio System v2.0 - Phase 3 (Differentiators)
3
6#include "fl/audio/fft/fft.h"
7#include "fl/math/math.h"
8#include "fl/stl/algorithm.h"
9#include "fl/stl/noexcept.h"
10
11namespace fl {
12namespace audio {
13namespace detector {
14
// Member-initializer list of the Vocal constructor: the detector starts
// inactive with zeroed confidence and spectral features.
// NOTE(review): the constructor's signature line and listing line 17 are
// absent from this listing (the embedded numbering jumps 14 -> 16 -> 18);
// confirm the missing initializer against the original file.
16 : mVocalActive(false)
18 , mConfidence(0.0f)
19 , mSpectralCentroid(0.0f)
20 , mSpectralRolloff(0.0f)
21 , mFormantRatio(0.0f)
22{}
23
// Defaulted destructor, defined out of line in this implementation file.
24Vocal::~Vocal() FL_NOEXCEPT = default;
25
// Per-frame entry point. Fetches two FFT views and the raw PCM from `context`,
// derives the formant ratio, computes a raw confidence, smooths it over time
// and applies on/off hysteresis to decide mVocalActive.
// NOTE(review): several source lines are absent from this listing (the
// embedded numbering skips 43, 46-47, 53-57, 61-62, 75-76, 78, 81 and 85), so
// the calls that consume broadFft / pcm, the arguments to
// calculateRawConfidence and the debounce bookkeeping are not visible here.
// Confirm against the original file before editing this function.
// NOTE(review): `context` and both getFFT() results are dereferenced without a
// null check; presumably the caller guarantees they are valid — verify.
26void Vocal::update(shared_ptr<Context> context) {
27 mSampleRate = context->getSampleRate();
28 // Dual FFT: high-resolution formant analysis + broad spectral features.
29 // Formant FFT: 64 CQ bins in 200-3500 Hz — concentrates resolution on
30 // F1/F2/F3 formants (94% bin utilization vs 73% with full-range).
31 // Broad FFT: 16 LOG_REBIN bins in 174.6-4698.3 Hz — full spectral
32 // coverage for flatness, density, flux, and vocal presence ratio.
// The returned shared pointers are stored in members, so the bins referenced
// below stay alive after this call returns.
33 mRetainedFormantFFT = context->getFFT(64, 200.0f, 3500.0f);
34 mRetainedBroadFFT = context->getFFT(16, 174.6f, 4698.3f);
35 const fft::Bins& formantFft = *mRetainedFormantFFT;
36 const fft::Bins& broadFft = *mRetainedBroadFFT;
37 mFormantNumBins = static_cast<int>(formantFft.raw().size());
38 mBroadNumBins = static_cast<int>(broadFft.raw().size());
39
40 // Formant ratio from high-res narrow FFT
41 computeFormantRatio(formantFft);
42 // Broad spectral features (flatness, density, flux) from wide FFT
44
45 // Vocal presence ratio from broad FFT linear bins (needs 200-4000 Hz coverage)
48
49 // Calculate time-domain features from raw PCM
50 span<const i16> pcm = context->getPCM();
// dt is the buffer duration in seconds; it drives the time-aware smoother below.
51 const float dt = computeAudioDt(pcm.size(), context->getSampleRate());
52 // Fused pass: envelope jitter + zero-crossing CV in one PCM traversal
58
59 // Calculate raw confidence and apply time-aware smoothing
60 float rawConfidence = calculateRawConfidence(
63 float smoothedConfidence = mConfidenceSmoother.update(rawConfidence, dt);
64
65 // Hysteresis: use separate on/off thresholds to prevent chattering
// While active, the state is held as long as confidence stays at or above
// mOffThreshold; while inactive, it must reach mOnThreshold to switch on.
66 bool wantActive;
67 if (mVocalActive) {
68 wantActive = (smoothedConfidence >= mOffThreshold);
69 } else {
70 wantActive = (smoothedConfidence >= mOnThreshold);
71 }
72
73 // Debounce: require state to persist for MIN_FRAMES_TO_TRANSITION frames
// NOTE(review): the frame-counter lines of this debounce block are missing
// from the listing, which is why the braces below look unbalanced.
74 if (wantActive != mVocalActive) {
77 mVocalActive = wantActive;
79 }
80 } else {
82 }
83
84 // Track state changes for fireCallbacks
86}
87
// Body of Vocal::fireCallbacks(): notifies listeners once per state change.
// NOTE(review): the signature line (listing line 88) is absent from this
// listing; the name comes from the symbol index of this page.
89 if (mStateChanged) {
// Report the smoothed confidence scaled to a byte.
// Assumes the smoothed value stays within [0, 1] so the u8 cast cannot
// overflow — TODO confirm against the smoother's behaviour.
90 if (onVocal) onVocal(static_cast<u8>(mConfidenceSmoother.value() * 255.0f));
// NOTE(review): listing lines 91-93 are missing here; presumably they fire
// onVocalStart / onVocalEnd — confirm against the original file.
// Clear the flag so the callbacks are not fired again until the next change.
94 mStateChanged = false;
95 }
96}
97
// Restores the detector to its initial state: clears the active flags, zeroes
// every cached feature, resets the smoothers and drops the spectral-flux
// history buffers.
// NOTE(review): the signature line (listing line 98) is absent from this
// listing — presumably this is Vocal::reset(); confirm. Listing lines 112,
// 114-115, 119 and 121 are also missing, so further members may be reset there.
99 mVocalActive = false;
100 mPreviousVocalActive = false;
101 mConfidence = 0.0f;
102 mConfidenceSmoother.reset();
103 mSpectralCentroid = 0.0f;
104 mSpectralRolloff = 0.0f;
105 mFormantRatio = 0.0f;
106 mSpectralFlatness = 0.0f;
107 mHarmonicDensity = 0.0f;
108 mVocalPresenceRatio = 0.0f;
109 mSpectralFlux = 0.0f;
110 mSpectralVariance = 0.0f;
111 mEnvelopeJitter = 0.0f;
113 mZeroCrossingCV = 0.0f;
116 mZcCVSmoother.reset();
// Dropping the previous-frame bins means the next spectral-flux computation
// has no reference frame to compare against.
117 mPrevBins.clear();
118 mFluxNormBins.clear();
// Restart the debounce counter.
120 mFramesInState = 0;
122}
123
124void Vocal::computeFormantRatio(const fft::Bins& formantFft) {
125 // Formant ratio from high-resolution narrow FFT (64 bins, 200-3500 Hz).
126 // All bins concentrated in the formant region — ~94% utilization.
127 const auto& bins = formantFft.raw();
128 const int n = static_cast<int>(bins.size());
129 if (n < 8) {
130 mFormantRatio = 0.0f;
131 return;
132 }
133
134 // Ensure formant bin cache is current (freqToBin uses logf internally)
135 if (mFormantCachedBinCount != n) {
136 mFormantF1MinBin = fl::max(0, formantFft.freqToBin(250.0f));
137 mFormantF1MaxBin = fl::min(n - 1, formantFft.freqToBin(900.0f));
138 mFormantF2MinBin = fl::max(0, formantFft.freqToBin(1000.0f));
139 mFormantF2MaxBin = fl::min(n - 1, formantFft.freqToBin(3000.0f));
141 }
142
143 float f1Peak = 0.0f, f1Sum = 0.0f;
144 int f1Count = 0;
145 float f2Peak = 0.0f, f2Sum = 0.0f;
146 int f2Count = 0;
147
148 for (int i = 0; i < n; ++i) {
149 const float mag = bins[i];
150 if (i >= mFormantF1MinBin && i <= mFormantF1MaxBin) {
151 f1Peak = fl::max(f1Peak, mag);
152 f1Sum += mag;
153 ++f1Count;
154 }
155 if (i >= mFormantF2MinBin && i <= mFormantF2MaxBin) {
156 f2Peak = fl::max(f2Peak, mag);
157 f2Sum += mag;
158 ++f2Count;
159 }
160 }
161
162 if (f1Count > 0 && f2Count > 0) {
163 float f1Avg = f1Sum / static_cast<float>(f1Count);
164 float f2Avg = f2Sum / static_cast<float>(f2Count);
165 if (f1Peak >= f1Avg * 1.5f && f2Peak >= f2Avg * 1.5f && f1Peak >= 1e-6f) {
166 mFormantRatio = f2Peak / f1Peak;
167 } else {
168 mFormantRatio = 0.0f;
169 }
170 } else {
171 mFormantRatio = 0.0f;
172 }
173}
174
176 // Broad spectral features from low-res wide FFT (16 bins, 174.6-4698.3 Hz).
177 // Computes flatness, harmonic density, and spectral flux.
178 const auto& bins = broadFft.raw();
179 const int n = static_cast<int>(bins.size());
180 if (n == 0) {
181 mSpectralFlatness = 0.0f;
182 mHarmonicDensity = 0.0f;
183 mSpectralFlux = 0.0f;
184 return;
185 }
186
187 // === PASS 1: Flatness + density peak + magnitudeSum ===
188 float magnitudeSum = 0.0f;
189 float sumLn = 0.0f;
190 float sumRaw = 0.0f;
191 int flatnessCount = 0;
192 float peak = 0.0f;
193
194 for (int i = 0; i < n; ++i) {
195 const float mag = bins[i];
196 magnitudeSum += mag;
197 if (mag > 1e-6f) {
198 sumLn += fast_logf_approx(mag);
199 sumRaw += mag;
200 ++flatnessCount;
201 }
202 if (mag > peak) peak = mag;
203 }
204
205 // --- Flatness result ---
206 if (flatnessCount >= 2) {
207 float geometricMean = fl::expf(sumLn / static_cast<float>(flatnessCount));
208 float arithmeticMean = sumRaw / static_cast<float>(flatnessCount);
209 mSpectralFlatness = (arithmeticMean < 1e-6f) ? 0.0f : geometricMean / arithmeticMean;
210 } else {
211 mSpectralFlatness = 0.0f;
212 }
213
214 // === PASS 2: Density count + spectral flux ===
215 mHarmonicDensity = 0.0f;
216 mSpectralFlux = 0.0f;
217
218 if (magnitudeSum >= 1e-6f) {
219 const float densityThreshold = peak * 0.1f;
220 const float invSum = 1.0f / magnitudeSum;
221 const bool hasPrev = (mPrevBins.size() == static_cast<fl::size>(n));
222 int densityCount = 0;
223 float flux = 0.0f;
224
225 mFluxNormBins.resize(n);
226 for (int i = 0; i < n; ++i) {
227 const float mag = bins[i];
228 if (mag >= densityThreshold) ++densityCount;
229 float norm = mag * invSum;
230 mFluxNormBins[i] = norm;
231 if (hasPrev) {
232 float diff = norm - mPrevBins[i];
233 flux += diff * diff;
234 }
235 }
236
237 mHarmonicDensity = static_cast<float>(densityCount);
238 mSpectralFlux = hasPrev ? fl::sqrtf(flux) : 0.0f;
240 } else {
241 mPrevBins.clear();
242 }
243}
244
245
247 // Vocal presence ratio using LINEAR bins for better high-frequency resolution.
248 // CQ bins compress the presence band (2-4 kHz) into few bins; linear bins
249 // give uniform frequency resolution across the spectrum.
250 //
251 // Ratio = mean energy in 2-4 kHz (F3 formant / sibilance) /
252 // mean energy in 80-400 Hz (guitar fundamentals / bass)
253 // Voice adds energy in the 2-4 kHz range; guitar energy decays above 2 kHz.
254 auto linearBins = fft.linear();
255 if (linearBins.size() < 4) return 0.0f;
256
257 const int numBins = static_cast<int>(linearBins.size());
258 const float fmin = fft.linearFmin();
259 const float fmax = fft.linearFmax();
260 const float binWidth = (fmax - fmin) / static_cast<float>(numBins);
261
262 // Map frequency to linear bin index
263 auto freqToLinBin = [&](float freq) -> int {
264 if (freq <= fmin) return 0;
265 if (freq >= fmax) return numBins - 1;
266 return fl::min(numBins - 1, static_cast<int>((freq - fmin) / binWidth));
267 };
268
269 const int presMinBin = freqToLinBin(2000.0f);
270 const int presMaxBin = freqToLinBin(4000.0f);
271 const int bassMinBin = freqToLinBin(200.0f);
272 const int bassMaxBin = freqToLinBin(500.0f);
273
274 float presEnergy = 0.0f;
275 int presCount = 0;
276 for (int i = presMinBin; i <= presMaxBin && i < numBins; ++i) {
277 presEnergy += linearBins[i] * linearBins[i];
278 ++presCount;
279 }
280 if (presCount > 0) presEnergy /= static_cast<float>(presCount);
281
282 float bassEnergy = 0.0f;
283 int bassCount = 0;
284 for (int i = bassMinBin; i <= bassMaxBin && i < numBins; ++i) {
285 bassEnergy += linearBins[i] * linearBins[i];
286 ++bassCount;
287 }
288 if (bassCount > 0) bassEnergy /= static_cast<float>(bassCount);
289
290 if (bassEnergy < 1e-12f) return 0.0f;
291 return presEnergy / bassEnergy;
292}
293
294
296 // Fused single-pass computation of envelope jitter + shimmer AND
297 // zero-crossing CV. Previously two separate PCM traversals; now one.
298 // Saves ~2-3 us by eliminating redundant PCM reads and cache misses.
299 const int n = static_cast<int>(pcm.size());
300 if (n < 44) {
301 mEnvelopeJitter = 0.0f;
302 mZeroCrossingCV = 0.0f;
303 return;
304 }
305
306 const float normFactor = 1.0f / 32768.0f;
307 const int halfWin = fl::max(2, mSampleRate / 4000); // ~11 samples at 44100
308 const int winSize = 2 * halfWin + 1;
309 const float invWinSize = 1.0f / static_cast<float>(winSize);
310
311 // Seed the sliding window sum
312 float windowSum = 0.0f;
313 for (int j = 0; j < winSize && j < n; ++j) {
314 windowSum += fl::abs(static_cast<float>(pcm[j])) * normFactor;
315 }
316
317 // Envelope jitter accumulators
318 float sumEnv = 0.0f;
319 float sumDev = 0.0f;
320 int count = 0;
321 float sumPeaks = 0.0f;
322 float sumSqPeaks = 0.0f;
323 int numCycles = 0;
324 float currentPeak = 0.0f;
325 bool wasPositive = pcm[halfWin] >= 0;
326
327 // Zero-crossing CV accumulators (fused into same loop)
328 int prevCrossing = -1;
329 int numIntervals = 0;
330 float sumIntervals = 0.0f;
331 float sumSqIntervals = 0.0f;
332
333 for (int i = halfWin; i < n - halfWin; ++i) {
334 float absVal = fl::abs(static_cast<float>(pcm[i])) * normFactor;
335 float smoothed = windowSum * invWinSize;
336
337 sumEnv += smoothed;
338 sumDev += fl::abs(absVal - smoothed);
339 ++count;
340
341 // Shimmer + zero-crossing detection (shared)
342 currentPeak = fl::max(currentPeak, absVal);
343 bool isPositive = pcm[i] >= 0;
344 if (isPositive != wasPositive) {
345 // Shimmer: track peaks between zero crossings
346 if (currentPeak > 0.01f) {
347 sumPeaks += currentPeak;
348 sumSqPeaks += currentPeak * currentPeak;
349 ++numCycles;
350 }
351 currentPeak = 0.0f;
352
353 // ZC CV: track interval statistics
354 if (prevCrossing >= 0) {
355 float interval = static_cast<float>(i - prevCrossing);
356 sumIntervals += interval;
357 sumSqIntervals += interval * interval;
358 ++numIntervals;
359 }
360 prevCrossing = i;
361 }
362 wasPositive = isPositive;
363
364 // Slide window
365 if (i + 1 < n - halfWin) {
366 windowSum -= fl::abs(static_cast<float>(pcm[i - halfWin])) * normFactor;
367 windowSum += fl::abs(static_cast<float>(pcm[i + halfWin + 1])) * normFactor;
368 }
369 }
370
371 // --- Envelope jitter result ---
372 if (sumEnv < 1e-6f || count == 0) {
373 mEnvelopeJitter = 0.0f;
374 } else {
375 float envelopeJitter = (sumDev / static_cast<float>(count))
376 / (sumEnv / static_cast<float>(count));
377
378 float shimmer = 0.0f;
379 if (numCycles >= 3) {
380 float meanPeak = sumPeaks / static_cast<float>(numCycles);
381 if (meanPeak > 0.01f) {
382 float variance = sumSqPeaks / static_cast<float>(numCycles)
383 - meanPeak * meanPeak;
384 if (variance < 0.0f) variance = 0.0f;
385 shimmer = fl::sqrtf(variance) / meanPeak;
386 }
387 }
388 mEnvelopeJitter = envelopeJitter + shimmer * 0.5f;
389 }
390
391 // --- Zero-crossing CV result ---
392 if (numIntervals < 2) {
393 mZeroCrossingCV = 0.0f;
394 } else {
395 float mean = sumIntervals / static_cast<float>(numIntervals);
396 if (mean < 1e-6f) {
397 mZeroCrossingCV = 0.0f;
398 } else {
399 float variance = sumSqIntervals / static_cast<float>(numIntervals)
400 - mean * mean;
401 if (variance < 0.0f) variance = 0.0f;
402 mZeroCrossingCV = fl::sqrtf(variance) / mean;
403 }
404 }
405}
406
408 const int n = static_cast<int>(pcm.size());
409
410 // Vocal fundamental lag range at mSampleRate
411 const int minLag = fl::max(2, mSampleRate / 500); // 500 Hz
412 const int maxLag = fl::min(n / 2, mSampleRate / 172); // 172 Hz
413
414 if (minLag >= maxLag || maxLag >= n) return 0.0f;
415
416 // Subsample by 4: safe since step(4) << minLag(88).
417 // normFactor² cancels in the ratio sum/acf0, so work in raw i16 space.
418 const int step = 4;
419
420 // ACF[0] = energy (subsampled)
421 float acf0 = 0.0f;
422 for (int i = 0; i < n; i += step) {
423 float s = static_cast<float>(pcm[i]);
424 acf0 += s * s;
425 }
426 if (acf0 < 1.0f) return 0.0f;
427
428 // Probe 16 evenly-spaced lags, each with subsampled inner loop.
429 // Total: ~16 × (n/4)/lag ≈ 1700 MACs (vs 34K without subsampling).
430 const int lagRange = maxLag - minLag;
431 const int maxProbes = 16;
432 const int lagStep = fl::max(1, lagRange / maxProbes);
433 float bestPeak = 0.0f;
434 for (int lag = minLag; lag <= maxLag; lag += lagStep) {
435 float sum = 0.0f;
436 for (int i = 0; i + lag < n; i += step) {
437 sum += static_cast<float>(pcm[i]) * static_cast<float>(pcm[i + lag]);
438 }
439 float normalized = sum / acf0;
440 bestPeak = fl::max(bestPeak, normalized);
441 }
442
443 return 1.0f - bestPeak; // 0 = perfectly periodic, 1 = no periodicity
444}
445
446
// Turns the current frame's features into a raw vocal confidence in [0, 1].
//
// Steps, in order:
//   1. Map formantRatio, spectralFlatness, harmonicDensity and spectralFlux to
//      continuous per-feature scores.
//   2. Combine the four scores with fixed weights.
//   3. Multiply by a product of boosts/penalties driven by spectralVariance and
//      the members mEnvelopeJitter, mAutocorrelationIrregularity and
//      mZeroCrossingCV; the product is capped to [0.50, 1.60].
//   4. Apply a formant gate/boost derived from the formant score.
//   5. Clamp to [0, 1], store in mConfidence and return it.
//
// @param formantRatio       F2/F1 peak ratio.
// @param spectralFlatness   Geometric/arithmetic mean ratio of the broad bins.
// @param harmonicDensity    Count of broad bins near the peak level.
// @param vocalPresenceRatio Not referenced anywhere in the visible body.
// @param spectralFlux       Frame-to-frame change of the normalized spectrum.
// @param spectralVariance   Temporal spectral variance; only used when > 0.01.
// @return The clamped confidence (also written to mConfidence).
//
// NOTE(review): listing lines 537 and 559 are absent from this listing; they
// presumably hold the `if (...) {` headers of the two penalty blocks marked
// below. Their exact conditions cannot be recovered from here — confirm
// against the original file before changing this function.
447float Vocal::calculateRawConfidence(float formantRatio,
448 float spectralFlatness, float harmonicDensity,
449 float vocalPresenceRatio, float spectralFlux,
450 float spectralVariance) {
451 // Centroid and rolloff removed from scoring (combined 0.04 weight — negligible
452 // impact on accuracy). Centroid still computed for diagnostics.
453
454 // Continuous confidence scores for each feature (no hard binary cutoffs)
455
456 // Formant score: voice has F2/F1 ratio; real audio ratio ~0.14, synthetic ~0.7
457 // Ramp zone 0-0.12 catches very low ratios (pure tone, sparse sines).
458 // /i/ vowel (form~0.15) and guitar (form~0.17) overlap in formant ratio,
459 // so guitar rejection relies on time-domain penalties (jitter, zcCV) instead.
460 float formantScore;
461 if (formantRatio < 0.12f) {
462 formantScore = formantRatio / 0.12f * 0.30f; // Ramp: very low → 0..0.30
463 } else {
464 // Peak at 0.5, floor 0.30, width 0.70 (accommodates /i/ vowel)
465 float dist = fl::abs(formantRatio - 0.5f);
466 formantScore = fl::max(0.30f, 1.0f - dist / 0.70f);
467 }
468
469 // Spectral flatness score: wider window to survive mix contamination.
470 // Isolated vocal ~0.44, vocal-in-mix ~0.60, backing ~0.57.
471 // Peak at 0.43, width ±0.22 — mix vocal (0.60) still scores ~0.22.
472 float flatnessScore;
473 if (spectralFlatness < 0.20f) {
474 // Too tonal (pure tones, sparse sines)
475 flatnessScore = spectralFlatness / 0.20f * 0.3f;
476 } else if (spectralFlatness <= 0.65f) {
477 // Voice range — peak at 0.43, wide window
478 float dist = fl::abs(spectralFlatness - 0.43f);
479 flatnessScore = fl::max(0.0f, 1.0f - dist / 0.22f);
480 } else {
481 // Too flat — noise-like (steep decay)
482 flatnessScore = fl::max(0.0f, 1.0f - (spectralFlatness - 0.65f) / 0.10f);
483 }
484
485 // Harmonic density score: with 16 broad bins (scaled from 64-bin calibration).
486 // Real audio: ~6-7, synthetic voice: 8-12, pure tone: ~4, noise: ~15
487 float densityScore;
488 if (harmonicDensity < 2.5f) {
489 // Too sparse — pure tone or near-tonal
490 densityScore = harmonicDensity / 2.5f * 0.3f;
491 } else if (harmonicDensity <= 12.5f) {
492 // Voice-like range
493 densityScore = 0.3f + (harmonicDensity - 2.5f) / 10.0f * 0.7f;
494 } else {
495 // Too dense — noise-like
496 densityScore = fl::max(0.0f, 1.0f - (harmonicDensity - 12.5f) / 3.5f);
497 }
498
499 // Spectral flux score: voice has higher frame-to-frame spectral change
500 // than sustained instruments (phoneme transitions, formant shifts).
501 // Thresholds scaled by sqrt(16/64) = 0.5 for 16-bin broad FFT.
502 float spectralFluxScore;
503 if (spectralFlux < 0.01f) {
504 spectralFluxScore = 0.0f;
505 } else if (spectralFlux < 0.15f) {
506 spectralFluxScore = (spectralFlux - 0.01f) / 0.14f;
507 } else {
508 spectralFluxScore = 1.0f;
509 }
510
511 // Weighted average — centroid/rolloff removed (combined 0.04 weight,
512 // 0-5% separation between voice and guitar — not discriminative).
513 // Budget redistributed to formant/flatness (best discriminators).
// NOTE(review): the four weights below sum to 0.90, not 1.0, so the value is
// a weighted sum rather than a true average — confirm this is intended.
514 float weightedAvg = 0.33f * formantScore
515 + 0.35f * flatnessScore
516 + 0.12f * densityScore
517 + 0.10f * spectralFluxScore;
518
519 // Accumulate all multiplicative boosts into totalBoost, then cap.
520 // This prevents the theoretical 3.2x runaway when all boosts fire
521 // simultaneously. Cap at 1.60x allows two boosts to stack but
522 // prevents pathological cases. Synthetic signals trigger zero boosts
523 // (jitter <0.15, acfIrreg <0.30), so existing tests are unaffected.
524 float totalBoost = 1.0f;
525
526 // Temporal spectral variance boost (not base score).
527 // Voice has higher variance (~1.01/~0.84 in 3-way) than guitar (~0.74).
528 // Applied only when variance > 0 (real multi-frame audio).
529 if (spectralVariance > 0.01f) {
530 float varianceBoost = 1.0f + 0.40f * (spectralVariance - 0.74f) / 0.76f;
531 varianceBoost = fl::clamp(varianceBoost, 0.90f, 1.25f);
532 totalBoost *= varianceBoost;
533 }
534
535 // Low-jitter penalty: very periodic signals (guitar jit=0.10) get penalized.
536 // Voice jitter is always >0.50, so this only penalizes instruments.
// NOTE(review): the `if (...) {` header for this penalty (listing line 537)
// is missing from the listing; the closing brace below belongs to it.
// The factor falls linearly from 1.0 at jitter 0.15 to 0.75 at jitter 0.
538 float periodicPenalty = 1.0f - 0.25f * (0.15f - mEnvelopeJitter) / 0.15f;
539 totalBoost *= periodicPenalty;
540 }
541
542 // Envelope jitter boost: only for very high jitter (>0.55).
543 // Drums alone reach 0.61, so only extreme jitter gets a small boost.
544 if (mEnvelopeJitter > 0.55f) {
545 float jitterBoost = 1.0f + 0.8f * (mEnvelopeJitter - 0.55f);
546 jitterBoost = fl::clamp(jitterBoost, 1.0f, 1.15f);
547 totalBoost *= jitterBoost;
548 }
549
550 // Autocorrelation irregularity boost: only above 0.75 (above drum baseline).
551 if (mAutocorrelationIrregularity > 0.75f) {
552 float acfBoost = 1.0f + 1.0f * (mAutocorrelationIrregularity - 0.75f);
553 acfBoost = fl::clamp(acfBoost, 1.0f, 1.25f);
554 totalBoost *= acfBoost;
555 }
556
557 // Low-zcCV penalty: very regular zero-crossings (guitar zcCV=0.05) penalized.
558 // Voice zcCV is always >0.30, so this only hits periodic instruments.
// NOTE(review): the `if (...) {` header for this penalty (listing line 559)
// is missing from the listing; the closing brace below belongs to it.
// The factor falls linearly from 1.0 at zcCV 0.10 to 0.80 at zcCV 0.
560 float zcPenalty = 1.0f - 0.20f * (0.10f - mZeroCrossingCV) / 0.10f;
561 totalBoost *= zcPenalty;
562 }
563
564 // Zero-crossing CV: peaked boost for moderate values, penalty for high.
// NOTE(review): the boost ramps up to its 1.20 clamp just below zcCV 0.80
// and then drops to exactly 1.0 at 0.80 — a step rather than a smooth peak.
565 if (mZeroCrossingCV > 0.02f) {
566 float zcBoost;
567 if (mZeroCrossingCV < 0.80f) {
568 zcBoost = 1.0f + 0.25f * (mZeroCrossingCV - 0.10f) / 0.70f;
569 } else if (mZeroCrossingCV < 1.50f) {
570 zcBoost = 1.0f;
571 } else {
572 zcBoost = 1.0f - 0.15f * (mZeroCrossingCV - 1.50f) / 1.00f;
573 }
574 zcBoost = fl::clamp(zcBoost, 0.80f, 1.20f);
575 totalBoost *= zcBoost;
576 }
577
578 // Combined aperiodicity-jitter boost: when BOTH indicate voice.
579 if (mAutocorrelationIrregularity > 0.75f && mEnvelopeJitter > 0.55f) {
580 float minExcess = fl::min(mAutocorrelationIrregularity - 0.75f,
581 mEnvelopeJitter - 0.55f);
582 float combinedBoost = 1.0f + 1.5f * minExcess;
583 combinedBoost = fl::clamp(combinedBoost, 1.0f, 1.20f);
584 totalBoost *= combinedBoost;
585 }
586
587 // Cap total boost chain to prevent runaway
588 totalBoost = fl::clamp(totalBoost, 0.50f, 1.60f);
589 weightedAvg *= totalBoost;
590
591 // Formant gate/boost: weak formant → gate down, strong formant → mild boost.
// The two branches meet at formantScore 0.40, where both give 1.0.
592 float formantMultiplier;
593 if (formantScore < 0.40f) {
594 formantMultiplier = formantScore * 2.5f; // gate (0.0 to 1.0)
595 } else {
596 formantMultiplier = 1.0f + 0.40f * (formantScore - 0.40f); // boost (1.0 to 1.24)
597 }
598
599 // Clamp final confidence to [0, 1]
600 mConfidence = fl::clamp(weightedAvg * formantMultiplier, 0.0f, 1.0f);
601 return mConfidence;
602}
603
604} // namespace detector
605} // namespace audio
606} // namespace fl
ExponentialSmoother< float > mZcCVSmoother
Definition vocal.h:71
function_list< void(u8 active)> onVocal
Definition vocal.h:35
float calculateVocalPresenceRatio(const fft::Bins &broadFft)
ExponentialSmoother< float > mEnvelopeJitterSmoother
Definition vocal.h:69
static constexpr int MIN_FRAMES_TO_TRANSITION
Definition vocal.h:57
fl::vector< float > mFluxNormBins
Definition vocal.h:73
shared_ptr< const fft::Bins > mRetainedFormantFFT
Definition vocal.h:83
ExponentialSmoother< float > mConfidenceSmoother
Definition vocal.h:55
void computePCMTimeDomainFeatures(span< const i16 > pcm)
SpectralVariance< float > mSpectralVarianceFilter
Definition vocal.h:74
fl::vector< float > mPrevBins
Definition vocal.h:72
ExponentialSmoother< float > mAcfIrregularitySmoother
Definition vocal.h:70
float calculateAutocorrelationIrregularity(span< const i16 > pcm)
float mAutocorrelationIrregularity
Definition vocal.h:67
void computeFormantRatio(const fft::Bins &formantFft)
function_list< void()> onVocalStart
Definition vocal.h:36
function_list< void()> onVocalEnd
Definition vocal.h:37
void update(shared_ptr< Context > context) override
Definition vocal.cpp.hpp:26
shared_ptr< const fft::Bins > mRetainedBroadFFT
Definition vocal.h:84
void computeBroadSpectralFeatures(const fft::Bins &broadFft)
void fireCallbacks() override
Definition vocal.cpp.hpp:88
float calculateRawConfidence(float formantRatio, float spectralFlatness, float harmonicDensity, float vocalPresenceRatio, float spectralFlux, float spectralVariance)
~Vocal() FL_NOEXCEPT override
fl::span< const float > raw() const FL_NOEXCEPT
Definition fft.cpp.hpp:63
int freqToBin(float freq) const FL_NOEXCEPT
Definition fft.cpp.hpp:111
constexpr fl::size size() const FL_NOEXCEPT
Definition span.h:458
float computeAudioDt(fl::size pcmSize, int sampleRate) FL_NOEXCEPT
Compute the time delta (in seconds) for an audio buffer.
void swap(T &a, T &b) FL_NOEXCEPT
Definition s16x16x4.h:877
FL_DISABLE_WARNING_PUSH U constexpr common_type_t< T, U > min(T a, U b) FL_NOEXCEPT
Definition math.h:71
unsigned char u8
Definition stdint.h:131
float sqrtf(float value) FL_NOEXCEPT
Definition math.h:453
constexpr common_type_t< T, U > max(T a, U b) FL_NOEXCEPT
Definition math.h:75
float expf(float value) FL_NOEXCEPT
Definition math.h:398
float fast_logf_approx(float x) FL_NOEXCEPT
Definition math.h:406
constexpr enable_if< is_fixed_point< T >::value, T >::type step(T edge, T x) FL_NOEXCEPT
constexpr enable_if< is_fixed_point< T >::value, T >::type abs(T x) FL_NOEXCEPT
constexpr enable_if< is_fixed_point< T >::value, T >::type clamp(T x, T lo, T hi) FL_NOEXCEPT
Base definition for an LED controller.
Definition crgb.hpp:179
#define FL_NOEXCEPT