FastLED 3.9.15
Loading...
Searching...
No Matches

◆ calculateRawConfidence()

float fl::audio::detector::Vocal::calculateRawConfidence ( float formantRatio,
float spectralFlatness,
float harmonicDensity,
float vocalPresenceRatio,
float spectralFlux,
float spectralVariance )
private

Definition at line 447 of file vocal.cpp.hpp.

450 {
451 // Centroid and rolloff removed from scoring (combined 0.04 weight — negligible
452 // impact on accuracy). Centroid still computed for diagnostics.
453
454 // Continuous confidence scores for each feature (no hard binary cutoffs)
455
456 // Formant score: voice has F2/F1 ratio; real audio ratio ~0.14, synthetic ~0.7
457 // Ramp zone 0-0.12 catches very low ratios (pure tone, sparse sines).
458 // /i/ vowel (form~0.15) and guitar (form~0.17) overlap in formant ratio,
459 // so guitar rejection relies on time-domain penalties (jitter, zcCV) instead.
460 float formantScore;
461 if (formantRatio < 0.12f) {
462 formantScore = formantRatio / 0.12f * 0.30f; // Ramp: very low → 0..0.30
463 } else {
464 // Peak at 0.5, floor 0.30, width 0.70 (accommodates /i/ vowel)
465 float dist = fl::abs(formantRatio - 0.5f);
466 formantScore = fl::max(0.30f, 1.0f - dist / 0.70f);
467 }
468
469 // Spectral flatness score: wider window to survive mix contamination.
470 // Isolated vocal ~0.44, vocal-in-mix ~0.60, backing ~0.57.
471 // Peak at 0.43, width ±0.22 — mix vocal (0.60) still scores ~0.22.
472 float flatnessScore;
473 if (spectralFlatness < 0.20f) {
474 // Too tonal (pure tones, sparse sines)
475 flatnessScore = spectralFlatness / 0.20f * 0.3f;
476 } else if (spectralFlatness <= 0.65f) {
477 // Voice range — peak at 0.43, wide window
478 float dist = fl::abs(spectralFlatness - 0.43f);
479 flatnessScore = fl::max(0.0f, 1.0f - dist / 0.22f);
480 } else {
481 // Too flat — noise-like (steep decay)
482 flatnessScore = fl::max(0.0f, 1.0f - (spectralFlatness - 0.65f) / 0.10f);
483 }
484
485 // Harmonic density score: with 16 broad bins (scaled from 64-bin calibration).
486 // Real audio: ~6-7, synthetic voice: 8-12, pure tone: ~4, noise: ~15
487 float densityScore;
488 if (harmonicDensity < 2.5f) {
489 // Too sparse — pure tone or near-tonal
490 densityScore = harmonicDensity / 2.5f * 0.3f;
491 } else if (harmonicDensity <= 12.5f) {
492 // Voice-like range
493 densityScore = 0.3f + (harmonicDensity - 2.5f) / 10.0f * 0.7f;
494 } else {
495 // Too dense — noise-like
496 densityScore = fl::max(0.0f, 1.0f - (harmonicDensity - 12.5f) / 3.5f);
497 }
498
499 // Spectral flux score: voice has higher frame-to-frame spectral change
500 // than sustained instruments (phoneme transitions, formant shifts).
501 // Thresholds scaled by sqrt(16/64) = 0.5 for 16-bin broad FFT.
502 float spectralFluxScore;
503 if (spectralFlux < 0.01f) {
504 spectralFluxScore = 0.0f;
505 } else if (spectralFlux < 0.15f) {
506 spectralFluxScore = (spectralFlux - 0.01f) / 0.14f;
507 } else {
508 spectralFluxScore = 1.0f;
509 }
510
511 // Weighted average — centroid/rolloff removed (combined 0.04 weight,
512 // 0-5% separation between voice and guitar — not discriminative).
513 // Budget redistributed to formant/flatness (best discriminators).
514 float weightedAvg = 0.33f * formantScore
515 + 0.35f * flatnessScore
516 + 0.12f * densityScore
517 + 0.10f * spectralFluxScore;
518
519 // Accumulate all multiplicative boosts into totalBoost, then cap.
520 // This prevents the theoretical 3.2x runaway when all boosts fire
521 // simultaneously. Cap at 1.60x allows two boosts to stack but
522 // prevents pathological cases. Synthetic signals trigger zero boosts
523 // (jitter <0.15, acfIrreg <0.30), so existing tests are unaffected.
524 float totalBoost = 1.0f;
525
526 // Temporal spectral variance boost (not base score).
527 // Voice has higher variance (~1.01/~0.84 in 3-way) than guitar (~0.74).
528 // Applied only when variance > 0 (real multi-frame audio).
529 if (spectralVariance > 0.01f) {
530 float varianceBoost = 1.0f + 0.40f * (spectralVariance - 0.74f) / 0.76f;
531 varianceBoost = fl::clamp(varianceBoost, 0.90f, 1.25f);
532 totalBoost *= varianceBoost;
533 }
534
535 // Low-jitter penalty: very periodic signals (guitar jit=0.10) get penalized.
536 // Voice jitter is always >0.50, so this only penalizes instruments.
538 float periodicPenalty = 1.0f - 0.25f * (0.15f - mEnvelopeJitter) / 0.15f;
539 totalBoost *= periodicPenalty;
540 }
541
542 // Envelope jitter boost: only for very high jitter (>0.55).
543 // Drums alone reach 0.61, so only extreme jitter gets a small boost.
544 if (mEnvelopeJitter > 0.55f) {
545 float jitterBoost = 1.0f + 0.8f * (mEnvelopeJitter - 0.55f);
546 jitterBoost = fl::clamp(jitterBoost, 1.0f, 1.15f);
547 totalBoost *= jitterBoost;
548 }
549
550 // Autocorrelation irregularity boost: only above 0.75 (above drum baseline).
551 if (mAutocorrelationIrregularity > 0.75f) {
552 float acfBoost = 1.0f + 1.0f * (mAutocorrelationIrregularity - 0.75f);
553 acfBoost = fl::clamp(acfBoost, 1.0f, 1.25f);
554 totalBoost *= acfBoost;
555 }
556
557 // Low-zcCV penalty: very regular zero-crossings (guitar zcCV=0.05) penalized.
558 // Voice zcCV is always >0.30, so this only hits periodic instruments.
560 float zcPenalty = 1.0f - 0.20f * (0.10f - mZeroCrossingCV) / 0.10f;
561 totalBoost *= zcPenalty;
562 }
563
564 // Zero-crossing CV: peaked boost for moderate values, penalty for high.
565 if (mZeroCrossingCV > 0.02f) {
566 float zcBoost;
567 if (mZeroCrossingCV < 0.80f) {
568 zcBoost = 1.0f + 0.25f * (mZeroCrossingCV - 0.10f) / 0.70f;
569 } else if (mZeroCrossingCV < 1.50f) {
570 zcBoost = 1.0f;
571 } else {
572 zcBoost = 1.0f - 0.15f * (mZeroCrossingCV - 1.50f) / 1.00f;
573 }
574 zcBoost = fl::clamp(zcBoost, 0.80f, 1.20f);
575 totalBoost *= zcBoost;
576 }
577
578 // Combined aperiodicity-jitter boost: when BOTH indicate voice.
579 if (mAutocorrelationIrregularity > 0.75f && mEnvelopeJitter > 0.55f) {
580 float minExcess = fl::min(mAutocorrelationIrregularity - 0.75f,
581 mEnvelopeJitter - 0.55f);
582 float combinedBoost = 1.0f + 1.5f * minExcess;
583 combinedBoost = fl::clamp(combinedBoost, 1.0f, 1.20f);
584 totalBoost *= combinedBoost;
585 }
586
587 // Cap total boost chain to prevent runaway
588 totalBoost = fl::clamp(totalBoost, 0.50f, 1.60f);
589 weightedAvg *= totalBoost;
590
591 // Formant gate/boost: weak formant → gate down, strong formant → mild boost.
592 float formantMultiplier;
593 if (formantScore < 0.40f) {
594 formantMultiplier = formantScore * 2.5f; // gate (0.0 to 1.0)
595 } else {
596 formantMultiplier = 1.0f + 0.40f * (formantScore - 0.40f); // boost (1.0 to 1.24)
597 }
598
599 // Clamp final confidence to [0, 1]
600 mConfidence = fl::clamp(weightedAvg * formantMultiplier, 0.0f, 1.0f);
601 return mConfidence;
602}
float mAutocorrelationIrregularity
Definition vocal.h:67
constexpr common_type_t< T, U > min(T a, U b) FL_NOEXCEPT
Definition math.h:71
constexpr common_type_t< T, U > max(T a, U b) FL_NOEXCEPT
Definition math.h:75
constexpr enable_if< is_fixed_point< T >::value, T >::type abs(T x) FL_NOEXCEPT
constexpr enable_if< is_fixed_point< T >::value, T >::type clamp(T x, T lo, T hi) FL_NOEXCEPT

References fl::abs(), fl::clamp(), mAutocorrelationIrregularity, fl::max(), mConfidence, mEnvelopeJitter, fl::min(), and mZeroCrossingCV.

Referenced by update().

+ Here is the call graph for this function:
+ Here is the caller graph for this function: