FastLED 3.9.15
Loading...
Searching...
No Matches
percussion.cpp.hpp
Go to the documentation of this file.
1// Percussion - Multi-feature spectral classification
2// Part of FastLED Audio System v2.0 - Phase 3 (Differentiators)
3
6#include "fl/math/math.h"
7#include "fl/stl/noexcept.h"
8
9namespace fl {
10namespace audio {
11namespace detector {
12
// Constructor member-initializer list.
// All four detection flags start false, all confidences and spectral features
// start at 0.0f, the per-type thresholds get their defaults (kick 0.35f, the
// other three 0.30f) and the last-hit timestamps start at 0.
// NOTE(review): the constructor signature (listing line 13) and listing lines
// 29, 35 and 36 are not shown in this view. Presumably they initialize
// mZeroCrossingFactor, mLastSnareTime and mLastHiHatTime, which are assigned
// elsewhere in this file — confirm against the real percussion.cpp.hpp.
14 : mKickDetected(false)
15 , mSnareDetected(false)
16 , mHiHatDetected(false)
17 , mTomDetected(false)
18 , mKickConfidence(0.0f)
19 , mSnareConfidence(0.0f)
20 , mHiHatConfidence(0.0f)
21 , mTomConfidence(0.0f)
22 , mBassToTotal(0.0f)
23 , mTrebleToTotal(0.0f)
24 , mClickRatio(0.0f)
25 , mTrebleFlatness(0.0f)
26 , mMidToTreble(0.0f)
27 , mOnsetSharpness(0.0f)
28 , mSubBassProxy(0.0f)
// Detection thresholds compared against the per-type confidence scores.
30 , mKickThreshold(0.35f)
31 , mSnareThreshold(0.30f)
32 , mHiHatThreshold(0.30f)
33 , mTomThreshold(0.30f)
// Last-hit timestamps used by the cooldown checks in update().
34 , mLastKickTime(0)
37 , mLastTomTime(0)
38{}
39
41
// Body of the per-frame update. The symbol index at the end of this page lists
// it as `void update(shared_ptr< Context > context) override`; the signature
// itself (listing line 42) is not shown in this view.
// Flow: fetch the 16-bin FFT and timestamp, read the zero-crossing factor,
// measure how sharply total energy rises above its envelope, bail out if there
// is no onset, otherwise score each drum type and apply threshold + cooldown.
// NOTE(review): `context` and the pointer returned by getFFT16() are both
// dereferenced below without a null check — confirm the caller guarantees them.
// NOTE(review): the statements for Steps 1, 4 and 5 (listing lines 51, 78, 81)
// are not shown in this view; only their comments are visible.
43 mRetainedFFT = context->getFFT16(fft::Mode::CQ_NAIVE);
44 const fft::Bins& fft = *mRetainedFFT;
45 u32 timestamp = context->getTimestamp();
46
47 // Step 0: Get zero-crossing factor from raw audio sample
48 mZeroCrossingFactor = context->getZCF();
49
50 // Step 1: Compute spectral features from 16-bin fft::FFT
52
53 // Step 2: Compute total energy and onset sharpness via envelope
// totalEnergy is the plain sum of every raw FFT bin.
54 float totalEnergy = 0.0f;
55 for (fl::size i = 0; i < fft.raw().size(); ++i) {
56 totalEnergy += fft.raw()[i];
57 }
// dt is the duration of this audio buffer in seconds (per computeAudioDt's
// description in the symbol index); it drives the attack/decay envelope.
58 const float dt = computeAudioDt(context->getPCM().size(), context->getSampleRate());
59 float envValue = mTotalEnvelope.update(totalEnergy, dt);
// flux: only the part of this frame's energy that exceeds the envelope counts.
60 float flux = fl::max(0.0f, totalEnergy - envValue);
// Normalize by total energy so the gate is level-independent; near-silent
// frames (<= 1e-6) are treated as having no onset.
61 mOnsetSharpness = (totalEnergy > 1e-6f) ? flux / totalEnergy : 0.0f;
62
63 // Step 3: Gate on onset — no onset means no percussion
// The early return clears flags and confidences only; the last-hit
// timestamps are left as they were, so cooldowns keep running.
64 static constexpr float kOnsetGate = 0.05f;
65 if (mOnsetSharpness < kOnsetGate) {
66 mKickDetected = false;
67 mSnareDetected = false;
68 mHiHatDetected = false;
69 mTomDetected = false;
70 mKickConfidence = 0.0f;
71 mSnareConfidence = 0.0f;
72 mHiHatConfidence = 0.0f;
73 mTomConfidence = 0.0f;
74 return;
75 }
76
77 // Step 4: Compute per-type confidence scores
79
80 // Step 5: Apply cross-band rejection
82
83 // Step 6: Threshold + cooldown → boolean state
// NOTE(review): listing lines 84, 86, 88 and 90 (the first half of each of
// these four expressions) are not shown. Each visible line is the cooldown
// half: at least <TYPE>_COOLDOWN_MS must have elapsed since that type's
// last recorded hit.
85 (timestamp - mLastKickTime >= KICK_COOLDOWN_MS);
87 (timestamp - mLastSnareTime >= SNARE_COOLDOWN_MS);
89 (timestamp - mLastHiHatTime >= HIHAT_COOLDOWN_MS);
91 (timestamp - mLastTomTime >= TOM_COOLDOWN_MS);
92
93 // Update timestamps
// Only a detection in this frame restarts that type's cooldown.
94 if (mKickDetected) mLastKickTime = timestamp;
95 if (mSnareDetected) mLastSnareTime = timestamp;
96 if (mHiHatDetected) mLastHiHatTime = timestamp;
97 if (mTomDetected) mLastTomTime = timestamp;
98}
99
121
// State-reset body (the signature, listing line 122, is not shown in this
// view — presumably reset(); confirm).
// Clears every detection flag, confidence, spectral feature, the total-energy
// envelope and all four last-hit timestamps. The per-type thresholds and
// mRetainedFFT are not touched here.
123 mKickDetected = false;
124 mSnareDetected = false;
125 mHiHatDetected = false;
126 mTomDetected = false;
127 mKickConfidence = 0.0f;
128 mSnareConfidence = 0.0f;
129 mHiHatConfidence = 0.0f;
130 mTomConfidence = 0.0f;
131 mBassToTotal = 0.0f;
132 mTrebleToTotal = 0.0f;
133 mClickRatio = 0.0f;
134 mTrebleFlatness = 0.0f;
135 mMidToTreble = 0.0f;
136 mOnsetSharpness = 0.0f;
137 mSubBassProxy = 0.0f;
138 mZeroCrossingFactor = 0.0f;
139 mTotalEnvelope.reset();
// Timestamps go back to 0, the same value the constructor uses.
140 mLastKickTime = 0;
141 mLastSnareTime = 0;
142 mLastHiHatTime = 0;
143 mLastTomTime = 0;
144}
145
// computeFeatures body. The symbol index lists the signature as
// `void computeFeatures(const fft::Bins &fft)`; the signature line itself
// (listing line 146) is not shown in this view.
// Derives six band-ratio features from the raw FFT bins and stores them in
// mBassToTotal, mTrebleToTotal, mClickRatio, mTrebleFlatness, mMidToTreble
// and mSubBassProxy. Every ratio falls back to 0.0f when its denominator is
// <= 1e-6 so silence never divides by zero.
147 auto bins = fft.raw();
148 int n = static_cast<int>(bins.size());
// Fewer than 16 bins: zero all six features and stop.
149 if (n < 16) {
150 mBassToTotal = 0.0f;
151 mTrebleToTotal = 0.0f;
152 mClickRatio = 0.0f;
153 mTrebleFlatness = 0.0f;
154 mMidToTreble = 0.0f;
155 mSubBassProxy = 0.0f;
156 return;
157 }
158
159 // Frequency-based band boundaries (range-agnostic).
160 // Bass: fmin–300 Hz (kick body, sub-bass)
161 // Mid: 300–2000 Hz (snare body, tom)
162 // Treble: 2000+ Hz (click transients, hi-hat, cymbals)
163 // Click: 1500–4500 Hz (kick beater attack, snare crack)
// The max() calls keep the bass band at least two bins wide (0..1) and
// force trebleCutBin to lie above bassCutBin.
// NOTE(review): bassCutBin has no upper clamp, and the bass loop reads
// bins[0..bassCutBin] inclusive. This relies on freqToBin(300.0f) returning
// an index below n — confirm freqToBin clamps to the bin range.
164 int bassCutBin = fl::max(1, fft.freqToBin(300.0f));
165 int trebleCutBin = fl::max(bassCutBin + 1, fft.freqToBin(2000.0f));
166 int clickLoBin = fl::max(bassCutBin, fft.freqToBin(1500.0f));
167 int clickHiBin = fl::min(n - 1, fft.freqToBin(4500.0f));
168
// Bass: bins [0, bassCutBin] inclusive.
169 float bassSum = 0.0f;
170 for (int i = 0; i <= bassCutBin; ++i) bassSum += bins[i];
171
// Mid: bins (bassCutBin, trebleCutBin) exclusive at both ends; empty when
// trebleCutBin == bassCutBin + 1.
172 float midSum = 0.0f;
173 for (int i = bassCutBin + 1; i < trebleCutBin; ++i) midSum += bins[i];
174
// Treble: bins [trebleCutBin, n).
175 float trebleSum = 0.0f;
176 for (int i = trebleCutBin; i < n; ++i) trebleSum += bins[i];
177
178 // Click band: 1500–4500 Hz (kick click, snare crack)
// Inclusive at both ends; the loop is skipped if clickLoBin > clickHiBin.
179 float clickBandSum = 0.0f;
180 for (int i = clickLoBin; i <= clickHiBin; ++i) clickBandSum += bins[i];
181
182 // Click-ratio denominator. Despite the name this is the whole mid band,
// not only its upper part, and the click band may overlap it where
// clickLoBin falls below trebleCutBin.
183 float upperMidSum = midSum;
184
// The three bands are disjoint and together cover bins [0, n), except that
// bins past n are never counted; the click band is not part of this total.
185 float total = bassSum + midSum + trebleSum;
186
187 // Bass/Total ratio
188 mBassToTotal = (total > 1e-6f) ? bassSum / total : 0.0f;
189
190 // Treble/Total ratio
191 mTrebleToTotal = (total > 1e-6f) ? trebleSum / total : 0.0f;
192
193 // Click ratio: click-band energy relative to mid-band energy (unbounded above)
194 mClickRatio = (upperMidSum > 1e-6f) ? clickBandSum / upperMidSum : 0.0f;
195
196 // Mid/Treble ratio
197 mMidToTreble = (trebleSum > 1e-6f) ? midSum / trebleSum : 0.0f;
198
199 // Sub-bass proxy: lowest bin / first mid bin
// The bounds test on firstMidBin guards the read when the bass band
// reaches the last bin.
200 int firstMidBin = bassCutBin + 1;
201 mSubBassProxy = (firstMidBin < n && bins[firstMidBin] > 1e-6f)
202 ? bins[0] / bins[firstMidBin]
203 : 0.0f;
204
205 // Treble flatness: geometric_mean / arithmetic_mean of treble bins
// The arithmetic mean is taken over all treble bins, but the geometric mean
// only over bins > 1e-6 (log of ~0 is avoided by skipping them). When some
// treble bins are near zero the result can therefore exceed 1.0.
206 int trebleBinCount = n - trebleCutBin;
207 float trebleArithMean = (trebleBinCount > 0) ? trebleSum / static_cast<float>(trebleBinCount) : 0.0f;
208 if (trebleArithMean > 1e-6f) {
209 float sumLog = 0.0f;
210 int validCount = 0;
211 for (int i = trebleCutBin; i < n; ++i) {
212 if (bins[i] > 1e-6f) {
213 sumLog += fl::logf(bins[i]);
214 ++validCount;
215 }
216 }
217 if (validCount > 0) {
// Geometric mean computed as exp(mean(log x)).
218 float geoMean = fl::expf(sumLog / static_cast<float>(validCount));
219 mTrebleFlatness = geoMean / trebleArithMean;
220 } else {
221 mTrebleFlatness = 0.0f;
222 }
223 } else {
224 mTrebleFlatness = 0.0f;
225 }
226}
227
// Per-type confidence scoring (the signature, listing line 228, is not shown
// in this view, so the function's real name is unconfirmed).
// Each drum type gets a weighted sum of partial scores built from the
// spectral features and the zero-crossing factor. Every partial score is
// either a triangular "peak" (1 at a centre value, falling linearly to 0) or
// a linear ramp, and within each type the weights add up to 1.0.
229 // Feature distributions with frequency-based bands (90-14080 Hz, 16 CQ bins):
230 //
231 // Type | bassToTotal | trebleToTotal | clickRatio | trebleFlatness | midToTreble | subBassProxy
232 // ---------|-------------|---------------|------------|----------------|-------------|-------------
233 // Kick | 0.30-0.45 | 0.30-0.50 | 1.0-2.0 | 0.9-1.0 | 0.5-0.9 | 0.5-3
234 // Snare | 0.10-0.25 | 0.55-0.75 | 1.5-2.5 | 0.9-1.0 | 0.2-0.5 | 0.0-0.2
235 // HiHat | 0.03-0.10 | 0.70-0.90 | 2.5-5.0 | 0.9-1.0 | 0.1-0.2 | 0.2-1.5
236 // Tom | 0.25-0.45 | 0.30-0.50 | 0.8-1.5 | 0.9-1.0 | 0.5-1.0 | 0.0-0.3
237
238 // Kick: bass-dominant with high sub-bass proxy (distinguishes from tom)
239 // Key discriminants from tom: subBassProxy (kick > 0.5, tom < 0.3)
240 {
241 // Bass score: peaks at 0.35, reaches 0 at 0.15 and 0.55
242 float bassScore = fl::max(0.0f, 1.0f - fl::abs(mBassToTotal - 0.35f) / 0.20f);
243 // Sub-bass proxy: linear ramp, saturates at 1 once the proxy reaches 2.0
244 float subBassScore = fl::min(1.0f, mSubBassProxy / 2.0f);
245 // Moderate treble (0.30-0.50 — higher than tom due to click); peaks at 0.40
246 float trebleScore = fl::max(0.0f, 1.0f - fl::abs(mTrebleToTotal - 0.40f) / 0.20f);
247 // ZCF: kick has low-moderate zero crossings; peaks at 0.25, 0 at 0.0 and 0.5
248 float zcfScore = fl::max(0.0f, 1.0f - fl::abs(mZeroCrossingFactor - 0.25f) / 0.25f);
249
// Sub-bass carries the largest weight because it separates kick from tom.
250 mKickConfidence = 0.25f * bassScore + 0.35f * subBassScore +
251 0.20f * trebleScore + 0.20f * zcfScore;
252 }
253
254 // Snare: low-moderate bass + high treble from noise rattles
255 {
256 // Bass score: low bass (0.10-0.25) — lower than kick/tom; peaks at 0.15
257 float bassScore = fl::max(0.0f, 1.0f - fl::abs(mBassToTotal - 0.15f) / 0.15f);
258 // Treble presence: snare has high treble (0.55-0.75); peaks at 0.65
259 float trebleScore = fl::max(0.0f, 1.0f - fl::abs(mTrebleToTotal - 0.65f) / 0.20f);
260 // Mid/Treble ratio: snare ~0.2-0.5 (moderate); peaks at 0.30
261 float midTrebleScore = fl::max(0.0f, 1.0f - fl::abs(mMidToTreble - 0.30f) / 0.25f);
262 // Very low sub-bass (no body resonance): 1 at proxy 0, falls to 0 at 0.5
263 float noSubBassScore = fl::max(0.0f, 1.0f - mSubBassProxy / 0.5f);
264 // ZCF: ramps from 0 at 0.20 up to 1 at 0.60 and above (noise has many zero crossings)
265 float zcfScore = fl::max(0.0f, (mZeroCrossingFactor - 0.20f) / 0.40f);
266 zcfScore = fl::min(1.0f, zcfScore);
267
268 mSnareConfidence = 0.20f * bassScore + 0.25f * trebleScore +
269 0.15f * midTrebleScore + 0.20f * noSubBassScore +
270 0.20f * zcfScore;
271 }
272
273 // HiHat: treble-dominant with high ZCF
274 {
275 // Treble dominance: ramps from 0 at 0.60 up to 1 at 0.90 and above
276 float trebleScore = fl::max(0.0f, (mTrebleToTotal - 0.60f) / 0.30f);
277 trebleScore = fl::min(1.0f, trebleScore);
278 // No bass: 1 at bassToTotal 0, falls to 0 at 0.15
279 float noBassScore = fl::max(0.0f, 1.0f - mBassToTotal / 0.15f);
280 // Very high click ratio: ramps from 0 at 2.0 up to 1 at 5.0 and above
281 float highClickScore = fl::max(0.0f, (mClickRatio - 2.0f) / 3.0f);
282 highClickScore = fl::min(1.0f, highClickScore);
283 // ZCF: hi-hat has very high zero crossings; ramps from 0 at 0.35 to 1 at 0.80
284 float zcfScore = fl::max(0.0f, (mZeroCrossingFactor - 0.35f) / 0.45f);
285 zcfScore = fl::min(1.0f, zcfScore);
286
287 mHiHatConfidence = 0.30f * trebleScore + 0.20f * noBassScore +
288 0.25f * highClickScore + 0.25f * zcfScore;
289 }
290
291 // Tom: bass with NO sub-bass; a high mid-to-treble ratio also favours tom
292 {
293 // Moderate bass (0.25-0.45) — same formula as the kick bass score
294 float bassScore = fl::max(0.0f, 1.0f - fl::abs(mBassToTotal - 0.35f) / 0.20f);
295 // NO sub-bass: key discriminant from kick (tom < 0.3, kick > 0.5)
296 float noSubBassScore = fl::max(0.0f, 1.0f - mSubBassProxy / 0.5f);
297 // High mid-to-treble ratio (tom body resonance in mid): ramps 0.50 -> 1.00
298 float midTrebleScore = fl::max(0.0f, (mMidToTreble - 0.50f) / 0.50f);
299 midTrebleScore = fl::min(1.0f, midTrebleScore);
300 // ZCF: tom has very low zero crossings; 1 at 0, falls to 0 at 0.25
301 float zcfScore = fl::max(0.0f, 1.0f - mZeroCrossingFactor / 0.25f);
302
303 mTomConfidence = 0.25f * bassScore + 0.30f * noSubBassScore +
304 0.20f * midTrebleScore + 0.25f * zcfScore;
305 }
306
307 // Clamp all to [0, 1]
// NOTE(review): the clamp statements themselves (listing lines 308-311) are
// not shown in this view; the symbol index does list an fl::clamp helper.
312}
313
// Cross-band rejection (the signature, listing line 314, is not shown in this
// view, so the function's real name is unconfirmed).
// Post-processes the four confidences in place: when the spectral features
// clearly favour one type, the competing type's confidence is scaled down.
315 // Soft winner-takes-all among competing types: the loser is multiplied by
// 0.2 or 0.3 rather than zeroed, and rules that fire together stack.
316 // With frequency-based bands (90-14080 Hz), kick and tom have similar
317 // bass/treble ratios. Use sub-bass proxy as primary discriminant.
318
319 // Kick vs Tom: both have moderate bass. Sub-bass proxy separates them.
320 // Kick has sub-bass body resonance (subBassProxy > 0.5), tom doesn't.
// A proxy between 0.3 and 0.5 is treated as ambiguous: neither is suppressed.
321 if (mBassToTotal > 0.20f) {
322 if (mSubBassProxy > 0.5f) {
323 // Sub-bass present → kick-like, suppress tom
324 mTomConfidence *= 0.3f;
325 } else if (mSubBassProxy < 0.3f) {
326 // No sub-bass → tom-like, suppress kick
327 mKickConfidence *= 0.3f;
328 }
329 }
330
331 // HiHat vs Snare: both have high treble. Discriminate by bass content.
332 if (mTrebleToTotal > 0.60f && mBassToTotal < 0.12f) {
333 mSnareConfidence *= 0.3f;
334 }
335
336 // Crash/broadband rejection: very high treble with no bass
// This condition is a strict subset of the hi-hat/snare one above, so when
// it fires the snare confidence has already been scaled once (0.3 * 0.3).
337 if (mTrebleToTotal > 0.70f && mBassToTotal < 0.10f) {
338 mSnareConfidence *= 0.3f;
339 mKickConfidence *= 0.2f;
340 mTomConfidence *= 0.2f;
341 }
342
343 // Noise rejection: white noise has very low treble flatness (< 0.50)
// NOTE(review): the `if` that opens this block (listing line 344) is not
// shown in this view; the brace two lines down closes it. Verify that the
// hidden comparison matches the direction this comment describes.
345 mSnareConfidence *= 0.3f;
346 }
347
348 // Very high click ratio + high treble → hi-hat, not snare
349 if (mClickRatio > 3.0f && mTrebleToTotal > 0.60f) {
350 mSnareConfidence *= 0.3f;
351 }
352}
353
354} // namespace detector
355} // namespace audio
356} // namespace fl
static constexpr u32 SNARE_COOLDOWN_MS
Definition percussion.h:108
shared_ptr< const fft::Bins > mRetainedFFT
Definition percussion.h:105
AttackDecayFilter< float > mTotalEnvelope
Definition percussion.h:97
function_list< void(PercussionType type)> onPercussionHit
Definition percussion.h:33
function_list< void()> onKick
Definition percussion.h:34
static constexpr u32 KICK_COOLDOWN_MS
Definition percussion.h:107
function_list< void()> onSnare
Definition percussion.h:35
static constexpr u32 HIHAT_COOLDOWN_MS
Definition percussion.h:109
function_list< void()> onTom
Definition percussion.h:37
void computeFeatures(const fft::Bins &fft)
~Percussion() FL_NOEXCEPT override
void update(shared_ptr< Context > context) override
static constexpr u32 TOM_COOLDOWN_MS
Definition percussion.h:110
function_list< void()> onHiHat
Definition percussion.h:36
float computeAudioDt(fl::size pcmSize, int sampleRate) FL_NOEXCEPT
Compute the time delta (in seconds) for an audio buffer.
FL_DISABLE_WARNING_PUSH U constexpr common_type_t< T, U > min(T a, U b) FL_NOEXCEPT
Definition math.h:71
constexpr common_type_t< T, U > max(T a, U b) FL_NOEXCEPT
Definition math.h:75
float expf(float value) FL_NOEXCEPT
Definition math.h:398
constexpr enable_if< is_fixed_point< T >::value, T >::type abs(T x) FL_NOEXCEPT
float logf(float value) FL_NOEXCEPT
Definition math.h:418
constexpr enable_if< is_fixed_point< T >::value, T >::type clamp(T x, T lo, T hi) FL_NOEXCEPT
Base definition for an LED controller.
Definition crgb.hpp:179
#define FL_NOEXCEPT