FastLED 3.9.15
Loading...
Searching...
No Matches
assembly.h
Go to the documentation of this file.
1/* ***** BEGIN LICENSE BLOCK *****
2 * Version: RCSL 1.0/RPSL 1.0
3 *
4 * Portions Copyright (c) 1995-2002 RealNetworks, Inc. All Rights Reserved.
5 *
6 * The contents of this file, and the files included with this file, are
7 * subject to the current version of the RealNetworks Public Source License
8 * Version 1.0 (the "RPSL") available at
9 * http://www.helixcommunity.org/content/rpsl unless you have licensed
10 * the file under the RealNetworks Community Source License Version 1.0
11 * (the "RCSL") available at http://www.helixcommunity.org/content/rcsl,
12 * in which case the RCSL will apply. You may also obtain the license terms
13 * directly from RealNetworks. You may not use this file except in
14 * compliance with the RPSL or, if you have a valid RCSL with RealNetworks
15 * applicable to this file, the RCSL. Please see the applicable RPSL or
16 * RCSL for the rights, obligations and limitations governing use of the
17 * contents of the file.
18 *
19 * This file is part of the Helix DNA Technology. RealNetworks is the
20 * developer of the Original Code and owns the copyrights in the portions
21 * it created.
22 *
23 * This file, and the files included with this file, is distributed and made
24 * available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
25 * EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL SUCH WARRANTIES,
26 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS
27 * FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
28 *
29 * Technology Compatibility Kit Test Suite(s) Location:
30 * http://www.helixcommunity.org/content/tck
31 *
32 * Contributor(s):
33 *
34 * ***** END LICENSE BLOCK ***** */
35
36/**************************************************************************************
37 * Fixed-point MP3 decoder
38 * Jon Recker (jrecker@real.com), Ken Cooke (kenc@real.com)
39 * June 2003
40 *
41 * assembly.h - assembly language functions and prototypes for supported platforms
42 *
43 * - inline routines with access to 64-bit multiply results
44 * - x86 (_WIN32) and ARM (ARM_ADS, _WIN32_WCE) versions included
45 * - some inline functions are mix of asm and C for speed
46 * - some functions are in native asm files, so only the prototype is given here
47 *
48 * MULSHIFT32(x, y) signed multiply of two 32-bit integers (x and y), returns top 32 bits of 64-bit result
49 * FASTABS(x) branchless absolute value of signed integer x
50 * CLZ(x) count leading zeros in x
51 * MADD64(sum, x, y) (Windows only) sum [64-bit] += x [32-bit] * y [32-bit]
52 * SHL64(sum, x, y) (Windows only) 64-bit left shift using __int64
53 * SAR64(sum, x, y) (Windows only) 64-bit right shift using __int64
54 */
55
56#ifndef _ASSEMBLY_H
57#define _ASSEMBLY_H
58
59#include "fl/stl/noexcept.h"
60
61namespace fl {
62namespace third_party {
63
64#if (defined _WIN32 && !defined _WIN32_WCE && defined _M_IX86) || (defined __WINS__ && defined _SYMBIAN) || defined(_OPENWAVE_SIMULATOR) || defined(WINCE_EMULATOR) /* Symbian emulator for Ix86 */
65
66#pragma warning( disable : 4035 ) /* complains about inline asm not returning a value */
67
69{
70 __asm {
71 mov eax, x
72 imul y
73 mov eax, edx
74 }
75}
76
/* FASTABS - branchless absolute value of a signed integer.
 *
 * Shifting x right by (bit width - 1) produces a mask that is 0 for
 * non-negative x and all ones for negative x (this relies on >> of a
 * negative int being an arithmetic shift).  XOR-ing with the mask and then
 * subtracting it negates x only when x is negative.
 */
__inline int FASTABS(int x)
{
    const int mask = x >> (sizeof(int) * 8 - 1);

    return (x ^ mask) - mask;
}
87
/* CLZ - count the leading zero bits of x.
 *
 * Returns the number of zero bits above the most significant set bit, or
 * sizeof(int) * 8 when x is 0.  A negative x has its top bit set, so the
 * result is 0.
 *
 * The scan is done on an unsigned copy of x: left-shifting a signed int so
 * that a set bit reaches the sign position is undefined behaviour in C,
 * whereas the unsigned shift is well defined and gives the same count.
 */
__inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros = 0;

    if (!bits) {
        return (int)(sizeof(int) * 8);
    }

    /* walk the top bit down until it is set */
    while (!(bits & 0x80000000u)) {
        numZeros++;
        bits <<= 1;
    }

    return numZeros;
}
103
104/* MADD64, SHL64, SAR64:
105 * write in assembly to avoid dependency on run-time lib for 64-bit shifts, muls
106 * (sometimes compiler thunks to function calls instead of code generating)
107 * required for Symbian emulator
108 */
109#ifdef __CW32__
110typedef long long Word64;
111#else
112typedef __int64 Word64;
113#endif
114
/* MADD64 - 64-bit multiply-accumulate: returns sum + (x * y).
 *
 * sum   64-bit accumulator
 * x, y  signed 32-bit factors
 *
 * There is intentionally no return statement: the asm block leaves the
 * 64-bit result in edx:eax (high:low), which is why warning 4035 is
 * disabled at the top of this platform section.
 */
__inline Word64 MADD64(Word64 sum, int x, int y)
{
    /* split the accumulator into its two 32-bit words (word 0 = low half) */
    unsigned int sumLo = ((unsigned int *)&sum)[0];
    int sumHi = ((int *)&sum)[1];

    /* imul leaves the signed 64-bit product in edx:eax; add/adc then
     * accumulate the low and high words with carry propagation
     */
    __asm {
        mov eax, x
        imul y
        add eax, sumLo
        adc edx, sumHi
    }

    /* equivalent to return (sum + ((__int64)x * y)); */
}
129
/* SHL64 - 64-bit left shift of x by n bits, built from 32-bit instructions.
 *
 * x  64-bit value to shift
 * n  shift count; three ranges are handled separately (see below)
 *
 * There is no return statement: each asm block leaves the result in
 * edx:eax (high:low).
 */
__inline Word64 SHL64(Word64 x, int n)
{
    /* split x into its two 32-bit words (word 0 = low half) */
    unsigned int xLo = ((unsigned int *)&x)[0];
    int xHi = ((int *)&x)[1];
    unsigned char nb = (unsigned char)n;    /* shift count as a byte so it can be loaded into cl */

    if (n < 32) {
        /* shld moves the top bits of the low word into the high word,
         * then the low word itself is shifted
         */
        __asm {
            mov edx, xHi
            mov eax, xLo
            mov cl, nb
            shld edx, eax, cl
            shl eax, cl
        }
    } else if (n < 64) {
        /* shl masks cl to 0x1f */
        /* so the low word lands in the high word shifted by (n - 32),
         * and the low word of the result is zero
         */
        __asm {
            mov edx, xLo
            mov cl, nb
            xor eax, eax
            shl edx, cl
        }
    } else {
        /* n >= 64: every bit is shifted out, result is 0 */
        __asm {
            xor edx, edx
            xor eax, eax
        }
    }
}
159
/* SAR64 - 64-bit arithmetic (sign-propagating) right shift of x by n bits,
 * built from 32-bit instructions.
 *
 * x  64-bit value to shift
 * n  shift count; three ranges are handled separately (see below)
 *
 * There is no return statement: each asm block leaves the result in
 * edx:eax (high:low).
 */
__inline Word64 SAR64(Word64 x, int n)
{
    /* split x into its two 32-bit words (word 0 = low half) */
    unsigned int xLo = ((unsigned int *)&x)[0];
    int xHi = ((int *)&x)[1];
    unsigned char nb = (unsigned char)n;    /* shift count as a byte so it can be loaded into cl */

    if (n < 32) {
        /* shrd moves the low bits of the high word into the low word,
         * then the high word is shifted with sign fill
         */
        __asm {
            mov edx, xHi
            mov eax, xLo
            mov cl, nb
            shrd eax, edx, cl
            sar edx, cl
        }
    } else if (n < 64) {
        /* sar masks cl to 0x1f */
        /* so the low result word is the high word shifted by (n - 32),
         * and the high result word is just the replicated sign bit
         */
        __asm {
            mov edx, xHi
            mov eax, xHi
            mov cl, nb
            sar edx, 31
            sar eax, cl
        }
    } else {
        /* n >= 64: both result words are the replicated sign bit */
        __asm {
            sar xHi, 31
            mov eax, xHi
            mov edx, xHi
        }
    }
}
191
192#elif (defined _WIN32) && (defined _WIN32_WCE)
193
194/* use asm function for now (EVC++ 3.0 does horrible job compiling __int64 version) */
195#define MULSHIFT32 xmp3_MULSHIFT32
197
/* FASTABS - branchless absolute value of a signed integer.
 *
 * The shift yields 0 for non-negative x and all ones for negative x
 * (assuming >> on a negative int is an arithmetic shift); applying
 * XOR and then subtracting that mask negates x only when it is negative.
 */
__inline int FASTABS(int x)
{
    const int mask = x >> (sizeof(int) * 8 - 1);

    return (x ^ mask) - mask;
}
208
/* CLZ - count the leading zero bits of x.
 *
 * Returns the number of zero bits above the most significant set bit, or
 * sizeof(int) * 8 when x is 0.  A negative x has its top bit set, so the
 * result is 0.
 *
 * The scan is done on an unsigned copy of x: left-shifting a signed int so
 * that a set bit reaches the sign position is undefined behaviour in C,
 * whereas the unsigned shift is well defined and gives the same count.
 */
__inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros = 0;

    if (!bits) {
        return (int)(sizeof(int) * 8);
    }

    /* walk the top bit down until it is set */
    while (!(bits & 0x80000000u)) {
        numZeros++;
        bits <<= 1;
    }

    return numZeros;
}
224
225#elif defined ARM_ADS
226
228{
229 /* important rules for smull RdLo, RdHi, Rm, Rs:
230 * RdHi and Rm can't be the same register
231 * RdLo and Rm can't be the same register
232 * RdHi and RdLo can't be the same register
233 * Note: Rs determines early termination (leading sign bits) so if you want to specify
234 * which operand is Rs, put it in the SECOND argument (y)
235 * For inline assembly, x and y are not assumed to be R0, R1 so it shouldn't matter
236 * which one is returned. (If this were a function call, returning y (R1) would
237 * require an extra "mov r0, r1")
238 */
239 int zlow;
240 __asm {
241 smull zlow,y,x,y
242 }
243
244 return y;
245}
246
/* FASTABS - absolute value of x in two ARM instructions.
 *
 * (x asr #31) is 0 for non-negative x and all ones for negative x.  The
 * eor computes t = x ^ mask and the sub computes t = t - mask, which
 * negates x only when it is negative.
 */
__inline int FASTABS(int x)
{
    int t=0;    /* initialization is not strictly necessary; it only avoids a compiler warning */

    __asm {
        eor t, x, x, asr #31
        sub t, t, x, asr #31
    }

    return t;
}
258
/* CLZ - count the leading zero bits of x.
 *
 * Returns the number of zero bits above the most significant set bit, or
 * sizeof(int) * 8 when x is 0.  A negative x has its top bit set, so the
 * result is 0.
 *
 * The scan is done on an unsigned copy of x: left-shifting a signed int so
 * that a set bit reaches the sign position is undefined behaviour in C,
 * whereas the unsigned shift is well defined and gives the same count.
 */
__inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros = 0;

    if (!bits) {
        return (int)(sizeof(int) * 8);
    }

    /* walk the top bit down until it is set */
    while (!(bits & 0x80000000u)) {
        numZeros++;
        bits <<= 1;
    }

    return numZeros;
}
274
275#elif defined(__GNUC__) && defined(ARM)
276
277#if defined(ARM7DI)
278
279typedef long long Word64;
280
/* MULSHIFT32 - signed multiply of two 32-bit integers, returning the top
 * 32 bits of the 64-bit product.
 *
 * The operands are widened before multiplying so the full 64-bit product is
 * formed; a plain 32-bit "x * y" would return the LOW word (and overflow),
 * which contradicts the contract described in the file header and the other
 * platform implementations.
 */
__inline int32_t MULSHIFT32(int32_t x, int32_t y) {
    return (int32_t)(((long long)x * y) >> 32);
}
284
285__inline Word64 SAR64(Word64 x, int n) {
286 return x >>= n;
287}
288
289
/* U64 - overlay that exposes a 64-bit value either as a whole (w64) or as
 * two separate 32-bit words (r.lo32 / r.hi32).
 */
typedef union _U64 {
    Word64 w64;
    struct {
        /* lo32 is declared first, so it only coincides with the low half of
         * w64 on a little-endian target
         */
        unsigned int lo32;
        signed int hi32;
    } r;
} U64;
298
299__inline Word64 MADD64(Word64 sum64, int x, int y)
300{
301 sum64 += (Word64)x * (Word64)y;
302
303 return sum64;
304}
305
306#else
307
309{
310 /* important rules for smull RdLo, RdHi, Rm, Rs:
311 * RdHi and Rm can't be the same register
312 * RdLo and Rm can't be the same register
313 * RdHi and RdLo can't be the same register
314 * Note: Rs determines early termination (leading sign bits) so if you want to specify
315 * which operand is Rs, put it in the SECOND argument (y)
316 * For inline assembly, x and y are not assumed to be R0, R1 so it shouldn't matter
317 * which one is returned. (If this were a function call, returning y (R1) would
318 * require an extra "mov r0, r1")
319 */
320 int zlow;
321 __asm__ volatile ("smull %0,%1,%2,%3" : "=&r" (zlow), "=r" (y) : "r" (x), "1" (y)) ;
322
323 return y;
324}
325
326#endif
327
/* FASTABS - absolute value of x in two ARM instructions (GCC inline asm).
 *
 * Operand map: %0 = t (output), %1 = t again as an input tied to %0,
 * %2 = x.  (x asr #31) is 0 for non-negative x and all ones for negative x,
 * so the eor computes t = x ^ mask and the sub computes t = t - mask.
 */
__inline int FASTABS(int x)
{
    int t=0;    /* initialization is not strictly necessary; it only avoids a compiler warning */

    __asm__ volatile (
        "eor %0,%2,%2, asr #31;"
        "sub %0,%1,%2, asr #31;"
        : "=&r" (t)
        : "0" (t), "r" (x)
     );

    return t;
}
341
/* CLZ - count the leading zero bits of x.
 *
 * Returns the number of zero bits above the most significant set bit, or
 * sizeof(int) * 8 when x is 0.  A negative x has its top bit set, so the
 * result is 0.
 *
 * The scan is done on an unsigned copy of x: left-shifting a signed int so
 * that a set bit reaches the sign position is undefined behaviour in C,
 * whereas the unsigned shift is well defined and gives the same count.
 */
__inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros = 0;

    if (!bits) {
        return (int)(sizeof(int) * 8);
    }

    /* walk the top bit down until it is set */
    while (!(bits & 0x80000000u)) {
        numZeros++;
        bits <<= 1;
    }

    return numZeros;
}
357
358#else
359
360#ifdef __riscv
361
362typedef long long Word64;
363
365{
366 unsigned int result = 0;
367 asm volatile ("mulh %0, %1, %2" : "=r"(result): "r"(x), "r"(y));
368 return result;
369}
370
/* FASTABS - branchless absolute value of a signed integer.
 *
 * The shift yields 0 for non-negative x and all ones for negative x
 * (assuming >> on a negative int is an arithmetic shift); applying
 * XOR and then subtracting that mask negates x only when it is negative.
 */
__inline int FASTABS(int x)
{
    const int mask = x >> (sizeof(int) * 8 - 1);

    return (x ^ mask) - mask;
}
381
/* CLZ - count the leading zero bits of x.
 *
 * Returns the number of zero bits above the most significant set bit, or
 * sizeof(int) * 8 when x is 0.  A negative x has its top bit set, so the
 * result is 0.
 *
 * The scan is done on an unsigned copy of x: left-shifting a signed int so
 * that a set bit reaches the sign position is undefined behaviour in C,
 * whereas the unsigned shift is well defined and gives the same count.
 */
__inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros = 0;

    if (!bits) {
        return (int)(sizeof(int) * 8);
    }

    /* walk the top bit down until it is set */
    while (!(bits & 0x80000000u)) {
        numZeros++;
        bits <<= 1;
    }

    return numZeros;
}
397
/* MADD64 - 64-bit multiply-accumulate: returns sum + (a * b).
 *
 * The 64-bit product is assembled from two RISC-V multiply instructions:
 * mulh for the upper word and mul for the lower word.
 * NOTE(review): the 32-bit word split assumes an RV32 target - confirm
 * before building this path for RV64.
 */
__inline Word64 MADD64(Word64 sum, int a, int b)
{
    unsigned int result_hi = 0;
    unsigned int result_lo = 0;
    /* upper half of the signed product */
    asm volatile ("mulh %0, %1, %2" : "=r"(result_hi): "r"(a), "r"(b));
    /* lower half of the product */
    asm volatile ("mul %0, %1, %2" : "=r"(result_lo): "r"(a), "r"(b));

    /* recombine the two halves: (hi << 32) + lo, then add the accumulator */
    Word64 result = result_hi;
    result <<= 32;
    result += result_lo;
    result += sum;
    return result;
}
411
412__inline Word64 SHL64(Word64 x, int n)
413{
414 return (x<<n);
415}
416
417__inline Word64 SAR64(Word64 x, int n)
418{
419 return (x >> n);
420}
421
422#elif defined(__xtensa__)
423
424#include "xtensa/config/core-isa.h"
425
426typedef long long Word64;
427
428__inline Word64 MADD64(Word64 sum64, int x, int y)
429{
430 return (sum64 + ((long long)x * y));
431}
432
433#if XCHAL_HAVE_MUL32_HIGH
434
436{
437 /* important rules for smull RdLo, RdHi, Rm, Rs:
438 * RdHi and Rm can't be the same register
439 * RdLo and Rm can't be the same register
440 * RdHi and RdLo can't be the same register
441 * Note: Rs determines early termination (leading sign bits) so if you want to specify
442 * which operand is Rs, put it in the SECOND argument (y)
443 * For inline assembly, x and y are not assumed to be R0, R1 so it doesn't matter
444 * which one is returned. (If this were a function call, returning y (R1) would
445 * require an extra "mov r0, r1")
446 */
447 int ret;
448 asm volatile ("mulsh %0, %1, %2" : "=r" (ret) : "r" (x), "r" (y));
449 return ret;
450}
451
452#else
453
454// Fallback for Xtensa without MUL32_HIGH (e.g., ESP8266)
456{
457 Word64 result = ((Word64) x) * y;
458 return (int32_t)(result >> 32);
459}
460
461#endif
462
463#if XCHAL_HAVE_ABS
464
/* FASTABS - absolute value of x using the Xtensa "abs" instruction.
 * Only compiled when XCHAL_HAVE_ABS is set; a portable C fallback is
 * provided in the #else branch.
 */
__inline int FASTABS(int x)
{
    int ret;
    /* ret = abs(x); both operands are kept in general registers */
    asm volatile ("abs %0, %1" : "=r" (ret) : "r" (x));
    return ret;
}
471
472#else
473
474// Fallback for Xtensa without ABS instruction (e.g., ESP8266)
/* FASTABS - branchless absolute value of a signed integer.
 *
 * The shift yields 0 for non-negative x and all ones for negative x
 * (assuming >> on a negative int is an arithmetic shift); applying
 * XOR and then subtracting that mask negates x only when it is negative.
 */
__inline int FASTABS(int x)
{
    const int mask = x >> (sizeof(int) * 8 - 1);

    return (x ^ mask) - mask;
}
483
484#endif
485
486__inline Word64 SAR64(Word64 x, int n) FL_NOEXCEPT
487{
488 return x >> n;
489}
490
/* CLZ - count the leading zero bits of x.
 *
 * Returns the number of zero bits above the most significant set bit, or
 * sizeof(int) * 8 when x is 0.
 *
 * The zero case is handled explicitly because the result of
 * __builtin_clz(0) is undefined, while the other platform implementations
 * of CLZ in this file all return the full bit width for 0.
 */
__inline int CLZ(int x)
{
    if (!x) {
        return (int)(sizeof(int) * 8);
    }

    return __builtin_clz((unsigned int)x);
}
495
496#else
497
498typedef long long Word64;
499
504{
505 Word64 result = ((Word64) x) * y;
506
507 return (int32_t)(result >> 32);
508}
509
514{
516
517 sign = x >> (sizeof(int32_t) * 8 - 1);
518 x ^= sign;
519 x -= sign;
520
521 return x;
522}
523
528{
529 int32_t numZeros;
530
531 if (!x)
532 return (sizeof(int32_t) * 8);
533
534 numZeros = 0;
535 while (!(x & 0x80000000)) {
536 numZeros++;
537 x <<= 1;
538 }
539
540 return numZeros;
541}
542
546__inline Word64 MADD64(Word64 sum64, int x, int y) FL_NOEXCEPT
547{
548 sum64 += (Word64)x * (Word64)y;
549
550 return sum64;
551}
552
557{
558 return ((Word64) x) << n;
559}
560
565{
566 return x >> n;
567}
568
569#endif
570
571#endif /* platforms */
572
573} /* namespace third_party */
574} /* namespace fl */
575
576#endif /* _ASSEMBLY_H */
__inline int32_t MULSHIFT32(int32_t x, int32_t y) FL_NOEXCEPT
Multiply together two 32-bit numbers and return the top 32-bits of the result.
Definition assembly.h:503
__inline int32_t FASTABS(int32_t x) FL_NOEXCEPT
Absolute value of x.
Definition assembly.h:513
__inline Word64 SHL64(Word64 x, int n) FL_NOEXCEPT
Shift left.
Definition assembly.h:556
__inline Word64 MADD64(Word64 sum64, int x, int y) FL_NOEXCEPT
Increase sum by x * y.
Definition assembly.h:546
fl::i32 int32_t
Definition coder.h:220
__inline int32_t CLZ(int32_t x) FL_NOEXCEPT
Leading zeros.
Definition assembly.h:527
long long Word64
Definition assembly.h:498
__inline Word64 SAR64(Word64 x, int n) FL_NOEXCEPT
Shift right.
Definition assembly.h:564
constexpr enable_if< is_fixed_point< T >::value, int >::type sign(T x) FL_NOEXCEPT
expected< T, E > result
Alias for expected (Rust-style naming)
Definition result.h:31
float add(float &a, float &b)
Base definition for an LED controller.
Definition crgb.hpp:179
#define FL_NOEXCEPT