Speeduino
Loading...
Searching...
No Matches
bit_shifts.h
Go to the documentation of this file.
1#pragma once
2
7#include <stdint.h>
8#include "globals.h" // Required for CPU/architecture preprocessor symbols
9
26
27// Flag if we should turn on optimized shifts
// USE_OPTIMIZED_SHIFTS selects whether the inline-assembler shift specializations
// further down this file are compiled (1) or only the plain C++ shifts are used (0).
// A value that is already defined when this header is read is left untouched.
28#if !defined(USE_OPTIMIZED_SHIFTS)
// Default: enable only when an AVR core symbol is defined and __GNUC__ is defined.
29#if (defined(CORE_AVR) || defined(ARDUINO_ARCH_AVR)) && defined(__GNUC__)
30#define USE_OPTIMIZED_SHIFTS 1
31#else
32#define USE_OPTIMIZED_SHIFTS 0
33#endif
34#endif
35
/**
 * @brief Bitwise left shift of a uint32_t by a compile-time constant - generic, unoptimized, case.
 *
 * The primary template simply evaluates `a << b`. When USE_OPTIMIZED_SHIFTS==1 only
 * the shift distances listed in the static_assert below may use this generic form;
 * other distances must be provided by an explicit specialization.
 *
 * @tparam b Shift distance in bits. Must be less than 32.
 * @param a Value to shift.
 * @return `a` shifted left by `b` bits; bits moved past bit 31 are discarded.
 */
template <uint8_t b>
static inline uint32_t lshift(uint32_t a) {
  // Shifting by the full width of the type (or more) is undefined behaviour,
  // so reject it at compile time regardless of the optimization setting.
  static_assert(b < 32U, "Shift distance must be less than 32 bits");
#if USE_OPTIMIZED_SHIFTS==1
  // The shifts below have been validated to produce performant code in GCC.
  // Other shift amounts are either in a specialized template below (good) or are unvalidated (bad).
  static_assert(b==1 || b==2 || b==3 || b==8 || b==16 || b==24,
                "Unvalidated shift - confirm gcc produces performant code");
#endif
  return a << b;
}
53
54#if USE_OPTIMIZED_SHIFTS==1
55
56#pragma GCC diagnostic push
57#pragma GCC diagnostic ignored "-Wunused-function"
58
/**
 * @brief uint32_t bitwise left shift by 4 bits, written as inline assembler.
 *
 * Explicit specialization of lshift, compiled only when USE_OPTIMIZED_SHIFTS==1.
 * %A0 is the least significant byte of `a` and %D0 the most significant.
 * Each byte has its nibbles exchanged with `swap`; the following eor/andi/eor
 * triple then carries the nibble that crossed a byte boundary into the byte above.
 *
 * @param a Value to shift. The "0" input constraint ties it to the output operand,
 *          so the assembler rewrites it in place.
 * @return `a << 4`, truncated to 32 bits.
 */
66template <>
67uint32_t lshift<4U>(uint32_t a)
68{
69 asm(
70 "swap %D0\n" // exchange the nibbles of D
71 "andi %D0, 240\n" // D = (old D low nibble) << 4; old D high nibble is dropped
72 "swap %C0\n"
73 "eor %D0, %C0\n" // eor/andi/eor: D low nibble = old C high nibble,
74 "andi %C0, 240\n" // C = (old C low nibble) << 4
75 "eor %D0, %C0\n"
76 "swap %B0\n" // same step, carrying from B into C
77 "eor %C0, %B0\n"
78 "andi %B0, 240\n"
79 "eor %C0, %B0\n"
80 "swap %A0\n" // same step, carrying from A into B
81 "eor %B0, %A0\n"
82 "andi %A0, 240\n"
83 "eor %B0, %A0\n"
84 : "=d" (a)
85 : "0" (a)
86 :
87 );
88
89 return a;
90}
91
/**
 * @brief uint32_t bitwise left shift by 5 bits, written as inline assembler.
 *
 * Explicit specialization of lshift, compiled only when USE_OPTIMIZED_SHIFTS==1.
 * Performs the nibble-swap based 4-bit left shift across all four bytes
 * (%A0 = least significant, %D0 = most significant) and then one further
 * single-bit left shift through the carry flag.
 *
 * @param a Value to shift; rewritten in place (input tied to output by "0").
 * @return `a << 5`, truncated to 32 bits.
 */
92template <>
93uint32_t lshift<5U>(uint32_t a)
94{
95 asm(
96 "swap %D0\n" // --- shift left by 4 using nibble swaps ---
97 "andi %D0, 240\n"
98 "swap %C0\n"
99 "eor %D0, %C0\n" // eor/andi/eor moves old C high nibble into D low nibble
100 "andi %C0, 240\n"
101 "eor %D0, %C0\n"
102 "swap %B0\n"
103 "eor %C0, %B0\n"
104 "andi %B0, 240\n"
105 "eor %C0, %B0\n"
106 "swap %A0\n"
107 "eor %B0, %A0\n"
108 "andi %A0, 240\n"
109 "eor %B0, %A0\n"
110 "lsl %A0\n" // --- one more bit: shift A, then rotate the carry up through B, C, D ---
111 "rol %B0\n"
112 "rol %C0\n"
113 "rol %D0\n"
114 : "=d" (a)
115 : "0" (a)
116 :
117 );
118
119 return a;
120}
121
/**
 * @brief uint32_t bitwise left shift by 6 bits, written as inline assembler.
 *
 * Explicit specialization of lshift, compiled only when USE_OPTIMIZED_SHIFTS==1.
 * Implemented as a right shift by 2 whose shifted-out bits are collected in r18,
 * followed by moving every byte up one position (a left shift by 8): 8 - 2 = 6.
 * %A0 is the least significant byte of `a`, %D0 the most significant.
 * r18 and r19 are scratch registers and are listed as clobbers.
 *
 * @param a Value to shift; rewritten in place (input tied to output by "0").
 * @return `a << 6`, truncated to 32 bits.
 */
122template <>
123uint32_t lshift<6U>(uint32_t a)
124{
125 asm(
126 "lsr %D0\n" // first 1-bit right shift of D:C:B:A
127 "ror %C0\n"
128 "ror %B0\n"
129 "ror %A0\n"
130 "mov r18, __zero_reg__\n" // r18 = 0; the carry from the ror above is consumed next
131 "ror r18\n" // bit shifted out of A lands in bit 7 of r18
132 "lsr %D0\n" // second 1-bit right shift, again extended into r18
133 "ror %C0\n"
134 "ror %B0\n"
135 "ror %A0\n"
136 "ror r18\n"
137 "mov %D0, %C0\n" // move bytes up one position
138 "mov %C0, %B0\n"
139 "mov r19, %A0\n" // pair the old A with r18 ...
140 "movw %A0, r18\n" // ... so one movw sets A = r18 and B = r19
141 : "=d" (a)
142 : "0" (a)
143 : "r18", "r19"
144 );
145
146 return a;
147}
148
/**
 * @brief uint32_t bitwise left shift by 7 bits, written as inline assembler.
 *
 * Explicit specialization of lshift, compiled only when USE_OPTIMIZED_SHIFTS==1.
 * Implemented as a right shift by 1 whose shifted-out bit is collected in r18,
 * followed by moving every byte up one position (a left shift by 8): 8 - 1 = 7.
 * %A0 is the least significant byte of `a`, %D0 the most significant.
 * r18 and r19 are scratch registers and are listed as clobbers.
 *
 * @param a Value to shift; rewritten in place (input tied to output by "0").
 * @return `a << 7`, truncated to 32 bits.
 */
149template <>
150uint32_t lshift<7U>(uint32_t a)
151{
152 asm(
153 "lsr %D0\n" // 1-bit right shift of D:C:B:A
154 "ror %C0\n"
155 "ror %B0\n"
156 "ror %A0\n"
157 "mov r18, __zero_reg__\n" // r18 = 0; the carry from the ror above is consumed next
158 "ror r18\n" // bit shifted out of A lands in bit 7 of r18
159 "mov %D0, %C0\n" // move bytes up one position
160 "mov %C0, %B0\n"
161 "mov r19, %A0\n" // pair the old A with r18 ...
162 "movw %A0, r18\n" // ... so one movw sets A = r18 and B = r19
163 : "=d" (a)
164 : "0" (a)
165 : "r18", "r19"
166 );
167
168 return a;
169}
170
/**
 * @brief uint32_t bitwise left shift by 9 bits, written as inline assembler.
 *
 * Explicit specialization of lshift, compiled only when USE_OPTIMIZED_SHIFTS==1.
 * Shifts the three low bytes left by one bit, then moves every byte up one
 * position and clears the lowest byte (a left shift by 8). The top byte %D0 is
 * not shifted because it is overwritten by the byte move.
 * %A0 is the least significant byte of `a`, %D0 the most significant.
 *
 * @param a Value to shift; rewritten in place (input tied to output by "0").
 * @return `a << 9`, truncated to 32 bits.
 */
171template <>
172uint32_t lshift<9U>(uint32_t a)
173{
174 asm(
175 "lsl %A0\n" // 1-bit left shift of C:B:A
176 "rol %B0\n"
177 "rol %C0\n"
178 "mov %D0, %C0\n" // move bytes up one position
179 "mov %C0, %B0\n"
180 "mov %B0, %A0\n"
181 "mov %A0, __zero_reg__\n" // lowest byte becomes zero
182 : "=d" (a)
183 : "0" (a)
184 :
185 );
186
187 return a;
188}
189
/**
 * @brief uint32_t bitwise left shift by 10 bits, written as inline assembler.
 *
 * Explicit specialization of lshift, compiled only when USE_OPTIMIZED_SHIFTS==1.
 * Shifts the three low bytes left by one bit twice, then moves every byte up
 * one position and clears the lowest byte (a left shift by 8).
 * %A0 is the least significant byte of `a`, %D0 the most significant.
 *
 * @param a Value to shift; rewritten in place (input tied to output by "0").
 * @return `a << 10`, truncated to 32 bits.
 */
190template <>
191uint32_t lshift<10U>(uint32_t a)
192{
193 asm(
194 "lsl %A0\n" // first 1-bit left shift of C:B:A
195 "rol %B0\n"
196 "rol %C0\n"
197 "lsl %A0\n" // second 1-bit left shift of C:B:A
198 "rol %B0\n"
199 "rol %C0\n"
200 "mov %D0, %C0\n" // move bytes up one position
201 "mov %C0, %B0\n"
202 "mov %B0, %A0\n"
203 "mov %A0, __zero_reg__\n" // lowest byte becomes zero
204 : "=d" (a)
205 : "0" (a)
206 :
207 );
208
209 return a;
210}
211
/**
 * @brief uint32_t bitwise left shift by 11 bits, written as inline assembler.
 *
 * Explicit specialization of lshift, compiled only when USE_OPTIMIZED_SHIFTS==1.
 * Shifts the three low bytes left by one bit three times, then moves every byte
 * up one position and clears the lowest byte (a left shift by 8).
 * %A0 is the least significant byte of `a`, %D0 the most significant.
 *
 * @param a Value to shift; rewritten in place (input tied to output by "0").
 * @return `a << 11`, truncated to 32 bits.
 */
212template <>
213uint32_t lshift<11U>(uint32_t a)
214{
215 asm(
216 "lsl %A0\n" // first 1-bit left shift of C:B:A
217 "rol %B0\n"
218 "rol %C0\n"
219 "lsl %A0\n" // second 1-bit left shift of C:B:A
220 "rol %B0\n"
221 "rol %C0\n"
222 "lsl %A0\n" // third 1-bit left shift of C:B:A
223 "rol %B0\n"
224 "rol %C0\n"
225 "mov %D0, %C0\n" // move bytes up one position
226 "mov %C0, %B0\n"
227 "mov %B0, %A0\n"
228 "mov %A0, __zero_reg__\n" // lowest byte becomes zero
229 : "=d" (a)
230 : "0" (a)
231 :
232 );
233
234 return a;
235}
236
/**
 * @brief uint32_t bitwise left shift by 12 bits, written as inline assembler.
 *
 * Explicit specialization of lshift, compiled only when USE_OPTIMIZED_SHIFTS==1.
 * Performs a nibble-swap based 4-bit left shift across the three low bytes,
 * then moves every byte up one position and clears the lowest byte (a left
 * shift by 8). %A0 is the least significant byte of `a`, %D0 the most significant.
 *
 * @param a Value to shift; rewritten in place (input tied to output by "0").
 * @return `a << 12`, truncated to 32 bits.
 */
237template <>
238uint32_t lshift<12U>(uint32_t a)
239{
240 asm(
241 "swap %C0\n" // --- shift C:B:A left by 4 using nibble swaps ---
242 "andi %C0, 240\n" // C = (old C low nibble) << 4
243 "swap %B0\n"
244 "eor %C0, %B0\n" // eor/andi/eor: C low nibble = old B high nibble,
245 "andi %B0, 240\n" // B = (old B low nibble) << 4
246 "eor %C0, %B0\n"
247 "swap %A0\n" // same step, carrying from A into B
248 "eor %B0, %A0\n"
249 "andi %A0, 240\n"
250 "eor %B0, %A0\n"
251 "mov %D0, %C0\n" // --- move bytes up one position ---
252 "mov %C0, %B0\n"
253 "mov %B0, %A0\n"
254 "mov %A0, __zero_reg__\n" // lowest byte becomes zero
255 : "=d" (a)
256 : "0" (a)
257 :
258 );
259
260 return a;
261}
262
/**
 * @brief uint32_t bitwise left shift by 13 bits, written as inline assembler.
 *
 * Explicit specialization of lshift, compiled only when USE_OPTIMIZED_SHIFTS==1.
 * Performs a nibble-swap based 4-bit left shift across the three low bytes, one
 * further single-bit left shift, then moves every byte up one position and
 * clears the lowest byte (a left shift by 8).
 * %A0 is the least significant byte of `a`, %D0 the most significant.
 *
 * @param a Value to shift; rewritten in place (input tied to output by "0").
 * @return `a << 13`, truncated to 32 bits.
 */
263template <>
264uint32_t lshift<13U>(uint32_t a)
265{
266 asm(
267 "swap %C0\n" // --- shift C:B:A left by 4 using nibble swaps ---
268 "andi %C0, 240\n"
269 "swap %B0\n"
270 "eor %C0, %B0\n" // eor/andi/eor moves old B high nibble into C low nibble
271 "andi %B0, 240\n"
272 "eor %C0, %B0\n"
273 "swap %A0\n"
274 "eor %B0, %A0\n"
275 "andi %A0, 240\n"
276 "eor %B0, %A0\n"
277 "lsl %A0\n" // --- one more bit across C:B:A ---
278 "rol %B0\n"
279 "rol %C0\n"
280 "mov %D0, %C0\n" // --- move bytes up one position ---
281 "mov %C0, %B0\n"
282 "mov %B0, %A0\n"
283 "mov %A0, __zero_reg__\n" // lowest byte becomes zero
284 : "=d" (a)
285 : "0" (a)
286 :
287 );
288
289 return a;
290}
291
/**
 * @brief uint32_t bitwise left shift by 14 bits, written as inline assembler.
 *
 * Explicit specialization of lshift, compiled only when USE_OPTIMIZED_SHIFTS==1.
 * Implemented as a right shift by 2 of the three low bytes (working copy held in
 * C:r19:r18, with shifted-out bits collected in %B0), followed by placing the
 * result two bytes higher (a left shift by 16): 16 - 2 = 14.
 * %A0 is the least significant byte of `a`, %D0 the most significant.
 * r18 and r19 are scratch registers and are listed as clobbers.
 *
 * @param a Value to shift; rewritten in place (input tied to output by "0").
 * @return `a << 14`, truncated to 32 bits.
 */
292template <>
293uint32_t lshift<14U>(uint32_t a)
294{
295 asm(
296 "movw r18, %A0\n" // r18 = A, r19 = B
297 "lsr %C0\n" // first 1-bit right shift of C:r19:r18
298 "ror r19\n"
299 "ror r18\n"
300 "mov %B0, __zero_reg__\n" // B = 0; the carry from the ror above is consumed next
301 "ror %B0\n" // bit shifted out of r18 lands in bit 7 of B
302 "lsr %C0\n" // second 1-bit right shift, again extended into B
303 "ror r19\n"
304 "ror r18\n"
305 "ror %B0\n"
306 "mov %A0, __zero_reg__\n" // lowest byte becomes zero
307 "movw %C0, r18\n" // C = r18, D = r19
308 : "=d" (a)
309 : "0" (a)
310 : "r18", "r19"
311 );
312
313 return a;
314}
315
/**
 * @brief uint32_t bitwise left shift by 15 bits, written as inline assembler.
 *
 * Explicit specialization of lshift, compiled only when USE_OPTIMIZED_SHIFTS==1.
 * Implemented as a right shift by 1 of the three low bytes (working copy held in
 * C:r19:r18, with the shifted-out bit collected in %B0), followed by placing the
 * result two bytes higher (a left shift by 16): 16 - 1 = 15.
 * %A0 is the least significant byte of `a`, %D0 the most significant.
 * r18 and r19 are scratch registers and are listed as clobbers.
 *
 * @param a Value to shift; rewritten in place (input tied to output by "0").
 * @return `a << 15`, truncated to 32 bits.
 */
316template <>
317uint32_t lshift<15U>(uint32_t a)
318{
319 asm(
320 "movw r18, %A0\n" // r18 = A, r19 = B
321 "lsr %C0\n" // 1-bit right shift of C:r19:r18
322 "ror r19\n"
323 "ror r18\n"
324 "mov %B0, __zero_reg__\n" // B = 0; the carry from the ror above is consumed next
325 "ror %B0\n" // bit shifted out of r18 lands in bit 7 of B
326 "mov %A0, __zero_reg__\n" // lowest byte becomes zero
327 "movw %C0, r18\n" // C = r18, D = r19
328 : "=d" (a)
329 : "0" (a)
330 : "r18", "r19"
331 );
332
333 return a;
334}
336
337#pragma GCC diagnostic pop
338
339#endif
340
/**
 * @brief Bitwise right shift of a uint32_t by a compile-time constant - generic, unoptimized, case.
 *
 * The primary template simply evaluates `a >> b`. When USE_OPTIMIZED_SHIFTS==1 only
 * the shift distances listed in the static_assert below may use this generic form;
 * other distances must be provided by an explicit specialization.
 *
 * @tparam b Shift distance in bits. Must be less than 32.
 * @param a Value to shift.
 * @return `a` shifted right by `b` bits; vacated high bits are zero because `a` is unsigned.
 */
template <uint8_t b>
static inline uint32_t rshift(uint32_t a) {
  // Shifting by the full width of the type (or more) is undefined behaviour,
  // so reject it at compile time regardless of the optimization setting.
  static_assert(b < 32U, "Shift distance must be less than 32 bits");
#if USE_OPTIMIZED_SHIFTS==1
  // The shifts below have been validated to produce performant code in GCC.
  // Other shift amounts are either in a specialized template below (good) or are unvalidated (bad).
  static_assert(b==1 || b==2 || b==8 || b==16 || b==24,
                "Unvalidated shift - confirm gcc produces performant code");
#endif
  return a >> b;
}
357
358#if USE_OPTIMIZED_SHIFTS==1
359
360#pragma GCC diagnostic push
361#pragma GCC diagnostic ignored "-Wunused-function"
362
/**
 * @brief uint32_t bitwise right shift by 3 bits, written as inline assembler.
 *
 * Explicit specialization of rshift, compiled only when USE_OPTIMIZED_SHIFTS==1.
 * Three unrolled single-bit right shifts through the carry flag.
 * %A0 is the least significant byte of `a`, %D0 the most significant.
 *
 * @param a Value to shift; rewritten in place (input tied to output by "0").
 * @return `a >> 3`.
 */
370template <>
371uint32_t rshift<3U>(uint32_t a)
372{
373 asm(
374 "lsr %D0\n" // first 1-bit right shift of D:C:B:A
375 "ror %C0\n"
376 "ror %B0\n"
377 "ror %A0\n"
378 "lsr %D0\n" // second 1-bit right shift
379 "ror %C0\n"
380 "ror %B0\n"
381 "ror %A0\n"
382 "lsr %D0\n" // third 1-bit right shift
383 "ror %C0\n"
384 "ror %B0\n"
385 "ror %A0\n"
386 : "=d" (a)
387 : "0" (a)
388 :
389 );
390
391 return a;
392}
393
/**
 * @brief uint32_t bitwise right shift by 4 bits, written as inline assembler.
 *
 * Explicit specialization of rshift, compiled only when USE_OPTIMIZED_SHIFTS==1.
 * %A0 is the least significant byte of `a` and %D0 the most significant.
 * Each byte has its nibbles exchanged with `swap`; the following eor/andi/eor
 * triple then carries the nibble that crossed a byte boundary into the byte below.
 *
 * @param a Value to shift; rewritten in place (input tied to output by "0").
 * @return `a >> 4`.
 */
394template <>
395uint32_t rshift<4U>(uint32_t a)
396{
397 asm(
398 "swap %A0\n" // exchange the nibbles of A
399 "andi %A0, 15\n" // A = old A high nibble; old A low nibble is dropped
400 "swap %B0\n"
401 "eor %A0, %B0\n" // eor/andi/eor: A high nibble = old B low nibble,
402 "andi %B0, 15\n" // B = old B high nibble
403 "eor %A0, %B0\n"
404 "swap %C0\n" // same step, carrying from C into B
405 "eor %B0, %C0\n"
406 "andi %C0, 15\n"
407 "eor %B0, %C0\n"
408 "swap %D0\n" // same step, carrying from D into C
409 "eor %C0, %D0\n"
410 "andi %D0, 15\n"
411 "eor %C0, %D0\n"
412 : "=d" (a)
413 : "0" (a)
414 :
415 );
416
417 return a;
418}
419
/**
 * @brief uint32_t bitwise right shift by 5 bits, written as inline assembler.
 *
 * Explicit specialization of rshift, compiled only when USE_OPTIMIZED_SHIFTS==1.
 * Performs the nibble-swap based 4-bit right shift across all four bytes
 * (%A0 = least significant, %D0 = most significant) and then one further
 * single-bit right shift through the carry flag.
 *
 * @param a Value to shift; rewritten in place (input tied to output by "0").
 * @return `a >> 5`.
 */
420template <>
421uint32_t rshift<5U>(uint32_t a)
422{
423 asm(
424 "swap %A0\n" // --- shift right by 4 using nibble swaps ---
425 "andi %A0, 15\n"
426 "swap %B0\n"
427 "eor %A0, %B0\n" // eor/andi/eor moves old B low nibble into A high nibble
428 "andi %B0, 15\n"
429 "eor %A0, %B0\n"
430 "swap %C0\n"
431 "eor %B0, %C0\n"
432 "andi %C0, 15\n"
433 "eor %B0, %C0\n"
434 "swap %D0\n"
435 "eor %C0, %D0\n"
436 "andi %D0, 15\n"
437 "eor %C0, %D0\n"
438 "lsr %D0\n" // --- one more bit: shift D, then rotate the carry down through C, B, A ---
439 "ror %C0\n"
440 "ror %B0\n"
441 "ror %A0\n"
442 : "=d" (a)
443 : "0" (a)
444 :
445 );
446
447 return a;
448}
449
/**
 * @brief uint32_t bitwise right shift by 6 bits, written as inline assembler.
 *
 * Explicit specialization of rshift, compiled only when USE_OPTIMIZED_SHIFTS==1.
 * Implemented as a left shift by 2 whose shifted-out bits are collected in r19,
 * followed by moving every byte down one position (a right shift by 8): 8 - 2 = 6.
 * %A0 is the least significant byte of `a`, %D0 the most significant.
 * r18 and r19 are scratch registers and are listed as clobbers.
 *
 * @param a Value to shift; rewritten in place (input tied to output by "0").
 * @return `a >> 6`.
 */
450template <>
451uint32_t rshift<6U>(uint32_t a)
452{
453 asm(
454 "lsl %A0\n" // first 1-bit left shift of D:C:B:A
455 "rol %B0\n"
456 "rol %C0\n"
457 "rol %D0\n"
458 "mov r19, __zero_reg__\n" // r19 = 0; the carry from the rol above is consumed next
459 "rol r19\n" // bit shifted out of D lands in bit 0 of r19
460 "lsl %A0\n" // second 1-bit left shift, again extended into r19
461 "rol %B0\n"
462 "rol %C0\n"
463 "rol %D0\n"
464 "rol r19\n"
465 "mov %A0, %B0\n" // move bytes down one position
466 "mov %B0, %C0\n"
467 "mov r18, %D0\n" // pair the old D with r19 ...
468 "movw %C0, r18\n" // ... so one movw sets C = r18 and D = r19
469 : "=d" (a)
470 : "0" (a)
471 : "r18", "r19"
472 );
473
474 return a;
475}
476
/**
 * @brief uint32_t bitwise right shift by 7 bits, written as inline assembler.
 *
 * Explicit specialization of rshift, compiled only when USE_OPTIMIZED_SHIFTS==1.
 * Implemented as a left shift by 1 whose shifted-out bit is collected in r19,
 * followed by moving every byte down one position (a right shift by 8): 8 - 1 = 7.
 * %A0 is the least significant byte of `a`, %D0 the most significant.
 * r18 and r19 are scratch registers and are listed as clobbers.
 *
 * @param a Value to shift; rewritten in place (input tied to output by "0").
 * @return `a >> 7`.
 */
477template <>
478uint32_t rshift<7U>(uint32_t a)
479{
480 asm(
481 "lsl %A0\n" // 1-bit left shift of D:C:B:A
482 "rol %B0\n"
483 "rol %C0\n"
484 "rol %D0\n"
485 "mov r19, __zero_reg__\n" // r19 = 0; the carry from the rol above is consumed next
486 "rol r19\n" // bit shifted out of D lands in bit 0 of r19
487 "mov %A0, %B0\n" // move bytes down one position
488 "mov %B0, %C0\n"
489 "mov r18, %D0\n" // pair the old D with r19 ...
490 "movw %C0, r18\n" // ... so one movw sets C = r18 and D = r19
491 : "=d" (a)
492 : "0" (a)
493 : "r18", "r19"
494 );
495
496 return a;
497}
498
/**
 * @brief uint32_t bitwise right shift by 9 bits, written as inline assembler.
 *
 * Explicit specialization of rshift, compiled only when USE_OPTIMIZED_SHIFTS==1.
 * Shifts the three high bytes right by one bit, then moves every byte down one
 * position and clears the highest byte (a right shift by 8). The low byte %A0 is
 * not shifted because it is overwritten by the byte move.
 * %A0 is the least significant byte of `a`, %D0 the most significant.
 *
 * @param a Value to shift; rewritten in place (input tied to output by "0").
 * @return `a >> 9`.
 */
499template <>
500uint32_t rshift<9U>(uint32_t a)
501{
502 asm(
503 "lsr %D0\n" // 1-bit right shift of D:C:B
504 "ror %C0\n"
505 "ror %B0\n"
506 "mov %A0, %B0\n" // move bytes down one position
507 "mov %B0, %C0\n"
508 "mov %C0, %D0\n"
509 "mov %D0, __zero_reg__\n" // highest byte becomes zero
510 : "=d" (a)
511 : "0" (a)
512 :
513 );
514
515 return a;
516}
517
/**
 * @brief uint32_t bitwise right shift by 10 bits, written as inline assembler.
 *
 * Explicit specialization of rshift, compiled only when USE_OPTIMIZED_SHIFTS==1.
 * Shifts the three high bytes right by one bit twice, then moves every byte
 * down one position and clears the highest byte (a right shift by 8).
 * %A0 is the least significant byte of `a`, %D0 the most significant.
 *
 * @param a Value to shift; rewritten in place (input tied to output by "0").
 * @return `a >> 10`.
 */
518template <>
519uint32_t rshift<10U>(uint32_t a)
520{
521 asm(
522 "lsr %D0\n" // first 1-bit right shift of D:C:B
523 "ror %C0\n"
524 "ror %B0\n"
525 "lsr %D0\n" // second 1-bit right shift of D:C:B
526 "ror %C0\n"
527 "ror %B0\n"
528 "mov %A0, %B0\n" // move bytes down one position
529 "mov %B0, %C0\n"
530 "mov %C0, %D0\n"
531 "mov %D0, __zero_reg__\n" // highest byte becomes zero
532 : "=d" (a)
533 : "0" (a)
534 :
535 );
536
537 return a;
538}
539
/**
 * @brief uint32_t bitwise right shift by 11 bits, written as inline assembler.
 *
 * Explicit specialization of rshift, compiled only when USE_OPTIMIZED_SHIFTS==1.
 * Shifts the three high bytes right by one bit three times, then moves every
 * byte down one position and clears the highest byte (a right shift by 8).
 * %A0 is the least significant byte of `a`, %D0 the most significant.
 *
 * @param a Value to shift; rewritten in place (input tied to output by "0").
 * @return `a >> 11`.
 */
540template <>
541uint32_t rshift<11U>(uint32_t a)
542{
543 asm(
544 "lsr %D0\n" // first 1-bit right shift of D:C:B
545 "ror %C0\n"
546 "ror %B0\n"
547 "lsr %D0\n" // second 1-bit right shift of D:C:B
548 "ror %C0\n"
549 "ror %B0\n"
550 "lsr %D0\n" // third 1-bit right shift of D:C:B
551 "ror %C0\n"
552 "ror %B0\n"
553 "mov %A0, %B0\n" // move bytes down one position
554 "mov %B0, %C0\n"
555 "mov %C0, %D0\n"
556 "mov %D0, __zero_reg__\n" // highest byte becomes zero
557 : "=d" (a)
558 : "0" (a)
559 :
560 );
561
562 return a;
563}
564
/**
 * @brief uint32_t bitwise right shift by 12 bits, written as inline assembler.
 *
 * Explicit specialization of rshift, compiled only when USE_OPTIMIZED_SHIFTS==1.
 * Performs a nibble-swap based 4-bit right shift across the three high bytes,
 * then moves every byte down one position and clears the highest byte (a right
 * shift by 8). %A0 is the least significant byte of `a`, %D0 the most significant.
 *
 * @param a Value to shift; rewritten in place (input tied to output by "0").
 * @return `a >> 12`.
 */
565template <>
566uint32_t rshift<12U>(uint32_t a)
567{
568 asm(
569 "swap %B0\n" // --- shift D:C:B right by 4 using nibble swaps ---
570 "andi %B0, 15\n" // B = old B high nibble
571 "swap %C0\n"
572 "eor %B0, %C0\n" // eor/andi/eor: B high nibble = old C low nibble,
573 "andi %C0, 15\n" // C = old C high nibble
574 "eor %B0, %C0\n"
575 "swap %D0\n" // same step, carrying from D into C
576 "eor %C0, %D0\n"
577 "andi %D0, 15\n"
578 "eor %C0, %D0\n"
579 "mov %A0, %B0\n" // --- move bytes down one position ---
580 "mov %B0, %C0\n"
581 "mov %C0, %D0\n"
582 "mov %D0, __zero_reg__\n" // highest byte becomes zero
583 : "=d" (a)
584 : "0" (a)
585 :
586 );
587
588 return a;
589}
590
/**
 * @brief uint32_t bitwise right shift by 13 bits, written as inline assembler.
 *
 * Explicit specialization of rshift, compiled only when USE_OPTIMIZED_SHIFTS==1.
 * Performs a nibble-swap based 4-bit right shift across the three high bytes,
 * one further single-bit right shift, then moves every byte down one position
 * and clears the highest byte (a right shift by 8).
 * %A0 is the least significant byte of `a`, %D0 the most significant.
 *
 * @param a Value to shift; rewritten in place (input tied to output by "0").
 * @return `a >> 13`.
 */
591template <>
592uint32_t rshift<13U>(uint32_t a)
593{
594 asm(
595 "swap %B0\n" // --- shift D:C:B right by 4 using nibble swaps ---
596 "andi %B0, 15\n"
597 "swap %C0\n"
598 "eor %B0, %C0\n" // eor/andi/eor moves old C low nibble into B high nibble
599 "andi %C0, 15\n"
600 "eor %B0, %C0\n"
601 "swap %D0\n"
602 "eor %C0, %D0\n"
603 "andi %D0, 15\n"
604 "eor %C0, %D0\n"
605 "lsr %D0\n" // --- one more bit across D:C:B ---
606 "ror %C0\n"
607 "ror %B0\n"
608 "mov %A0, %B0\n" // --- move bytes down one position ---
609 "mov %B0, %C0\n"
610 "mov %C0, %D0\n"
611 "mov %D0, __zero_reg__\n" // highest byte becomes zero
612 : "=d" (a)
613 : "0" (a)
614 :
615 );
616
617 return a;
618}
619
/**
 * @brief uint32_t bitwise right shift by 14 bits, written as inline assembler.
 *
 * Explicit specialization of rshift, compiled only when USE_OPTIMIZED_SHIFTS==1.
 * Implemented as a left shift by 2 of the three high bytes (working copy held in
 * r19:r18:B, with shifted-out bits collected in %C0), followed by placing the
 * result two bytes lower (a right shift by 16): 16 - 2 = 14.
 * %A0 is the least significant byte of `a`, %D0 the most significant.
 * r18 and r19 are scratch registers and are listed as clobbers.
 *
 * @param a Value to shift; rewritten in place (input tied to output by "0").
 * @return `a >> 14`.
 */
620template <>
621uint32_t rshift<14U>(uint32_t a)
622{
623 asm(
624 "movw r18, %C0\n" // r18 = C, r19 = D
625 "lsl %B0\n" // first 1-bit left shift of r19:r18:B
626 "rol r18\n"
627 "rol r19\n"
628 "mov %C0, __zero_reg__\n" // C = 0; the carry from the rol above is consumed next
629 "rol %C0\n" // bit shifted out of r19 lands in bit 0 of C
630 "lsl %B0\n" // second 1-bit left shift, again extended into C
631 "rol r18\n"
632 "rol r19\n"
633 "rol %C0\n"
634 "mov %D0, __zero_reg__\n" // highest byte becomes zero
635 "movw %A0, r18\n" // A = r18, B = r19
636 : "=d" (a)
637 : "0" (a)
638 : "r18", "r19"
639 );
640
641 return a;
642}
643
/**
 * @brief uint32_t bitwise right shift by 15 bits, written as inline assembler.
 *
 * Explicit specialization of rshift, compiled only when USE_OPTIMIZED_SHIFTS==1.
 * Implemented as a left shift by 1 of the three high bytes (working copy held in
 * r19:r18:B, with the shifted-out bit collected in %C0), followed by placing the
 * result two bytes lower (a right shift by 16): 16 - 1 = 15.
 * %A0 is the least significant byte of `a`, %D0 the most significant.
 * r18 and r19 are scratch registers and are listed as clobbers.
 *
 * @param a Value to shift; rewritten in place (input tied to output by "0").
 * @return `a >> 15`.
 */
644template <>
645uint32_t rshift<15U>(uint32_t a)
646{
647 asm(
648 "movw r18, %C0\n" // r18 = C, r19 = D
649 "lsl %B0\n" // 1-bit left shift of r19:r18:B
650 "rol r18\n"
651 "rol r19\n"
652 "mov %C0, __zero_reg__\n" // C = 0; the carry from the rol above is consumed next
653 "rol %C0\n" // bit shifted out of r19 lands in bit 0 of C
654 "mov %D0, __zero_reg__\n" // highest byte becomes zero
655 "movw %A0, r18\n" // A = r18, B = r19
656 : "=d" (a)
657 : "0" (a)
658 : "r18", "r19"
659 );
660
661 return a;
662}
663
665
666#pragma GCC diagnostic pop
667
668#endif
669
682template <uint8_t b>
683static inline uint32_t rshift_round(uint32_t a) {
684 return rshift<b>(a+(1UL<<(b-1UL)));
685}
686
uint32_t lshift< 6U >(uint32_t a)
uint32_t bitwise left shift optimised for the specified shift distance
Definition bit_shifts.h:123
uint32_t rshift< 4U >(uint32_t a)
uint32_t bitwise right shift optimised for the specified shift distance
Definition bit_shifts.h:395
uint32_t lshift< 15U >(uint32_t a)
uint32_t bitwise left shift optimised for the specified shift distance
Definition bit_shifts.h:317
uint32_t rshift< 3U >(uint32_t a)
uint32_t bitwise right shift optimised for the specified shift distance
Definition bit_shifts.h:371
static uint32_t lshift(uint32_t a)
Bitwise left shift - generic, unoptimized, case.
Definition bit_shifts.h:44
uint32_t rshift< 9U >(uint32_t a)
uint32_t bitwise right shift optimised for the specified shift distance
Definition bit_shifts.h:500
uint32_t lshift< 4U >(uint32_t a)
uint32_t bitwise left shift optimised for the specified shift distance
Definition bit_shifts.h:67
uint32_t lshift< 11U >(uint32_t a)
uint32_t bitwise left shift optimised for the specified shift distance
Definition bit_shifts.h:213
uint32_t lshift< 14U >(uint32_t a)
uint32_t bitwise left shift optimised for the specified shift distance
Definition bit_shifts.h:293
uint32_t rshift< 5U >(uint32_t a)
uint32_t bitwise right shift optimised for the specified shift distance
Definition bit_shifts.h:421
uint32_t rshift< 6U >(uint32_t a)
uint32_t bitwise right shift optimised for the specified shift distance
Definition bit_shifts.h:451
uint32_t rshift< 15U >(uint32_t a)
uint32_t bitwise right shift optimised for the specified shift distance
Definition bit_shifts.h:645
uint32_t rshift< 10U >(uint32_t a)
uint32_t bitwise right shift optimised for the specified shift distance
Definition bit_shifts.h:519
uint32_t lshift< 10U >(uint32_t a)
uint32_t bitwise left shift optimised for the specified shift distance
Definition bit_shifts.h:191
uint32_t lshift< 12U >(uint32_t a)
uint32_t bitwise left shift optimised for the specified shift distance
Definition bit_shifts.h:238
uint32_t rshift< 13U >(uint32_t a)
uint32_t bitwise right shift optimised for the specified shift distance
Definition bit_shifts.h:592
uint32_t rshift< 14U >(uint32_t a)
uint32_t bitwise right shift optimised for the specified shift distance
Definition bit_shifts.h:621
static uint32_t rshift_round(uint32_t a)
Rounded arithmetic right shift.
Definition bit_shifts.h:683
uint32_t lshift< 9U >(uint32_t a)
uint32_t bitwise left shift optimised for the specified shift distance
Definition bit_shifts.h:172
static uint32_t rshift(uint32_t a)
Bitwise right shift - generic, unoptimized, case.
Definition bit_shifts.h:349
uint32_t lshift< 13U >(uint32_t a)
uint32_t bitwise left shift optimised for the specified shift distance
Definition bit_shifts.h:264
uint32_t rshift< 11U >(uint32_t a)
uint32_t bitwise right shift optimised for the specified shift distance
Definition bit_shifts.h:541
uint32_t rshift< 12U >(uint32_t a)
uint32_t bitwise right shift optimised for the specified shift distance
Definition bit_shifts.h:566
uint32_t rshift< 7U >(uint32_t a)
uint32_t bitwise right shift optimised for the specified shift distance
Definition bit_shifts.h:478
uint32_t lshift< 5U >(uint32_t a)
uint32_t bitwise left shift optimised for the specified shift distance
Definition bit_shifts.h:93
uint32_t lshift< 7U >(uint32_t a)
uint32_t bitwise left shift optimised for the specified shift distance
Definition bit_shifts.h:150
static uint8_t a
Definition maths.cpp:7