Speeduino
Loading...
Searching...
No Matches
bit_shifts.h
Go to the documentation of this file.
1#pragma once
2
7#include <stdint.h>
8
25
26// Flag if we should turn on optimized shifts
// USE_OPTIMIZED_SHIFTS may be pre-defined (to 0 or 1) before this point to override
// the default chosen here. When it is not pre-defined, it becomes 1 only if both
// __AVR__ and __GNUC__ are defined, and 0 otherwise.
27#if !defined(USE_OPTIMIZED_SHIFTS)
28#if (defined(__AVR__) && defined(__GNUC__))
29#define USE_OPTIMIZED_SHIFTS 1
30#else
31#define USE_OPTIMIZED_SHIFTS 0
32#endif // defined(__AVR__) && defined(__GNUC__)
33#endif // !defined(USE_OPTIMIZED_SHIFTS)
34
/**
 * @brief Bitwise left shift - generic, unoptimized, case.
 *
 * The shift distance is a template parameter so that specific distances can
 * be replaced by explicit specializations when USE_OPTIMIZED_SHIFTS is 1.
 *
 * @tparam b Number of bits to shift by. Must be less than 32. When
 *           USE_OPTIMIZED_SHIFTS is 1, this generic version additionally
 *           accepts only the distances listed in the static_assert below.
 * @param a Value to shift.
 * @return a << b
 */
template <uint8_t b>
static inline uint32_t lshift(uint32_t a) {
  // Shifting a 32-bit operand by 32 or more bits is undefined behaviour,
  // so reject it at compile time on every platform.
  static_assert(b < 32U, "Shift distance must be less than the 32-bit operand width");
#if USE_OPTIMIZED_SHIFTS==1
  // The shifts below have been validated to produce performant code in GCC.
  // Other shift amounts are either in a specialized template below (good) or are unvalidated (bad).
  static_assert(b==1 || b==2 || b==3 || b==8 || b==16 || b==24,
                "Unvalidated shift - confirm gcc produces performant code");
#endif
  return a << b;
}
52
// The hand-written lshift<> specializations that follow are compiled only
// when optimized shifts are enabled.
53#if USE_OPTIMIZED_SHIFTS==1
54
55#pragma GCC diagnostic push
// NOTE(review): presumably required because a translation unit may not call
// every specialization defined below - confirm.
56#pragma GCC diagnostic ignored "-Wunused-function"
57
/**
 * @brief uint32_t bitwise left shift optimised for a shift distance of 4 bits.
 *
 * Uses nibble swaps instead of four single-bit shift passes. Each byte has
 * its nibbles exchanged (swap); the eor / andi / eor triple then moves the
 * nibble that crossed a byte boundary into the next-higher byte. Bytes are
 * processed from most significant (%D0) down to least significant (%A0).
 *
 * @param a Value to shift.
 * @return a << 4
 */
65template <>
66uint32_t lshift<4U>(uint32_t a)
67{
68 asm(
69 "swap %D0\n" // D: exchange nibbles
70 "andi %D0, 240\n" // D: keep the former low nibble (now high), clear the rest
71 "swap %C0\n"
72 "eor %D0, %C0\n"
73 "andi %C0, 240\n"
74 "eor %D0, %C0\n" // D low nibble = former high nibble of C
75 "swap %B0\n"
76 "eor %C0, %B0\n"
77 "andi %B0, 240\n"
78 "eor %C0, %B0\n" // C low nibble = former high nibble of B
79 "swap %A0\n"
80 "eor %B0, %A0\n"
81 "andi %A0, 240\n"
82 "eor %B0, %A0\n" // B low nibble = former high nibble of A; A low nibble is zero
83 : "=d" (a) // "d": upper registers, as needed by andi
84 : "0" (a) // input shares the output operand's registers
85 :
86 );
87
88 return a;
89}
90
/**
 * @brief uint32_t bitwise left shift optimised for a shift distance of 5 bits.
 *
 * Shifts left by 4 bits using nibble swaps (swap plus eor / andi / eor per
 * byte, most significant byte first), then shifts one further bit through
 * the carry flag with lsl / rol.
 *
 * @param a Value to shift.
 * @return a << 5
 */
91template <>
92uint32_t lshift<5U>(uint32_t a)
93{
94 asm(
95 "swap %D0\n" // --- shift left by 4 using nibble swaps ---
96 "andi %D0, 240\n"
97 "swap %C0\n"
98 "eor %D0, %C0\n"
99 "andi %C0, 240\n"
100 "eor %D0, %C0\n"
101 "swap %B0\n"
102 "eor %C0, %B0\n"
103 "andi %B0, 240\n"
104 "eor %C0, %B0\n"
105 "swap %A0\n"
106 "eor %B0, %A0\n"
107 "andi %A0, 240\n"
108 "eor %B0, %A0\n"
109 "lsl %A0\n" // --- one more bit: shift low byte, carry ripples upwards ---
110 "rol %B0\n"
111 "rol %C0\n"
112 "rol %D0\n"
113 : "=d" (a) // "d": upper registers, as needed by andi
114 : "0" (a) // input shares the output operand's registers
115 :
116 );
117
118 return a;
119}
120
/**
 * @brief uint32_t bitwise left shift optimised for a shift distance of 6 bits.
 *
 * Computed as a right shift by 2 followed by moving every byte up one
 * position (i.e. a << 8 >> 2). The two bits that fall out of the low byte
 * during the right shift are collected in r18 and become the new low byte.
 * r18 and r19 are used as fixed scratch registers and are listed as clobbers.
 *
 * @param a Value to shift.
 * @return a << 6
 */
121template <>
122uint32_t lshift<6U>(uint32_t a)
123{
124 asm(
125 "lsr %D0\n" // first 1-bit right shift across all four bytes
126 "ror %C0\n"
127 "ror %B0\n"
128 "ror %A0\n"
129 "mov r18, __zero_reg__\n" // clear scratch byte; mov does not disturb the carry flag
130 "ror r18\n" // capture the bit shifted out of A
131 "lsr %D0\n" // second 1-bit right shift
132 "ror %C0\n"
133 "ror %B0\n"
134 "ror %A0\n"
135 "ror r18\n" // capture the second bit shifted out of A
136 "mov %D0, %C0\n" // move bytes up one position
137 "mov %C0, %B0\n"
138 "mov r19, %A0\n"
139 "movw %A0, r18\n" // A = r18 (captured bits), B = r19 (old A)
140 : "=d" (a)
141 : "0" (a) // input shares the output operand's registers
142 : "r18", "r19"
143 );
144
145 return a;
146}
147
/**
 * @brief uint32_t bitwise left shift optimised for a shift distance of 7 bits.
 *
 * Computed as a right shift by 1 followed by moving every byte up one
 * position (i.e. a << 8 >> 1). The bit that falls out of the low byte is
 * collected in r18 and becomes the new low byte. r18 and r19 are used as
 * fixed scratch registers and are listed as clobbers.
 *
 * @param a Value to shift.
 * @return a << 7
 */
148template <>
149uint32_t lshift<7U>(uint32_t a)
150{
151 asm(
152 "lsr %D0\n" // 1-bit right shift across all four bytes
153 "ror %C0\n"
154 "ror %B0\n"
155 "ror %A0\n"
156 "mov r18, __zero_reg__\n" // clear scratch byte; mov does not disturb the carry flag
157 "ror r18\n" // capture the bit shifted out of A
158 "mov %D0, %C0\n" // move bytes up one position
159 "mov %C0, %B0\n"
160 "mov r19, %A0\n"
161 "movw %A0, r18\n" // A = r18 (captured bit), B = r19 (old A)
162 : "=d" (a)
163 : "0" (a) // input shares the output operand's registers
164 : "r18", "r19"
165 );
166
167 return a;
168}
169
/**
 * @brief uint32_t bitwise left shift optimised for a shift distance of 9 bits.
 *
 * Shifts the three low bytes left by one bit, then moves every byte up one
 * position and clears the low byte. The original top byte is never shifted
 * because it is overwritten by the byte move.
 *
 * @param a Value to shift.
 * @return a << 9
 */
170template <>
171uint32_t lshift<9U>(uint32_t a)
172{
173 asm(
174 "lsl %A0\n" // 1-bit left shift of the low three bytes
175 "rol %B0\n"
176 "rol %C0\n"
177 "mov %D0, %C0\n" // move bytes up one position (shift by 8)
178 "mov %C0, %B0\n"
179 "mov %B0, %A0\n"
180 "mov %A0, __zero_reg__\n" // low byte becomes zero
181 : "=d" (a)
182 : "0" (a) // input shares the output operand's registers
183 :
184 );
185
186 return a;
187}
188
/**
 * @brief uint32_t bitwise left shift optimised for a shift distance of 10 bits.
 *
 * Shifts the three low bytes left by two bits (two single-bit passes), then
 * moves every byte up one position and clears the low byte.
 *
 * @param a Value to shift.
 * @return a << 10
 */
189template <>
190uint32_t lshift<10U>(uint32_t a)
191{
192 asm(
193 "lsl %A0\n" // first 1-bit left shift of the low three bytes
194 "rol %B0\n"
195 "rol %C0\n"
196 "lsl %A0\n" // second 1-bit left shift
197 "rol %B0\n"
198 "rol %C0\n"
199 "mov %D0, %C0\n" // move bytes up one position (shift by 8)
200 "mov %C0, %B0\n"
201 "mov %B0, %A0\n"
202 "mov %A0, __zero_reg__\n" // low byte becomes zero
203 : "=d" (a)
204 : "0" (a) // input shares the output operand's registers
205 :
206 );
207
208 return a;
209}
210
/**
 * @brief uint32_t bitwise left shift optimised for a shift distance of 11 bits.
 *
 * Shifts the three low bytes left by three bits (three single-bit passes),
 * then moves every byte up one position and clears the low byte.
 *
 * @param a Value to shift.
 * @return a << 11
 */
211template <>
212uint32_t lshift<11U>(uint32_t a)
213{
214 asm(
215 "lsl %A0\n" // first 1-bit left shift of the low three bytes
216 "rol %B0\n"
217 "rol %C0\n"
218 "lsl %A0\n" // second 1-bit left shift
219 "rol %B0\n"
220 "rol %C0\n"
221 "lsl %A0\n" // third 1-bit left shift
222 "rol %B0\n"
223 "rol %C0\n"
224 "mov %D0, %C0\n" // move bytes up one position (shift by 8)
225 "mov %C0, %B0\n"
226 "mov %B0, %A0\n"
227 "mov %A0, __zero_reg__\n" // low byte becomes zero
228 : "=d" (a)
229 : "0" (a) // input shares the output operand's registers
230 :
231 );
232
233 return a;
234}
235
/**
 * @brief uint32_t bitwise left shift optimised for a shift distance of 12 bits.
 *
 * Shifts the three low bytes left by 4 bits using nibble swaps (swap plus
 * eor / andi / eor per byte), then moves every byte up one position and
 * clears the low byte.
 *
 * @param a Value to shift.
 * @return a << 12
 */
236template <>
237uint32_t lshift<12U>(uint32_t a)
238{
239 asm(
240 "swap %C0\n" // --- shift the low three bytes left by 4 using nibble swaps ---
241 "andi %C0, 240\n"
242 "swap %B0\n"
243 "eor %C0, %B0\n"
244 "andi %B0, 240\n"
245 "eor %C0, %B0\n"
246 "swap %A0\n"
247 "eor %B0, %A0\n"
248 "andi %A0, 240\n"
249 "eor %B0, %A0\n"
250 "mov %D0, %C0\n" // --- move bytes up one position (shift by 8) ---
251 "mov %C0, %B0\n"
252 "mov %B0, %A0\n"
253 "mov %A0, __zero_reg__\n" // low byte becomes zero
254 : "=d" (a) // "d": upper registers, as needed by andi
255 : "0" (a) // input shares the output operand's registers
256 :
257 );
258
259 return a;
260}
261
/**
 * @brief uint32_t bitwise left shift optimised for a shift distance of 13 bits.
 *
 * Shifts the three low bytes left by 4 bits using nibble swaps, then by one
 * further bit through the carry flag, then moves every byte up one position
 * and clears the low byte.
 *
 * @param a Value to shift.
 * @return a << 13
 */
262template <>
263uint32_t lshift<13U>(uint32_t a)
264{
265 asm(
266 "swap %C0\n" // --- shift the low three bytes left by 4 using nibble swaps ---
267 "andi %C0, 240\n"
268 "swap %B0\n"
269 "eor %C0, %B0\n"
270 "andi %B0, 240\n"
271 "eor %C0, %B0\n"
272 "swap %A0\n"
273 "eor %B0, %A0\n"
274 "andi %A0, 240\n"
275 "eor %B0, %A0\n"
276 "lsl %A0\n" // --- one more bit ---
277 "rol %B0\n"
278 "rol %C0\n"
279 "mov %D0, %C0\n" // --- move bytes up one position (shift by 8) ---
280 "mov %C0, %B0\n"
281 "mov %B0, %A0\n"
282 "mov %A0, __zero_reg__\n" // low byte becomes zero
283 : "=d" (a) // "d": upper registers, as needed by andi
284 : "0" (a) // input shares the output operand's registers
285 :
286 );
287
288 return a;
289}
290
/**
 * @brief uint32_t bitwise left shift optimised for a shift distance of 14 bits.
 *
 * Computed as a right shift by 2 of the low three bytes followed by moving
 * the result up two byte positions (i.e. a << 16 >> 2). The two low bytes
 * are copied to r18:r19 and shifted there; the bits that fall out of r18
 * are collected in %B0, which becomes the second-lowest result byte.
 * r18 and r19 are fixed scratch registers and are listed as clobbers.
 *
 * @param a Value to shift.
 * @return a << 14
 */
291template <>
292uint32_t lshift<14U>(uint32_t a)
293{
294 asm(
295 "movw r18, %A0\n" // r18 = A, r19 = B
296 "lsr %C0\n" // first 1-bit right shift of C : r19 : r18
297 "ror r19\n"
298 "ror r18\n"
299 "mov %B0, __zero_reg__\n" // clear B; mov does not disturb the carry flag
300 "ror %B0\n" // capture the bit shifted out of r18
301 "lsr %C0\n" // second 1-bit right shift
302 "ror r19\n"
303 "ror r18\n"
304 "ror %B0\n" // capture the second bit
305 "mov %A0, __zero_reg__\n" // low byte becomes zero
306 "movw %C0, r18\n" // C = r18, D = r19
307 : "=d" (a)
308 : "0" (a) // input shares the output operand's registers
309 : "r18", "r19"
310 );
311
312 return a;
313}
314
/**
 * @brief uint32_t bitwise left shift optimised for a shift distance of 15 bits.
 *
 * Computed as a right shift by 1 of the low three bytes followed by moving
 * the result up two byte positions (i.e. a << 16 >> 1). The two low bytes
 * are copied to r18:r19 and shifted there; the bit that falls out of r18 is
 * collected in %B0. r18 and r19 are fixed scratch registers and are listed
 * as clobbers.
 *
 * @param a Value to shift.
 * @return a << 15
 */
315template <>
316uint32_t lshift<15U>(uint32_t a)
317{
318 asm(
319 "movw r18, %A0\n" // r18 = A, r19 = B
320 "lsr %C0\n" // 1-bit right shift of C : r19 : r18
321 "ror r19\n"
322 "ror r18\n"
323 "mov %B0, __zero_reg__\n" // clear B; mov does not disturb the carry flag
324 "ror %B0\n" // capture the bit shifted out of r18
325 "mov %A0, __zero_reg__\n" // low byte becomes zero
326 "movw %C0, r18\n" // C = r18, D = r19
327 : "=d" (a)
328 : "0" (a) // input shares the output operand's registers
329 : "r18", "r19"
330 );
331
332 return a;
333}
335
336#pragma GCC diagnostic pop
337
338#endif
339
/**
 * @brief Bitwise right shift - generic, unoptimized, case.
 *
 * The shift distance is a template parameter so that specific distances can
 * be replaced by explicit specializations when USE_OPTIMIZED_SHIFTS is 1.
 * The operand is unsigned, so vacated high bits are filled with zeros.
 *
 * @tparam b Number of bits to shift by. Must be less than 32. When
 *           USE_OPTIMIZED_SHIFTS is 1, this generic version additionally
 *           accepts only the distances listed in the static_assert below.
 * @param a Value to shift.
 * @return a >> b
 */
template <uint8_t b>
static inline uint32_t rshift(uint32_t a) {
  // Shifting a 32-bit operand by 32 or more bits is undefined behaviour,
  // so reject it at compile time on every platform.
  static_assert(b < 32U, "Shift distance must be less than the 32-bit operand width");
#if USE_OPTIMIZED_SHIFTS==1
  // The shifts below have been validated to produce performant code in GCC.
  // Other shift amounts are either in a specialized template below (good) or are unvalidated (bad).
  static_assert(b==1 || b==2 || b==8 || b==16 || b==24,
                "Unvalidated shift - confirm gcc produces performant code");
#endif
  return a >> b;
}
356
// The hand-written rshift<> specializations that follow are compiled only
// when optimized shifts are enabled.
357#if USE_OPTIMIZED_SHIFTS==1
358
359#pragma GCC diagnostic push
// NOTE(review): presumably required because a translation unit may not call
// every specialization defined below - confirm.
360#pragma GCC diagnostic ignored "-Wunused-function"
361
/**
 * @brief uint32_t bitwise right shift optimised for a shift distance of 3 bits.
 *
 * Three unrolled single-bit passes: lsr on the most significant byte, then
 * ror through the carry flag on each lower byte.
 *
 * @param a Value to shift.
 * @return a >> 3
 */
369template <>
370uint32_t rshift<3U>(uint32_t a)
371{
372 asm(
373 "lsr %D0\n" // first 1-bit right shift, top byte first
374 "ror %C0\n"
375 "ror %B0\n"
376 "ror %A0\n"
377 "lsr %D0\n" // second 1-bit right shift
378 "ror %C0\n"
379 "ror %B0\n"
380 "ror %A0\n"
381 "lsr %D0\n" // third 1-bit right shift
382 "ror %C0\n"
383 "ror %B0\n"
384 "ror %A0\n"
385 : "=d" (a)
386 : "0" (a) // input shares the output operand's registers
387 :
388 );
389
390 return a;
391}
392
/**
 * @brief uint32_t bitwise right shift optimised for a shift distance of 4 bits.
 *
 * Uses nibble swaps instead of four single-bit shift passes. Each byte has
 * its nibbles exchanged (swap); the eor / andi / eor triple then moves the
 * nibble that crossed a byte boundary into the next-lower byte. Bytes are
 * processed from least significant (%A0) up to most significant (%D0).
 *
 * @param a Value to shift.
 * @return a >> 4
 */
393template <>
394uint32_t rshift<4U>(uint32_t a)
395{
396 asm(
397 "swap %A0\n" // A: exchange nibbles
398 "andi %A0, 15\n" // A: keep the former high nibble (now low), clear the rest
399 "swap %B0\n"
400 "eor %A0, %B0\n"
401 "andi %B0, 15\n"
402 "eor %A0, %B0\n" // A high nibble = former low nibble of B
403 "swap %C0\n"
404 "eor %B0, %C0\n"
405 "andi %C0, 15\n"
406 "eor %B0, %C0\n" // B high nibble = former low nibble of C
407 "swap %D0\n"
408 "eor %C0, %D0\n"
409 "andi %D0, 15\n"
410 "eor %C0, %D0\n" // C high nibble = former low nibble of D; D high nibble is zero
411 : "=d" (a) // "d": upper registers, as needed by andi
412 : "0" (a) // input shares the output operand's registers
413 :
414 );
415
416 return a;
417}
418
/**
 * @brief uint32_t bitwise right shift optimised for a shift distance of 5 bits.
 *
 * Shifts right by 4 bits using nibble swaps (swap plus eor / andi / eor per
 * byte, least significant byte first), then shifts one further bit through
 * the carry flag with lsr / ror.
 *
 * @param a Value to shift.
 * @return a >> 5
 */
419template <>
420uint32_t rshift<5U>(uint32_t a)
421{
422 asm(
423 "swap %A0\n" // --- shift right by 4 using nibble swaps ---
424 "andi %A0, 15\n"
425 "swap %B0\n"
426 "eor %A0, %B0\n"
427 "andi %B0, 15\n"
428 "eor %A0, %B0\n"
429 "swap %C0\n"
430 "eor %B0, %C0\n"
431 "andi %C0, 15\n"
432 "eor %B0, %C0\n"
433 "swap %D0\n"
434 "eor %C0, %D0\n"
435 "andi %D0, 15\n"
436 "eor %C0, %D0\n"
437 "lsr %D0\n" // --- one more bit: shift top byte, carry ripples downwards ---
438 "ror %C0\n"
439 "ror %B0\n"
440 "ror %A0\n"
441 : "=d" (a) // "d": upper registers, as needed by andi
442 : "0" (a) // input shares the output operand's registers
443 :
444 );
445
446 return a;
447}
448
/**
 * @brief uint32_t bitwise right shift optimised for a shift distance of 6 bits.
 *
 * Computed as a left shift by 2 followed by moving every byte down one
 * position (i.e. a << 2 >> 8). The two bits that overflow out of the top
 * byte during the left shift are collected in r19 and become the new top
 * byte. r18 and r19 are used as fixed scratch registers and are listed as
 * clobbers.
 *
 * @param a Value to shift.
 * @return a >> 6
 */
449template <>
450uint32_t rshift<6U>(uint32_t a)
451{
452 asm(
453 "lsl %A0\n" // first 1-bit left shift across all four bytes
454 "rol %B0\n"
455 "rol %C0\n"
456 "rol %D0\n"
457 "mov r19, __zero_reg__\n" // clear scratch byte; mov does not disturb the carry flag
458 "rol r19\n" // capture the bit shifted out of D
459 "lsl %A0\n" // second 1-bit left shift
460 "rol %B0\n"
461 "rol %C0\n"
462 "rol %D0\n"
463 "rol r19\n" // capture the second bit shifted out of D
464 "mov %A0, %B0\n" // move bytes down one position
465 "mov %B0, %C0\n"
466 "mov r18, %D0\n"
467 "movw %C0, r18\n" // C = r18 (old D), D = r19 (captured bits)
468 : "=d" (a)
469 : "0" (a) // input shares the output operand's registers
470 : "r18", "r19"
471 );
472
473 return a;
474}
475
/**
 * @brief uint32_t bitwise right shift optimised for a shift distance of 7 bits.
 *
 * Computed as a left shift by 1 followed by moving every byte down one
 * position (i.e. a << 1 >> 8). The bit that overflows out of the top byte
 * is collected in r19 and becomes the new top byte. r18 and r19 are used as
 * fixed scratch registers and are listed as clobbers.
 *
 * @param a Value to shift.
 * @return a >> 7
 */
476template <>
477uint32_t rshift<7U>(uint32_t a)
478{
479 asm(
480 "lsl %A0\n" // 1-bit left shift across all four bytes
481 "rol %B0\n"
482 "rol %C0\n"
483 "rol %D0\n"
484 "mov r19, __zero_reg__\n" // clear scratch byte; mov does not disturb the carry flag
485 "rol r19\n" // capture the bit shifted out of D
486 "mov %A0, %B0\n" // move bytes down one position
487 "mov %B0, %C0\n"
488 "mov r18, %D0\n"
489 "movw %C0, r18\n" // C = r18 (old D), D = r19 (captured bit)
490 : "=d" (a)
491 : "0" (a) // input shares the output operand's registers
492 : "r18", "r19"
493 );
494
495 return a;
496}
497
/**
 * @brief uint32_t bitwise right shift optimised for a shift distance of 9 bits.
 *
 * Shifts the three high bytes right by one bit, then moves every byte down
 * one position and clears the top byte. The original low byte is never
 * shifted because it is overwritten by the byte move.
 *
 * @param a Value to shift.
 * @return a >> 9
 */
498template <>
499uint32_t rshift<9U>(uint32_t a)
500{
501 asm(
502 "lsr %D0\n" // 1-bit right shift of the high three bytes
503 "ror %C0\n"
504 "ror %B0\n"
505 "mov %A0, %B0\n" // move bytes down one position (shift by 8)
506 "mov %B0, %C0\n"
507 "mov %C0, %D0\n"
508 "mov %D0, __zero_reg__\n" // top byte becomes zero
509 : "=d" (a)
510 : "0" (a) // input shares the output operand's registers
511 :
512 );
513
514 return a;
515}
516
/**
 * @brief uint32_t bitwise right shift optimised for a shift distance of 10 bits.
 *
 * Shifts the three high bytes right by two bits (two single-bit passes),
 * then moves every byte down one position and clears the top byte.
 *
 * @param a Value to shift.
 * @return a >> 10
 */
517template <>
518uint32_t rshift<10U>(uint32_t a)
519{
520 asm(
521 "lsr %D0\n" // first 1-bit right shift of the high three bytes
522 "ror %C0\n"
523 "ror %B0\n"
524 "lsr %D0\n" // second 1-bit right shift
525 "ror %C0\n"
526 "ror %B0\n"
527 "mov %A0, %B0\n" // move bytes down one position (shift by 8)
528 "mov %B0, %C0\n"
529 "mov %C0, %D0\n"
530 "mov %D0, __zero_reg__\n" // top byte becomes zero
531 : "=d" (a)
532 : "0" (a) // input shares the output operand's registers
533 :
534 );
535
536 return a;
537}
538
/**
 * @brief uint32_t bitwise right shift optimised for a shift distance of 11 bits.
 *
 * Shifts the three high bytes right by three bits (three single-bit passes),
 * then moves every byte down one position and clears the top byte.
 *
 * @param a Value to shift.
 * @return a >> 11
 */
539template <>
540uint32_t rshift<11U>(uint32_t a)
541{
542 asm(
543 "lsr %D0\n" // first 1-bit right shift of the high three bytes
544 "ror %C0\n"
545 "ror %B0\n"
546 "lsr %D0\n" // second 1-bit right shift
547 "ror %C0\n"
548 "ror %B0\n"
549 "lsr %D0\n" // third 1-bit right shift
550 "ror %C0\n"
551 "ror %B0\n"
552 "mov %A0, %B0\n" // move bytes down one position (shift by 8)
553 "mov %B0, %C0\n"
554 "mov %C0, %D0\n"
555 "mov %D0, __zero_reg__\n" // top byte becomes zero
556 : "=d" (a)
557 : "0" (a) // input shares the output operand's registers
558 :
559 );
560
561 return a;
562}
563
/**
 * @brief uint32_t bitwise right shift optimised for a shift distance of 12 bits.
 *
 * Shifts the three high bytes right by 4 bits using nibble swaps (swap plus
 * eor / andi / eor per byte), then moves every byte down one position and
 * clears the top byte.
 *
 * @param a Value to shift.
 * @return a >> 12
 */
564template <>
565uint32_t rshift<12U>(uint32_t a)
566{
567 asm(
568 "swap %B0\n" // --- shift the high three bytes right by 4 using nibble swaps ---
569 "andi %B0, 15\n"
570 "swap %C0\n"
571 "eor %B0, %C0\n"
572 "andi %C0, 15\n"
573 "eor %B0, %C0\n"
574 "swap %D0\n"
575 "eor %C0, %D0\n"
576 "andi %D0, 15\n"
577 "eor %C0, %D0\n"
578 "mov %A0, %B0\n" // --- move bytes down one position (shift by 8) ---
579 "mov %B0, %C0\n"
580 "mov %C0, %D0\n"
581 "mov %D0, __zero_reg__\n" // top byte becomes zero
582 : "=d" (a) // "d": upper registers, as needed by andi
583 : "0" (a) // input shares the output operand's registers
584 :
585 );
586
587 return a;
588}
589
/**
 * @brief uint32_t bitwise right shift optimised for a shift distance of 13 bits.
 *
 * Shifts the three high bytes right by 4 bits using nibble swaps, then by
 * one further bit through the carry flag, then moves every byte down one
 * position and clears the top byte.
 *
 * @param a Value to shift.
 * @return a >> 13
 */
590template <>
591uint32_t rshift<13U>(uint32_t a)
592{
593 asm(
594 "swap %B0\n" // --- shift the high three bytes right by 4 using nibble swaps ---
595 "andi %B0, 15\n"
596 "swap %C0\n"
597 "eor %B0, %C0\n"
598 "andi %C0, 15\n"
599 "eor %B0, %C0\n"
600 "swap %D0\n"
601 "eor %C0, %D0\n"
602 "andi %D0, 15\n"
603 "eor %C0, %D0\n"
604 "lsr %D0\n" // --- one more bit ---
605 "ror %C0\n"
606 "ror %B0\n"
607 "mov %A0, %B0\n" // --- move bytes down one position (shift by 8) ---
608 "mov %B0, %C0\n"
609 "mov %C0, %D0\n"
610 "mov %D0, __zero_reg__\n" // top byte becomes zero
611 : "=d" (a) // "d": upper registers, as needed by andi
612 : "0" (a) // input shares the output operand's registers
613 :
614 );
615
616 return a;
617}
618
/**
 * @brief uint32_t bitwise right shift optimised for a shift distance of 14 bits.
 *
 * Computed as a left shift by 2 of the high three bytes followed by moving
 * the result down two byte positions (i.e. a << 2 >> 16). The two high
 * bytes are copied to r18:r19 and shifted there; the bits that overflow out
 * of r19 are collected in %C0, which becomes the second-highest result byte.
 * r18 and r19 are fixed scratch registers and are listed as clobbers.
 *
 * @param a Value to shift.
 * @return a >> 14
 */
619template <>
620uint32_t rshift<14U>(uint32_t a)
621{
622 asm(
623 "movw r18, %C0\n" // r18 = C, r19 = D
624 "lsl %B0\n" // first 1-bit left shift of r19 : r18 : B
625 "rol r18\n"
626 "rol r19\n"
627 "mov %C0, __zero_reg__\n" // clear C; mov does not disturb the carry flag
628 "rol %C0\n" // capture the bit shifted out of r19
629 "lsl %B0\n" // second 1-bit left shift
630 "rol r18\n"
631 "rol r19\n"
632 "rol %C0\n" // capture the second bit
633 "mov %D0, __zero_reg__\n" // top byte becomes zero
634 "movw %A0, r18\n" // A = r18, B = r19
635 : "=d" (a)
636 : "0" (a) // input shares the output operand's registers
637 : "r18", "r19"
638 );
639
640 return a;
641}
642
/**
 * @brief uint32_t bitwise right shift optimised for a shift distance of 15 bits.
 *
 * Computed as a left shift by 1 of the high three bytes followed by moving
 * the result down two byte positions (i.e. a << 1 >> 16). The two high
 * bytes are copied to r18:r19 and shifted there; the bit that overflows out
 * of r19 is collected in %C0. r18 and r19 are fixed scratch registers and
 * are listed as clobbers.
 *
 * @param a Value to shift.
 * @return a >> 15
 */
643template <>
644uint32_t rshift<15U>(uint32_t a)
645{
646 asm(
647 "movw r18, %C0\n" // r18 = C, r19 = D
648 "lsl %B0\n" // 1-bit left shift of r19 : r18 : B
649 "rol r18\n"
650 "rol r19\n"
651 "mov %C0, __zero_reg__\n" // clear C; mov does not disturb the carry flag
652 "rol %C0\n" // capture the bit shifted out of r19
653 "mov %D0, __zero_reg__\n" // top byte becomes zero
654 "movw %A0, r18\n" // A = r18, B = r19
655 : "=d" (a)
656 : "0" (a) // input shares the output operand's registers
657 : "r18", "r19"
658 );
659
660 return a;
661}
662
664
665#pragma GCC diagnostic pop
666
667#endif
668
681template <uint8_t b>
682static inline uint32_t rshift_round(uint32_t a) {
683 return rshift<b>(a+(1UL<<(b-1UL)));
684}
685
uint32_t lshift< 6U >(uint32_t a)
uint32_t bitwise left shift optimised for the specified shift distance
Definition bit_shifts.h:122
uint32_t rshift< 4U >(uint32_t a)
uint32_t bitwise right shift optimised for the specified shift distance
Definition bit_shifts.h:394
uint32_t lshift< 15U >(uint32_t a)
uint32_t bitwise left shift optimised for the specified shift distance
Definition bit_shifts.h:316
uint32_t rshift< 3U >(uint32_t a)
uint32_t bitwise right shift optimised for the specified shift distance
Definition bit_shifts.h:370
static uint32_t lshift(uint32_t a)
Bitwise left shift - generic, unoptimized, case.
Definition bit_shifts.h:43
uint32_t rshift< 9U >(uint32_t a)
uint32_t bitwise right shift optimised for the specified shift distance
Definition bit_shifts.h:499
uint32_t lshift< 4U >(uint32_t a)
uint32_t bitwise left shift optimised for the specified shift distance
Definition bit_shifts.h:66
uint32_t lshift< 11U >(uint32_t a)
uint32_t bitwise left shift optimised for the specified shift distance
Definition bit_shifts.h:212
uint32_t lshift< 14U >(uint32_t a)
uint32_t bitwise left shift optimised for the specified shift distance
Definition bit_shifts.h:292
uint32_t rshift< 5U >(uint32_t a)
uint32_t bitwise right shift optimised for the specified shift distance
Definition bit_shifts.h:420
uint32_t rshift< 6U >(uint32_t a)
uint32_t bitwise right shift optimised for the specified shift distance
Definition bit_shifts.h:450
uint32_t rshift< 15U >(uint32_t a)
uint32_t bitwise right shift optimised for the specified shift distance
Definition bit_shifts.h:644
uint32_t rshift< 10U >(uint32_t a)
uint32_t bitwise right shift optimised for the specified shift distance
Definition bit_shifts.h:518
uint32_t lshift< 10U >(uint32_t a)
uint32_t bitwise left shift optimised for the specified shift distance
Definition bit_shifts.h:190
uint32_t lshift< 12U >(uint32_t a)
uint32_t bitwise left shift optimised for the specified shift distance
Definition bit_shifts.h:237
uint32_t rshift< 13U >(uint32_t a)
uint32_t bitwise right shift optimised for the specified shift distance
Definition bit_shifts.h:591
uint32_t rshift< 14U >(uint32_t a)
uint32_t bitwise right shift optimised for the specified shift distance
Definition bit_shifts.h:620
static uint32_t rshift_round(uint32_t a)
Rounded arithmetic right shift.
Definition bit_shifts.h:682
uint32_t lshift< 9U >(uint32_t a)
uint32_t bitwise left shift optimised for the specified shift distance
Definition bit_shifts.h:171
static uint32_t rshift(uint32_t a)
Bitwise right shift - generic, unoptimized, case.
Definition bit_shifts.h:348
uint32_t lshift< 13U >(uint32_t a)
uint32_t bitwise left shift optimised for the specified shift distance
Definition bit_shifts.h:263
uint32_t rshift< 11U >(uint32_t a)
uint32_t bitwise right shift optimised for the specified shift distance
Definition bit_shifts.h:540
uint32_t rshift< 12U >(uint32_t a)
uint32_t bitwise right shift optimised for the specified shift distance
Definition bit_shifts.h:565
uint32_t rshift< 7U >(uint32_t a)
uint32_t bitwise right shift optimised for the specified shift distance
Definition bit_shifts.h:477
uint32_t lshift< 5U >(uint32_t a)
uint32_t bitwise left shift optimised for the specified shift distance
Definition bit_shifts.h:92
uint32_t lshift< 7U >(uint32_t a)
uint32_t bitwise left shift optimised for the specified shift distance
Definition bit_shifts.h:149
static uint8_t a
Definition maths.cpp:7