[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[avr-libc-dev] Working octal code (FYI)
From: |
George Spelvin |
Subject: |
[avr-libc-dev] Working octal code (FYI) |
Date: |
16 Dec 2016 02:41:06 -0500 |
(Am I annoying everyone by using this mailing list as my personal
coding blog?)
After considerable rearranging (and fixing one nasty logic bug in
the first algorithm posted), I have octal conversion working to my
satisfaction.
The logic bug was that I assumed I'd need at most one byte of zero-padding
to print a number. But I was checking for termination before printing
a digit. That ended up not working with 1-byte octal numbers where the
top digit is non-zero. By the time I was ready to print the fourth digit
(when the termination check would fire), the lsbyte wanted to hold bits
9..16, and that meant loading a *second* byte (bits 16..23).
So I changed to checking for termination *after* printing a digit,
which I knew would save time, but I unexpectedly found additional
space savings, too.
Not counting preamble code shared with decimal printing (all the
stuff before the label "3:"), it's down to 29 instructions. Still
a bit more than 20, but I'm satisfied.
It's even slightly faster than the previous code:
Bits Old New
0 56 42
8 144 113
16 276 232
24 364 314
32 496 430
40 546
48 628
56 744
64 860
/*
 * Register assignments used by binprint.
 * Several scratch registers reuse the registers the arguments arrive in;
 * the arguments are copied to X and Z first.
 */

/* Scratch registers, highest register number first */
#define msb	r25	/* shares r25 with the incoming output pointer */
#define lsb	r24	/* shares r24 with the incoming output pointer */
#define digit	r23	/* shares r23 with the incoming input pointer */
#define delta	r22	/* shares r22 with the incoming input pointer */
#define tmask	r21
#define k	r19

/* Arguments that stay where they arrive */
#define len	r20	/* r20; added to the input pointer, so a byte count */
#define flags	r18	/* r18; option bits are shifted out of the bottom, the rest is the digit mask */

/* Output pointer: arrives in r24:r25 and is copied to X immediately */
#define out	X
#define out_lo	r26
#define out_hi	r27

/* Input pointer: arrives in r22:r23 and is copied to Z immediately */
#define bin	Z
#define bin_lo	r30
#define bin_hi	r31
.text
.global binprint
.type binprint, @function

/*
 * binprint - convert a little-endian unsigned binary number to digits in
 * a power-of-two radix (octal, hex, ...), least-significant digit first.
 *
 * In:   r25:r24  output buffer (copied to X)
 *       r23:r22  input bytes, least-significant byte first (copied to Z)
 *       len      number of input bytes.  Must be at least 1: with 0 the
 *                "dec len" in the strip loop wraps to 255.
 *       flags    bit 0 set selects lower-case letters for digits above 9.
 *                The bits above it, once shifted down, are ANDed with the
 *                data to form each digit and also count the shifts per
 *                digit, so they are presumably 2^n - 1 (7 for octal, 15
 *                for hex) - confirm against the callers.
 *                The disabled #else variant consumes one more low bit
 *                first, as a "negate the input in place" flag.
 * Out:  digits are stored through X; X ends up just past the last digit.
 *       The exit path .L_epilogue is NOT defined in this excerpt
 *       (NOTE(review): presumably shared with the decimal code and
 *       returning X - it must exist elsewhere in the file).
 * Uses: r18-r27, r30, r31, __tmp_reg__ and the status flags.
 *
 * Working state in the digit loop:
 *       lsb   the 8 input bits starting at the current digit position
 *       msb   the following input bits, sitting below a single marker
 *             bit.  When "lsr msb" yields zero the marker itself has just
 *             been shifted out (carry set) and msb must be refilled.
 *       len   number of input bytes not yet loaded into lsb or msb
 */
binprint:
	movw	out_lo, r24		; X = output pointer
	movw	bin_lo, r22		; Z = input pointer; r23:r22 keeps the start
#if 1
	; Advance Z to one past the last input byte.
	add	bin_lo, len
	adc	bin_hi, __zero_reg__	; was the undefined symbol "zero"
#else
	mov	tmask, len
	; Conditional negate using the standard identity -x = ~x + 1.
	; Given mask of -1 or 0, (x ^ mask) - mask returns -x or x.
	; However, we would need the carry bit clear to start this, and
	; forming "mask" from the carry bit in one instruction preserves
	; the carry bit.  So instead add zero with carry.
	lsr	flags			; Lsbit is negate flag
	sbc	k, k			; Set to 0 or -1, carry preserved
1:
	ld	__tmp_reg__, bin
	eor	__tmp_reg__, k
	adc	__tmp_reg__, __zero_reg__
	st	bin+, __tmp_reg__	; Leaves Z one past the last byte
	dec	tmask			; Preserves carry for the next adc
	brne	1b
#endif

	; Strip most-significant zero bytes.  On exit len is the index of
	; the highest non-zero byte (0 if only the first byte counts), i.e.
	; the number of bytes still to load after the first one.
2:	dec	len
	breq	3f			; Never strip the lowest byte
	ld	__tmp_reg__, -bin
	or	__tmp_reg__, __tmp_reg__
	breq	2b			; Keep going while bytes are zero
3:	movw	bin_lo, r22		; Reset Z to the least-significant byte

	; The arguments in r22-r25 are dead from here on, so delta, digit,
	; lsb and msb may be used.
	ldi	delta, 'A'-'0'-10	; Offset that turns 10..15 into letters
	lsr	flags			; Case flag -> carry; flags is now the mask
	brcc	4f
	ldi	delta, 'a'-'0'-10
4:	ldi	msb, 1			; Marker only: msb holds no data yet
	ld	lsb, bin+

.L_digit_out:				; Emit the digit in the low bits of lsb
	mov	digit, lsb
	and	digit, flags
	cpi	digit, 10
	brcs	5f
	add	digit, delta		; Digit above 9 becomes a letter
5:	subi	digit, -'0'
	st	out+, digit

	; Finished only when no non-zero bit is left anywhere:
	;   len == 0       no input bytes still unread,
	;   lsb <= flags   nothing in lsb above the digit just stored,
	;   msb == 2^n     only the marker bit is left in msb.
	; The first two alone are not enough: len already drops to zero
	; when the last byte is loaded into msb, so e.g. 0x8000 in octal
	; stopped after two digits while its top bit was still in msb.
	cp	flags, lsb
	cpc	__zero_reg__, len	; Carry set iff len:lsb > 0:flags
	brcs	8f
	mov	__tmp_reg__, msb
	dec	__tmp_reg__
	and	__tmp_reg__, msb	; msb & (msb - 1): zero iff lone marker bit
	breq	.L_epilogue

8:	mov	tmask, flags		; One shift per set bit of the mask
.L_bitloop:
	lsr	msb
	brne	7f			; Marker still inside msb: carry is data
	; msb is used up and carry holds the marker.  Refill msb from the
	; input, or with zero padding once the input is exhausted.
	or	len, len		; Preserves carry
	breq	6f
	dec	len			; Preserves carry
	ld	msb, bin+
6:	ror	msb			; Marker into bit 7, first data bit to carry
7:	ror	lsb			; Shift the data bit into the top of lsb
	lsr	tmask
	brne	.L_bitloop
	rjmp	.L_digit_out
.size binprint, .-binprint