Table of Contents
8bit Divide - 8bit Result
Normal binary division
…with shifting in the loop. (If I remember right, this was submitted by Graham at the CSDb forum.)
;normal binary division
;
; Divides the value in $FD by the value in $FC.
; Out: $FD = 8-bit integer quotient, $FE = first 8 fraction bits,
;      A   = remainder (scaled) after the fraction loop.
; NOTE(review): reportedly the remainder in A can overflow in the
; fraction loop for divisors above $80 — see doynax's note below.
        ASL $FD             ; shift dividend's top bit into the work area
        LDA #$00            ; A accumulates the partial remainder
        ROL                 ; pull the first dividend bit into A
        LDX #$08            ; 8 quotient bits
.loop1  CMP $FC
        BCC *+4             ; remainder < divisor: quotient bit is 0, skip SBC
        SBC $FC             ; subtract divisor (carry already set by CMP)
        ROL $FD             ; shift quotient bit (carry) into $FD,
        ROL                 ;   and the next dividend bit into A
        DEX
        BNE .loop1
        LDX #$08            ; 8 fraction bits
.loop2  CMP $FC
        BCC *+4             ; same compare/subtract as above
        SBC $FC
        ROL $FE             ; fraction bits collect in $FE
        ASL                 ; no more dividend bits: shift in zeros
        DEX
        BNE .loop2
Divides the value in $FD by the value in $FC, 8 bit integer result in $FD, the first 8 fraction bits are in $FE.
Of course, both loops should be unrolled :) I didn't want to write down the unrolled code here.
doynax: The remainder (in the accumulator) in the fraction loop seems to overflow for divisors above $80. A BCS jumping directly from the top of the loop to the SBC and forcibly setting carry afterwards seems to work. Is there a cleaner solution?
Smaller version
; 8bit/8bit division
; by White Flame
;
; Input: num, denom in zeropage
; Output: num = quotient, .A = remainder
        lda #$00            ; clear the remainder accumulator
        ldx #$07            ; 8 iterations (7 down to 0)
        clc                 ; first quotient bit shifted in is 0
:       rol num             ; quotient bit in, next dividend bit out (to carry)
        rol                 ; dividend bit into the remainder
        cmp denom
        bcc :+              ; remainder < denom: quotient bit stays 0
        sbc denom           ; carry is set by cmp, so sbc is exact
:       dex
        bpl :--             ; loop back to the first anonymous label
        rol num             ; shift in the final quotient bit
; 19 bytes
;
; Best case = 154 cycles
; Worst case = 170 cycles
;
; With immediate denom:
; Best case = 146 cycles
; Worst case = 162 cycles
;
; Unrolled with variable denom:
; Best case = 106 cycles
; Worst case = 127 cycles
;
; Unrolled with immediate denom:
; Best case = 98 cycles
; Worst case = 111 cycles
If you don't understand what `:`, `:-`, and `:+` mean: `:` is an anonymous label, and `:+`/`:-` refer to the next/previous anonymous label. `bpl :--`, for example, branches back two anonymous labels in the code.
Division using tables
Comes from the CSDb forum (original author unknown).
;This will divide two 8-bit numbers in some 90-150 cycles.
;The code can easily be extended to handle larger dividends.
; Table-driven unsigned division: div_a / div_b, result returned in A.
; NOTE(review): the high bytes of the zero-page pointers zp8_1..zp8_4 must be
; initialized elsewhere to point at lookup tables — only the low bytes are
; written here. TODO confirm against the routine's setup code.
_divu_8 lda div_b
        cmp #2
        bcs +               ; divisor >= 2: do the real work
        lda div_a           ; divisor is 0 or 1: return div_a unchanged
        rts
        ; Normalize the divisor: shift left until its top bit falls out.
        ; X counts down with each shift (shift count for the final scaling).
+       ldx #8
-       dex
        asl
        bcc -
        bne +               ; leftover bits -> not a power of two, use the table
        ; Divisor is an exact power of two: just shift div_a right X times.
        lda div_a
-       lsr
        dex
        bne -
        rts
+       tay                 ; Y = normalized divisor remainder = table index
        lda r0_table,y      ; presumably a reciprocal-style entry — TODO confirm
        ldy div_a
        sta zp8_1           ; patch the low bytes of the four zp pointers
        sta zp8_2
        eor #$ff
        sta zp8_3           ; complemented index for the paired tables
        sta zp8_4
        sec
        lda (zp8_1),y       ; two 8-bit table lookups combined by subtraction
        sbc (zp8_3),y
        lda (zp8_2),y
        sbc (zp8_4),y
        clc
        adc div_a           ; correction: add the dividend back in
        ror                 ; fold the carry into the top bit
-       lsr                 ; scale down by the remaining shift count in X
        dex
        bne -
        rts
div_a   .byte $0            ; dividend (input)
div_b   .byte $0            ; divisor (input)
r0_table
        .byte $01,$00,$fd,$00,$f9,$00,$f5,$00,$f1,$00,$ed,$00,$ea,$00,$e6,$00
        .byte $e2,$00,$df,$00,$db,$00,$d8,$00,$d5,$00,$d1,$00,$ce,$00,$cb,$00
        .byte $c8,$00,$c4,$00,$c1,$00,$be,$00,$bb,$00,$b8,$00,$b5,$00,$b3,$00
        .byte $b0,$00,$ad,$00,$aa,$00,$a7,$00,$a5,$00,$a2,$00,$9f,$00,$9d,$00
        .byte $9a,$00,$98,$00,$95,$00,$93,$00,$90,$00,$8e,$00,$8b,$00,$89,$00
        .byte $87,$00,$84,$00,$82,$00,$80,$00,$7e,$00,$7b,$00,$79,$00,$77,$00
        .byte $75,$00,$73,$00,$71,$00,$6f,$00,$6d,$00,$6b,$00,$69,$00,$67,$00
        .byte $65,$00,$63,$00,$61,$00,$5f,$00,$5d,$00,$5b,$00,$59,$00,$58,$00
        .byte $56,$00,$54,$00,$52,$00,$51,$00,$4f,$00,$4d,$00,$4b,$00,$4a,$00
        .byte $48,$00,$47,$00,$45,$00,$43,$00,$42,$00,$40,$00,$3f,$00,$3d,$00
        .byte $3c,$00,$3a,$00,$39,$00,$37,$00,$36,$00,$34,$00,$33,$00,$31,$00
        .byte $30,$00,$2f,$00,$2d,$00,$2c,$00,$2a,$00,$29,$00,$28,$00,$26,$00
        .byte $25,$00,$24,$00,$22,$00,$21,$00,$20,$00,$1f,$00,$1d,$00,$1c,$00
        .byte $1b,$00,$1a,$00,$19,$00,$17,$00,$16,$00,$15,$00,$14,$00,$13,$00
        .byte $12,$00,$10,$00,$0f,$00,$0e,$00,$0d,$00,$0c,$00,$0b,$00,$0a,$00
        .byte $09,$00,$08,$00,$07,$00,$06,$00,$05,$00,$04,$00,$03,$00,$02,$00
The same routine again, slightly optimized
Let me bore you with an optimized version:

; divide acc by y, result in acc
; The divisor's shift count and normalized remainder are precomputed into
; t0_table/t1_table by _divu_8_setup (below), removing the normalization
; loop of the previous version. Uses self-modifying code: the operand of
; the branch at b1 is patched so it jumps into the lsr chain, performing a
; variable-length right shift.
; NOTE(review): the high bytes of zp8_1..zp8_4 must be initialized
; elsewhere to point at lookup tables — only the low bytes are set here.
_divu_8 ldx t0_table,y      ; X = precomputed branch offset for divisor Y
        stx b1+1            ; patch the branch operand at b1 (self-modifying)
        ldx t1_table,y      ; X = normalized divisor remainder
        beq +               ; zero -> divisor is a power of two, just shift
        ldy r0_table,x      ; table entry for the normalized divisor
        sta zp8_1           ; A (the dividend) patches the pointer low bytes
        sta zp8_2
        eor #$ff
        sta zp8_3           ; complemented dividend for the paired tables
        sta zp8_4
        sec
        lda (zp8_1),y       ; two 8-bit table lookups combined by subtraction
        sbc (zp8_3),y
        lda (zp8_2),y
        sbc (zp8_4),y
        clc
        adc zp8_1           ; correction: add the dividend (low byte) back in
        ror                 ; fold the carry into the top bit
+       sec                 ; force the patched branch to be taken
b1      bcs b1              ; operand rewritten above: jumps into the lsr chain
        lsr
        lsr
        lsr
        lsr
        lsr
        lsr
        lsr
        rts
        .align $100
r0_table
        .byte $01,$00,$fd,$00,$f9,$00,$f5,$00,$f1,$00,$ed,$00,$ea,$00,$e6,$00
        .byte $e2,$00,$df,$00,$db,$00,$d8,$00,$d5,$00,$d1,$00,$ce,$00,$cb,$00
        .byte $c8,$00,$c4,$00,$c1,$00,$be,$00,$bb,$00,$b8,$00,$b5,$00,$b3,$00
        .byte $b0,$00,$ad,$00,$aa,$00,$a7,$00,$a5,$00,$a2,$00,$9f,$00,$9d,$00
        .byte $9a,$00,$98,$00,$95,$00,$93,$00,$90,$00,$8e,$00,$8b,$00,$89,$00
        .byte $87,$00,$84,$00,$82,$00,$80,$00,$7e,$00,$7b,$00,$79,$00,$77,$00
        .byte $75,$00,$73,$00,$71,$00,$6f,$00,$6d,$00,$6b,$00,$69,$00,$67,$00
        .byte $65,$00,$63,$00,$61,$00,$5f,$00,$5d,$00,$5b,$00,$59,$00,$58,$00
        .byte $56,$00,$54,$00,$52,$00,$51,$00,$4f,$00,$4d,$00,$4b,$00,$4a,$00
        .byte $48,$00,$47,$00,$45,$00,$43,$00,$42,$00,$40,$00,$3f,$00,$3d,$00
        .byte $3c,$00,$3a,$00,$39,$00,$37,$00,$36,$00,$34,$00,$33,$00,$31,$00
        .byte $30,$00,$2f,$00,$2d,$00,$2c,$00,$2a,$00,$29,$00,$28,$00,$26,$00
        .byte $25,$00,$24,$00,$22,$00,$21,$00,$20,$00,$1f,$00,$1d,$00,$1c,$00
        .byte $1b,$00,$1a,$00,$19,$00,$17,$00,$16,$00,$15,$00,$14,$00,$13,$00
        .byte $12,$00,$10,$00,$0f,$00,$0e,$00,$0d,$00,$0c,$00,$0b,$00,$0a,$00
        .byte $09,$00,$08,$00,$07,$00,$06,$00,$05,$00,$04,$00,$03,$00,$02,$00
t0_table .fill $100,0       ; per-divisor branch offsets, filled by setup
t1_table .fill $100,0       ; per-divisor normalized remainders, filled by setup
; Build t0_table/t1_table for every divisor Y = 1..255:
; shift Y left until its top bit falls out; t1 = the leftover bits
; (0 exactly when Y is a power of two), t0 = the number of shifts taken.
_divu_8_setup
        ldy #1
next    tya
        ldx #$ff
-       inx
        asl
        bcc -
        sta t1_table,y      ; normalized remainder after the top bit shifted out
        txa
        sta t0_table,y      ; shift count (used as the patched branch offset)
        iny
        bne next
        rts
The init optimized (well, it packs better)
; Same tables, but r0_table is stored packed (128 bytes instead of 256)
; and expanded in place at init time: each packed byte becomes a
; (value, $00) pair, rebuilding the interleaved 256-byte table above.
r0_table
        .byte $01,$fd,$f9,$f5,$f1,$ed,$ea,$e6
        .byte $e2,$df,$db,$d8,$d5,$d1,$ce,$cb
        .byte $c8,$c4,$c1,$be,$bb,$b8,$b5,$b3
        .byte $b0,$ad,$aa,$a7,$a5,$a2,$9f,$9d
        .byte $9a,$98,$95,$93,$90,$8e,$8b,$89
        .byte $87,$84,$82,$80,$7e,$7b,$79,$77
        .byte $75,$73,$71,$6f,$6d,$6b,$69,$67
        .byte $65,$63,$61,$5f,$5d,$5b,$59,$58
        .byte $56,$54,$52,$51,$4f,$4d,$4b,$4a
        .byte $48,$47,$45,$43,$42,$40,$3f,$3d
        .byte $3c,$3a,$39,$37,$36,$34,$33,$31
        .byte $30,$2f,$2d,$2c,$2a,$29,$28,$26
        .byte $25,$24,$22,$21,$20,$1f,$1d,$1c
        .byte $1b,$1a,$19,$17,$16,$15,$14,$13
        .byte $12,$10,$0f,$0e,$0d,$0c,$0b,$0a
        .byte $09,$08,$07,$06,$05,$04,$03,$02
        .fill $80,0         ; room for the expanded second half
t0_table .fill $100,0       ; per-divisor branch offsets, filled below
t1_table .fill $100,0       ; per-divisor normalized remainders, filled below
_divu_8_setup
        ; Unpack r0_table from the top down: for each packed byte at
        ; r0_table+X, write ($00, byte) downward from r0_table+Y, turning
        ; 128 packed bytes into 256 interleaved (value, $00) pairs.
        ldx #$7f
        ldy #$ff
-       lda #0
        sta r0_table,y      ; the $00 filler byte of the pair
        dey
        lda r0_table,x      ; the packed value
        sta r0_table,y
        dey
        dex
        bpl -
        ; Build t0_table/t1_table exactly as in the previous version:
        ; shift Y left until the top bit falls out; t1 = leftover bits,
        ; t0 = number of shifts.
        ldy #1
next    tya
        ldx #$ff
-       inx
        asl
        bcc -
        sta t1_table,y
        txa
        sta t0_table,y
        iny
        bne next
        rts