Quelle round.S

Sprache: Assembler (Motorola 68k, GNU-as-Syntax)

|
| round.sa 3.4 7/29/91
|
| handle rounding and normalization tasks
|
|
|
|  Copyright (C) Motorola, Inc. 1990
|   All Rights Reserved
|
|       For details on the license for this file, please see the
|       file, README, in this same directory.

|ROUND idnt    2,1 | Motorola 040 Floating Point Software Package

|section 8

#include "fpsp.h"

|
| round --- round result according to precision/mode
|
| a0 points to the input operand in the internal extended format
| d1(high word) contains rounding precision:
|  ext = $0000xxxx
|  sgl = $0001xxxx
|  dbl = $0002xxxx
| d1(low word) contains rounding mode:
|  RN  = $xxxx0000
|  RZ  = $xxxx0001
|  RM  = $xxxx0010
|  RP  = $xxxx0011
| d0{31:29} contains the g,r,s bits (extended)
|
| On return the value pointed to by a0 is correctly rounded,
| a0 is preserved and the g-r-s bits in d0 are cleared.
| The result is not typed - the tag field is invalid.  The
| result is still in the internal extended format.
|
| The INEX bit of USER_FPSR will be set if the rounded result was
| inexact (i.e. if any of the g-r-s bits were set).
|

.global round
round:
| Entry: a0 -> operand, d1 = PREC:MODE, d0{31:29} = incoming g,r,s.
| If g=r=s=0 then the result is exact and only truncation to the
| rounding precision remains; otherwise set the inexact flags in the
| status register and dispatch on the rounding mode.
|
bsrs ext_grs   |this subroutine looks at the
|     ;rounding precision and sets
|     ;the appropriate g-r-s bits.
tstl %d0   |if g,r,s are all zero the result is
bne rnd_cont  |exact: fall through and truncate

swap %d1   |set up d1.w for round prec.
bra truncate

rnd_cont:
|
| Use rounding mode as an index into a jump table for these modes.
| a1 is used as scratch for the table base and the jump target.
|
orl #inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
lea mode_tab,%a1
movel (%a1,%d1.w*4),%a1
jmp (%a1)
|
| Jump table indexed by rounding mode in d1.w.  All following assumes
| grs != 0.
|
mode_tab:
.long rnd_near
.long rnd_zero
.long rnd_mnus
.long rnd_plus
|
| ROUND PLUS INFINITY
|
| If sign of fp number = 0 (positive), then add 1 to l.
| A negative number is truncated instead.
|
rnd_plus:
swap %d1   |set up d1 for round prec.
tstb LOCAL_SGN(%a0)  |check for sign
bmi truncate  |if negative then truncate
movel #0xffffffff,%d0  |force g,r,s to be all f's
|     ;(non-zero d0 keeps the add_* routine
|     ;from clearing the l-bit)
lea add_to_l,%a1
movel (%a1,%d1.w*4),%a1
jmp (%a1)
|
| ROUND MINUS INFINITY
|
| If sign of fp number = 1 (negative), then add 1 to l.
| A positive number is truncated instead.
|
rnd_mnus:
swap %d1   |set up d1 for round prec.
tstb LOCAL_SGN(%a0)  |check for sign
bpl truncate  |if positive then truncate
movel #0xffffffff,%d0  |force g,r,s to be all f's
|     ;(non-zero d0 keeps the add_* routine
|     ;from clearing the l-bit)
lea add_to_l,%a1
movel (%a1,%d1.w*4),%a1
jmp (%a1)
|
| ROUND ZERO
|
| Always truncate.
rnd_zero:
swap %d1   |set up d1 for round prec.
bra truncate
|
|
| ROUND NEAREST
|
| If (g=1), then add 1 to l and if (r=s=0), then clear l
| Note that this will round to even in case of a tie.
| After the shift below d0 holds only r and s, which is what the
| add_* routines test with tstl.
|
rnd_near:
swap %d1   |set up d1 for round prec.
asll #1,%d0   |shift g-bit to c-bit
bcc truncate  |g=0: below half, truncate
lea add_to_l,%a1  |g=1: add 1 to l via prec-indexed table
movel (%a1,%d1.w*4),%a1
jmp (%a1)

|
| ext_grs --- extract guard, round and sticky bits
|
| Input: d1 =  PREC:ROUND
| Output: d0{31:29}= guard, round, sticky
|
| The ext_grs extract the guard/round/sticky bits according to the
| selected rounding precision. It is called by the round subroutine
| only.  All registers except d0 are kept intact. d0 becomes an
| updated guard,round,sticky in d0{31:29}
|
| Notes: the ext_grs uses the round PREC, and therefore has to swap d1
|  prior to usage, and needs to restore d1 to original.
|
ext_grs:
	swap	%d1			|d1.w = rounding precision
	tstw	%d1			|extended precision?
	beqs	end_ext_grs		|yes: incoming d0 g,r,s is already right

sgl_or_dbl:
	moveml	%d2/%d3,-(%a7)		|d2/d3 are scratch in here
	cmpiw	#1,%d1
	beqs	grs_sgl			|1 = single, anything else = double

grs_dbl:
	bfextu	LOCAL_LO(%a0){#21:#2},%d3 |g,r sit just right of the dbl l-bit
	moveql	#30,%d2
	lsll	%d2,%d3			|move g,r up to d3{31:30}
	movel	LOCAL_LO(%a0),%d2
	andil	#0x000001ff,%d2		|anything below g,r in the ls mant
	bnes	st_stky			|makes the result sticky
	bras	chk_old_grs

grs_sgl:
	bfextu	LOCAL_HI(%a0){#24:#2},%d3 |g,r sit just right of the sgl l-bit
	moveql	#30,%d2
	lsll	%d2,%d3			|move g,r up to d3{31:30}
	movel	LOCAL_HI(%a0),%d2
	andil	#0x0000003f,%d2		|anything below g,r in the ms mant,
	bnes	st_stky
	tstl	LOCAL_LO(%a0)		|or anywhere in the ls mant,
	bnes	st_stky			|makes the result sticky

chk_old_grs:
	tstl	%d0			|incoming g,r,s also feed sticky
	beqs	end_sd
st_stky:
	bset	#rnd_stky_bit,%d3
end_sd:
	movel	%d3,%d0			|hand the new g,r,s back in d0
	moveml	(%a7)+,%d2/%d3		|scratch registers back
end_ext_grs:
	swap	%d1			|d1 = PREC:ROUND again
	rts

|*******************  Local Equates
.set ad_1_sgl,0x00000100 |  constant to add 1 to l-bit in sgl prec
.set ad_1_dbl,0x00000800 |  constant to add 1 to l-bit in dbl prec

|Jump table for adding 1 to the l-bit indexed by rnd prec
|(0 = ext, 1 = sgl, 2 = dbl; index 3 also lands on add_dbl).
|The add_* routines are reached by jmp from round, so their rts
|returns to round's caller.  On entry d0 is non-zero unless the
|value is an exact tie (r = s = 0) in round-to-nearest mode.

add_to_l:
.long add_ext
.long add_sgl
.long add_dbl
.long add_dbl
|
| ADD SINGLE
|
| Adds 1 at the single-precision l-bit of the ms mantissa.
| sgl_done is also the single-precision truncate entry (see trunct).
|
add_sgl:
addl #ad_1_sgl,LOCAL_HI(%a0)
bccs scc_clr   |no mantissa overflow
roxrw  LOCAL_HI(%a0)  |carry out: rotate X back into the
roxrw  LOCAL_HI+2(%a0)  |mantissa, upper word then lower word
addw #0x1,LOCAL_EX(%a0) |and incr exponent
scc_clr:
tstl %d0   |test for rs = 0
bnes sgl_done
andiw  #0xfe00,LOCAL_HI+2(%a0) |tie: clear the l-bit (round to even)
sgl_done:
andil #0xffffff00,LOCAL_HI(%a0) |truncate bits beyond sgl limit
clrl LOCAL_LO(%a0)  |ls mantissa is entirely below sgl precision
rts

|
| ADD EXTENDED
|
| Adds 1 at bit 0 of the ls mantissa and ripples the carry upward.
|
add_ext:
addql  #1,LOCAL_LO(%a0)  |add 1 to l-bit
bccs xcc_clr   |test for carry out
addql  #1,LOCAL_HI(%a0)  |propagate carry
bccs xcc_clr
roxrw  LOCAL_HI(%a0)  |mant is 0 so restore v-bit
roxrw  LOCAL_HI+2(%a0)  |mant is 0 so restore v-bit
roxrw LOCAL_LO(%a0)  |continue the 1-bit right rotate
roxrw LOCAL_LO+2(%a0)  |through the ls mantissa
addw #0x1,LOCAL_EX(%a0) |and inc exp
xcc_clr:
tstl %d0   |test rs = 0
bnes add_ext_done
andib #0xfe,LOCAL_LO+3(%a0) |tie: clear the l bit (round to even)
add_ext_done:
rts
|
| ADD DOUBLE
|
| Adds 1 at the double-precision l-bit of the ls mantissa.
| dbl_done is also the double-precision truncate entry (see trunct).
|
add_dbl:
addl #ad_1_dbl,LOCAL_LO(%a0)
bccs dcc_clr   |no carry out of ls mant
addql #1,LOCAL_HI(%a0)  |propagate carry
bccs dcc_clr
roxrw LOCAL_HI(%a0)  |mant is 0 so restore v-bit
roxrw LOCAL_HI+2(%a0)  |mant is 0 so restore v-bit
roxrw LOCAL_LO(%a0)  |continue the 1-bit right rotate
roxrw LOCAL_LO+2(%a0)  |through the ls mantissa
addw #0x1,LOCAL_EX(%a0) |incr exponent
dcc_clr:
tstl %d0   |test for rs = 0
bnes dbl_done
andiw #0xf000,LOCAL_LO+2(%a0) |tie: clear the l-bit (and bits below it)

dbl_done:
andil #0xfffff800,LOCAL_LO(%a0) |truncate bits beyond dbl limit
rts

| Bare return.  No branch or table entry in the visible part of this
| file refers to this label.
error:
rts
|
| Truncate all other bits
|
truncate:
	lea	trunct,%a1		|a1 -> per-precision handler table
	movel	(%a1,%d1.w*4),%a1	|pick handler by rounding precision in d1.w
	jmp	(%a1)

| Extended precision keeps every mantissa bit: nothing to do.
end_rnd:
	rts

| Handlers indexed by rounding precision (0 = ext, 1 = sgl, 2/3 = dbl).
| sgl_done and dbl_done are the mantissa-masking tails of add_sgl and
| add_dbl.
trunct:
	.long	end_rnd
	.long	sgl_done
	.long	dbl_done
	.long	dbl_done

|
| NORMALIZE
|
| These routines (nrm_zero & nrm_set) normalize the unnorm.  This
| is done by shifting the mantissa left while decrementing the
| exponent.
|
| NRM_SET shifts and decrements until there is a 1 set in the integer
| bit of the mantissa (msb in d1).
|
| NRM_ZERO shifts and decrements until there is a 1 set in the integer
| bit of the mantissa (msb in d1) unless this would mean the exponent
| would go less than 0.  In that case the number becomes a denorm - the
| exponent (d0) is set to 0 and the mantissa (d1 & d2) is not
| normalized.
|
| Note that both routines have been optimized (for the worst case) and
| therefore do not have the easy to follow decrement/shift loop.
|
| NRM_ZERO
|
| Distance to first 1 bit in mantissa = X
| Distance to 0 from exponent = Y
| If X < Y
| Then
|   nrm_set
| Else
|   shift mantissa by Y
|   set exponent = 0
|
|input:
| FP_SCR1 = exponent, ms mantissa part, ls mantissa part
|output:
| L_SCR1{4} = fpte15 or ete15 bit
|
.global nrm_zero
|
| nrm_zero --- normalize, but never let the exponent go below zero
|
| Input:   a0 -> operand (LOCAL_EX, LOCAL_HI, LOCAL_LO are used)
| Output:  operand normalized in place, or turned into a denorm with
|          exponent 0 when the exponent is smaller than the distance
|          to the first set mantissa bit.
| Uses:    d0, d1 as scratch (d2/d3/d5/d6 are saved and restored).
|
| Fix relative to the earlier version: when the shift count (the
| exponent) was 33..63 the two 32-bit shifts both used a count >= 32
| (register shift counts are taken modulo 64), so every mantissa bit
| was shifted out and a non-zero value became zero.  Counts of 32 or
| more are now handled by moving the ls mantissa into the ms mantissa
| and shifting by (count - 32).
|
nrm_zero:
	movew	LOCAL_EX(%a0),%d0
	cmpw	#64,%d0			|see if exp >= 64
	bmis	d0_less
	bsr	nrm_set			|at most 64 shifts: exp can't go below 0
	rts
d0_less:
	moveml	%d2/%d3/%d5/%d6,-(%a7)
	movel	LOCAL_HI(%a0),%d1
	movel	LOCAL_LO(%a0),%d2

	bfffo	%d1{#0:#32},%d3		|X = distance to the first 1 in ms mant
	beqs	ms_clr			|branch if no bits were set
	cmpw	%d3,%d0			|Y - X
	bmis	greater			|Y < X: exp would go negative if the
|					;mantissa were fully normalized
	bsr	nrm_set			|else exp won't go past 0
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
|
| Shift the 64-bit mantissa d1:d2 left by Y (= d0.w) and force exp = 0.
|
greater:
	cmpw	#32,%d0			|Y >= 32 can only come from ms_clr,
	bges	gt_word			|i.e. ms mant is zero: move whole word
	movel	%d2,%d6			|save ls mant in d6
	lsll	%d0,%d2			|shift ls mant by count
	lsll	%d0,%d1			|shift ms mant by count
	movel	#32,%d5
	subl	%d0,%d5			|32 - Y
	lsrl	%d5,%d6			|bits of ls mant that cross into ms mant
	orl	%d6,%d1			|shift the ls mant bits into the ms mant
	bras	gt_store
gt_word:
	subw	#32,%d0			|remaining shift after the word move
	movel	%d2,%d1			|ls mant becomes ms mant
	lsll	%d0,%d1			|shift by Y - 32 (0..31)
	clrl	%d2			|nothing is left in ls mant
gt_store:
	movel	#0,%d0			|same as if decremented exp to 0
|					;while shifting
	movew	%d0,LOCAL_EX(%a0)
	movel	%d1,LOCAL_HI(%a0)
	movel	%d2,LOCAL_LO(%a0)
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
ms_clr:
	bfffo	%d2{#0:#32},%d3		|check if any bits set in ls mant
	beqs	all_clr			|branch if none set
	addw	#32,%d3			|X counts the 32 zero bits of ms mant too
	cmpw	%d3,%d0			|Y - X
	bmis	greater			|Y < X: make it a denorm
	bsr	nrm_set			|else exp won't go past 0
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
all_clr:
	movew	#0,LOCAL_EX(%a0)	|no mantissa bits set. Set exp = 0.
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
|
| NRM_SET
|
.global nrm_set
nrm_set:
movel %d7,-(%a7)
bfffo LOCAL_HI(%a0){#0:#32},%d7 |find first 1 in ms mant to d7)
beqs lower  |branch if ms mant is all 0's

movel %d6,-(%a7)

subw %d7,LOCAL_EX(%a0) |sub exponent by count
movel LOCAL_HI(%a0),%d0 |d0 has ms mant
movel LOCAL_LO(%a0),%d1 |d1 has ls mant

lsll %d7,%d0  |shift first 1 to j bit position
movel %d1,%d6  |copy ls mant into d6
lsll %d7,%d6  |shift ls mant by count
movel %d6,LOCAL_LO(%a0) |store ls mant into memory
moveql #32,%d6
subl %d7,%d6  |continue shift
lsrl %d6,%d1  |shift off all bits but those that will
|    ;be shifted into ms mant
orl %d1,%d0  |shift the ls mant bits into the ms mant
movel %d0,LOCAL_HI(%a0) |store ms mant into memory
moveml (%a7)+,%d7/%d6 |restore registers
rts

|
| We get here if ms mant was = 0, and we assume ls mant has bits
| set (otherwise this would have been tagged a zero not a denorm).
|
lower:
movew LOCAL_EX(%a0),%d0 |d0 has exponent
movel LOCAL_LO(%a0),%d1 |d1 has ls mant
subw #32,%d0  |account for ms mant being all zeros
bfffo %d1{#0:#32},%d7 |find first 1 in ls mant to d7)
subw %d7,%d0  |subtract shift count from exp
lsll %d7,%d1  |shift first 1 to integer bit in ms mant
movew %d0,LOCAL_EX(%a0) |store ms mant
movel %d1,LOCAL_HI(%a0) |store exp
clrl LOCAL_LO(%a0) |clear ls mant
movel (%a7)+,%d7
rts
|
| denorm --- denormalize an intermediate result
|
| Used by underflow.
|
| Input:
| a0  points to the operand to be denormalized
|   (in the internal extended format)
|
| d0:  rounding precision
| Output:
| a0  points to the denormalized result
|   (in the internal extended format)
|
| d0 is guard,round,sticky
|
| d0 comes into this routine with the rounding precision. It
| is then loaded with the denormalized exponent threshold for the
| rounding precision.
|

.global denorm
denorm:
	btstb	#6,LOCAL_EX(%a0)	|exponent in the $4000-$7fff range?
	beqs	no_sgn_ext
	bsetb	#7,LOCAL_EX(%a0)	|then sign extend it
no_sgn_ext:
	tstb	%d0			|precision 0 = extended
	bnes	not_ext

	clrl	%d1			|extended threshold is 0
	bras	dn_do_shift		|no range check for extended

not_ext:
	cmpil	#1,%d0			|1 = single, otherwise double
	beqs	load_sgl
load_dbl:
	movew	#dbl_thresh,%d1		|d1.w = double threshold
	bras	dn_chk_range
load_sgl:
	movew	#sgl_thresh,%d1		|d1.w = single threshold

| Common to single and double: if the exponent is 67 or more below
| the threshold, every mantissa bit (plus g,r,s) would be shifted
| out, so skip the shifter altogether.
dn_chk_range:
	movel	%d1,%d0
	subw	LOCAL_EX(%a0),%d0	|diff = threshold - exp
	cmpw	#67,%d0
	bpls	chk_stky

dn_do_shift:
	clrl	%d0			|start with g,r,s clear
	bsr	dnrm_lp			|denormalize to the threshold in d1
	tstb	%d1			|dnrm_lp's inexact flag
	beqs	no_inex
dnrm_inex:
	orl	#inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
no_inex:
	rts

| Everything shifts out: the result mantissa is zero, and any bit
| that was set survives only as sticky.
chk_stky:
	tstl	LOCAL_HI(%a0)
	bnes	set_stky
	tstl	LOCAL_LO(%a0)
	beqs	clr_mant		|mantissa already zero: nothing lost
set_stky:
	orl	#inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
	movel	#0x20000000,%d0		|return sticky only
clr_mant:
	movew	%d1,LOCAL_EX(%a0)	|exponent = threshold
	clrl	LOCAL_HI(%a0)		|ms mantissa = 0
	clrl	LOCAL_LO(%a0)		|ls mantissa = 0
	rts

|
| dnrm_lp --- normalize exponent/mantissa to specified threshold
|
| Input:
| a0  points to the operand to be denormalized
| d0{31:29} initial guard,round,sticky
| d1{15:0} denormalization threshold
| Output:
| a0  points to the denormalized operand
| d0{31:29} final guard,round,sticky
| d1.b  inexact flag:  all ones means inexact result
|
| The LOCAL_LO and LOCAL_GRS parts of the value are copied to FP_SCR2
| so that bfext can be used to extract the new low part of the mantissa.
| Dnrm_lp can be called with a0 pointing to ETEMP or WBTEMP and there
| is no LOCAL_GRS scratch word following it on the fsave frame.
|
.global dnrm_lp
dnrm_lp:
| For a type E3 exception the incoming g,r,s in d0 are replaced by
| the three bits taken from WBTEMP_GRS; d2 is only a temporary here.
movel %d2,-(%sp)  |save d2 for temp use
btstb #E3,E_BYTE(%a6)  |test for type E3 exception
beqs not_E3   |not type E3 exception
bfextu WBTEMP_GRS(%a6){#6:#3},%d2 |extract guard,round, sticky  bit
movel #29,%d0
lsll %d0,%d2   |shift g,r,s to their positions
movel %d2,%d0
not_E3:
movel (%sp)+,%d2  |restore d2
| Build a contiguous LOCAL_LO:GRS pair in FP_SCR2 so that a single
| bit-field extract can span the two.
movel LOCAL_LO(%a0),FP_SCR2+LOCAL_LO(%a6)
movel %d0,FP_SCR2+LOCAL_GRS(%a6)
movel %d1,%d0   |copy the denorm threshold
subw LOCAL_EX(%a0),%d1  |d1 = threshold - uns exponent
bles no_lp   |d1 <= 0
cmpw #32,%d1
blts case_1   |0 < d1 < 32
cmpw #64,%d1
blts case_2   |32 <= d1 < 64
bra case_3   |d1 >= 64
|
| No normalization necessary
|
no_lp:
clrb %d1   |set no inex2 reported
movel FP_SCR2+LOCAL_GRS(%a6),%d0 |restore original g,r,s
rts
|
| case (0<d1<32)
|
| The mantissa is shifted right by d1 bits.  Each extract below reads
| 32 bits starting (32 - d1) bits into the named longword, i.e. the
| low d1 bits of that longword followed by the high bits of the next.
|
case_1:
movel %d2,-(%sp)
movew %d0,LOCAL_EX(%a0)  |exponent = denorm threshold
movel #32,%d0
subw %d1,%d0   |d0 = 32 - d1
bfextu LOCAL_EX(%a0){%d0:#32},%d2
bfextu %d2{%d1:%d0},%d2  |d2 = new LOCAL_HI
bfextu LOCAL_HI(%a0){%d0:#32},%d1 |d1 = new LOCAL_LO
bfextu FP_SCR2+LOCAL_LO(%a6){%d0:#32},%d0 |d0 = new G,R,S
movel %d2,LOCAL_HI(%a0)  |store new LOCAL_HI
movel %d1,LOCAL_LO(%a0)  |store new LOCAL_LO
clrb %d1   |assume no inexact flag
bftst %d0{#2:#30}  |anything below the top two bits?
beqs c1nstky
bsetl #rnd_stky_bit,%d0  |yes: fold it into sticky
st %d1   |and flag inexact (d1.b = all ones)
c1nstky:
movel FP_SCR2+LOCAL_GRS(%a6),%d2 |restore original g,r,s
andil #0xe0000000,%d2  |clear all but G,R,S
tstl %d2   |test if original G,R,S are clear
beqs grs_clear
orl #0x20000000,%d0  |set sticky bit in d0
grs_clear:
andil #0xe0000000,%d0  |clear all but G,R,S
movel (%sp)+,%d2
rts
|
| case (32<=d1<64)
|
| The ms mantissa moves down into the ls mantissa position (shifted
| by a further d1 - 32 bits) and LOCAL_HI becomes zero.
|
case_2:
movel %d2,-(%sp)
movew %d0,LOCAL_EX(%a0)  |unsigned exponent = threshold
subw #32,%d1   |d1 now between 0 and 32
movel #32,%d0
subw %d1,%d0   |d0 = 32 - d1
bfextu LOCAL_EX(%a0){%d0:#32},%d2
bfextu %d2{%d1:%d0},%d2  |d2 = new LOCAL_LO
bfextu LOCAL_HI(%a0){%d0:#32},%d1 |d1 = new G,R,S
bftst %d1{#2:#30}  |anything below the top two bits?
bnes c2_sstky  |bra if sticky bit to be set
bftst FP_SCR2+LOCAL_LO(%a6){%d0:#32}  |or in the bits shifted out below that?
bnes c2_sstky  |bra if sticky bit to be set
movel %d1,%d0
clrb %d1   |no inexact flag
bras end_c2
c2_sstky:
movel %d1,%d0
bsetl #rnd_stky_bit,%d0
st %d1   |flag inexact (d1.b = all ones)
end_c2:
clrl LOCAL_HI(%a0)  |store LOCAL_HI = 0
movel %d2,LOCAL_LO(%a0)  |store LOCAL_LO
movel FP_SCR2+LOCAL_GRS(%a6),%d2 |restore original g,r,s
andil #0xe0000000,%d2  |clear all but G,R,S
tstl %d2   |test if original G,R,S are clear
beqs clear_grs
orl #0x20000000,%d0  |set sticky bit in d0
clear_grs:
andil #0xe0000000,%d0  |get rid of all but G,R,S
movel (%sp)+,%d2
rts
|
| d1 >= 64 Force the exponent to be the denorm threshold with the
| correct sign.
|
case_3:
movew %d0,LOCAL_EX(%a0)
tstw LOCAL_SGN(%a0)
bges c3con
c3neg:
orl #0x80000000,LOCAL_EX(%a0)
c3con:
cmpw #64,%d1
beqs sixty_four
cmpw #65,%d1
beqs sixty_five
|
| Shift value is out of range.  Set d1 for inex2 flag and
| return a zero with the given threshold.
|
clrl LOCAL_HI(%a0)
clrl LOCAL_LO(%a0)
movel #0x20000000,%d0  |sticky only
st %d1
rts

| Shift of exactly 64: the top two bits of the ms mantissa become
| G and R; every lower bit only contributes to sticky.
sixty_four:
movel LOCAL_HI(%a0),%d0
bfextu %d0{#2:#30},%d1  |d1 = bits below the top two
andil #0xc0000000,%d0  |keep the top two bits
bras c3com

| Shift of exactly 65: the top bit of the ms mantissa becomes R.
sixty_five:
movel LOCAL_HI(%a0),%d0
bfextu %d0{#1:#31},%d1  |d1 = bits below the top one
andil #0x80000000,%d0  |keep the top bit
lsrl #1,%d0   |shift high bit into R bit

| Sticky is set if any discarded ms-mantissa bit, any ls-mantissa
| bit, or the top byte of the saved g,r,s word is non-zero.
c3com:
tstl %d1
bnes c3ssticky
tstl LOCAL_LO(%a0)
bnes c3ssticky
tstb FP_SCR2+LOCAL_GRS(%a6)
bnes c3ssticky
clrb %d1   |exact: no inexact flag
bras c3end

c3ssticky:
bsetl #rnd_stky_bit,%d0
st %d1   |flag inexact (d1.b = all ones)
c3end:
clrl LOCAL_HI(%a0)  |the whole mantissa has been shifted out
clrl LOCAL_LO(%a0)
rts

|end

Messung V0.5 in Prozent

¤ Dauer der Verarbeitung: 0.10 Sekunden (vorverarbeitet am 2026-04-29) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder die Vollständigkeit noch die Richtigkeit noch die Qualität der bereitgestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.