|
| stan.sa 3.3 7/29/91
|
| The entry point stan computes the tangent of
| an input argument;
| stand does the same except for denormalized input.
|
| Input: Double-extended number X in location pointed to
| by address register a0.
|
| Output: The value tan(X) returned in floating-point register Fp0.
|
| Accuracy and Monotonicity: The returned result is within 3 ulp in
| 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
| result is subsequently rounded to double precision. The
| result is provably monotonic in double precision.
|
| Speed: The program sTAN takes approximately 170 cycles for
| input argument X such that |X| < 15Pi, which is the usual
| situation.
|
| Algorithm:
|
| 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
|
| 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
| k = N mod 2, so in particular, k = 0 or 1.
|
| 3. If k is odd, go to 5.
|
| 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a
| rational function U/V where
| U = r + r*s*(P1 + s*(P2 + s*P3)), and
| V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r.
| Exit.
|
| 5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by a
| rational function U/V where
| U = r + r*s*(P1 + s*(P2 + s*P3)), and
| V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,
| -Cot(r) = -V/U. Exit.
|
| 6. If |X| > 1, go to 8.
|
| 7. (|X|<2**(-40)) Tan(X) = X. Exit.
|
| 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
|
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| For details on the license for this file, please see the
| file, README, in this same directory.
|STAN idnt 2,1 | Motorola 040 Floating Point Software Package
|--Final steps of a computation whose beginning is not visible here.
|--The divide is deliberately the very last floating-point operation and
|--is issued only after the control register has been reloaded from d1,
|--so whatever exception it raises is taken under the restored settings.
fmovel %d1,%fpcr |restore users exceptions
fdivx (%sp)+,%fp0 |last inst - possible exception set; fp0 := fp0 / (extended operand popped off the stack)
bra t_frcinx |common exit; t_frcinx is defined outside this chunk
TANBORS:
|--Dispatcher for arguments outside the range handled by the main path.
|--d0 is compared as the "compact form" of |X| (see REDUCEX comments);
|--NOTE(review): d0 and d1 are set up by code outside this chunk - confirm.
|--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
|--IF |X| < 2**(-40), RETURN X (header step 7: Tan(X) = X).
cmpil #0x3FFF8000,%d0 |0x3FFF8000 is the compact form of 1.0
bgts REDUCEX |above 1 => large argument; otherwise fall through to TANSM
TANSM:
|--Small-argument case: the result is X itself.  X is bounced through the
|--stack so that the reload is the last FP instruction and executes after
|--the control register has been restored from d1.
fmovex %fp0,-(%sp) |push X (extended) on the stack
fmovel %d1,%fpcr |restore users exceptions
fmovex (%sp)+,%fp0 |last inst - possible exception set
bra t_frcinx |common exit; t_frcinx is defined outside this chunk
REDUCEX:
|--General argument reduction entry.
|--In:  fp0 = X, d0 = compact form of abs(X).
|--Out: falls into LOOP with the remainder held as the pair (R,r) in
|--     (fp0,fp1), where r is the low-order correction to R.
|--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
|--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
|--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
|--NOTE(review): LOOP/WORK below overwrite d2 and fp2-fp5.  Nothing in the
|--visible code saves them; if the exit path at RESTORE (not visible in
|--this chunk) pops them, the matching pushes
|--    fmovemx %fp2-%fp5,-(%a7)  /  movel %d2,-(%a7)
|--belong right here.  Confirm against RESTORE before adding them.
|--The low half r must start at zero: on the common path (argument not
|--dangerously large) control goes straight to LOOP, which reads fp1.
fmoves #0x00000000,%fp1 |r := +0.0
|--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
|--there is a danger of unwanted overflow in first LOOP iteration. In this
|--case, reduce argument by one remainder step to make subsequent reduction
|--safe.
cmpil #0x7ffeffff,%d0 |is argument dangerously large?
bnes LOOP
movel #0x7ffe0000,FP_SCR2(%a6) |yes
| ;create 2**16383*PI/2
movel #0xc90fdaa2,FP_SCR2+4(%a6)
clrl FP_SCR2+8(%a6)
ftstx %fp0 |test sign of argument
|--the integer moves below do not disturb the FPU condition codes that
|--fblt consumes
movel #0x7fdc0000,FP_SCR3(%a6) |create low half of 2**16383*
| ;PI/2 at FP_SCR3
movel #0x85a308d3,FP_SCR3+4(%a6)
clrl FP_SCR3+8(%a6)
fblt red_neg |negative arg: add the constants as they are
orw #0x8000,FP_SCR2(%a6) |positive arg: set sign bits so the adds subtract
orw #0x8000,FP_SCR3(%a6)
red_neg:
faddx FP_SCR2(%a6),%fp0 |high part of reduction is exact
fmovex %fp0,%fp1 |save high result in fp1
faddx FP_SCR3(%a6),%fp0 |low part of reduction
fsubx %fp0,%fp1 |determine low component of result
faddx FP_SCR3(%a6),%fp1 |fp0/fp1 are reduced argument.
|--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
|--integer quotient will be stored in N
|--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
|--Each pass removes N * 2**L * (PI/2) from (R,r), with L chosen from the
|--exponent K of R.  ENDFLAG records whether this pass is the final one
|--(L = 0); the loop leaves through RESTORE, defined outside this chunk.
|--Scratch used: d0, d2, a1, fp2-fp5, FP_SCR1-FP_SCR3, INARG, TWOTO63.
LOOP:
fmovex %fp0,INARG(%a6) | ...+-2**K * F, 1 <= F < 2
movew INARG(%a6),%d0 | ...sign/exponent word of R
movel %d0,%a1 | ...save a copy of D0
andil #0x00007FFF,%d0 | ...strip sign, keep biased exponent
subil #0x00003FFF,%d0 | ...D0 IS K
cmpil #28,%d0
bles LASTLOOP
CONTLOOP:
subil #27,%d0 | ...D0 IS L := K-27
movel #0,ENDFLAG(%a6) | ...more passes will follow
bras WORK
LASTLOOP:
clrl %d0 | ...D0 IS L := 0
movel #1,ENDFLAG(%a6) | ...this is the final pass
WORK:
|--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
|--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
|--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
|--2**L * (PIby2_1), 2**L * (PIby2_2)
movel #0x00003FFE,%d2 | ...BIASED EXPO OF 2/PI
subl %d0,%d2 | ...BIASED EXPO OF 2**(-L)*(2/PI)
movel #0xA2F9836E,FP_SCR1+4(%a6)
movel #0x4E44152A,FP_SCR1+8(%a6)
movew %d2,FP_SCR1(%a6) | ...FP_SCR1 is 2**(-L)*(2/PI)
fmovex %fp0,%fp2
fmulx FP_SCR1(%a6),%fp2 | ...FP2 is R * 2**(-L) * (2/PI)
|--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
|--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
|--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
|--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
|--US THE DESIRED VALUE IN FLOATING POINT.
|--HIDE SIX CYCLES OF INSTRUCTION
movel %a1,%d2
swap %d2 | ...sign/exponent word into the upper half
andil #0x80000000,%d2 | ...keep only SIGN(INARG)
oril #0x5F000000,%d2 | ...D2 IS SIGN(INARG)*2**63 IN SGL
movel %d2,TWOTO63(%a6)
movel %d0,%d2
addil #0x00003FFF,%d2 | ...BIASED EXPO OF 2**L * (PI/2)
|--FP2 IS READY
fadds TWOTO63(%a6),%fp2 | ...THE FRACTIONAL PART OF FP2 IS ROUNDED AWAY
|--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1 and 2**(L)*Piby2_2
movew %d2,FP_SCR2(%a6)
clrw FP_SCR2+2(%a6)
movel #0xC90FDAA2,FP_SCR2+4(%a6)
clrl FP_SCR2+8(%a6) | ...FP_SCR2 is 2**(L) * Piby2_1
|--FP2 IS READY
fsubs TWOTO63(%a6),%fp2 | ...FP2 is N
addil #0x00003FDD,%d0 | ...BIASED EXPO OF 2**L * Piby2_2
movew %d0,FP_SCR3(%a6)
clrw FP_SCR3+2(%a6)
movel #0x85A308D3,FP_SCR3+4(%a6)
clrl FP_SCR3+8(%a6) | ...FP_SCR3 is 2**(L) * Piby2_2
|--D0 no longer holds L; reload the end-of-loop flag that is tested below
movel ENDFLAG(%a6),%d0
|--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
|--P2 = 2**(L) * Piby2_2
fmovex %fp2,%fp4
fmulx FP_SCR2(%a6),%fp4 | ...W = N*P1
fmovex %fp2,%fp5
fmulx FP_SCR3(%a6),%fp5 | ...w = N*P2
fmovex %fp4,%fp3
|--we want P+p = W+w but |p| <= half ulp of P
|--Then, we need to compute A := R-P and a := r-p
faddx %fp5,%fp3 | ...FP3 is P
fsubx %fp3,%fp4 | ...W-P
fsubx %fp3,%fp0 | ...FP0 is A := R - P
faddx %fp5,%fp4 | ...FP4 is p = (W-P)+w
fmovex %fp0,%fp3 | ...FP3 A
fsubx %fp4,%fp1 | ...FP1 is a := r - p
|--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
|--|r| <= half ulp of R.
faddx %fp1,%fp0 | ...FP0 is R := A+a
|--No need to calculate r if this is the last loop
cmpil #0,%d0 | ...D0 is ENDFLAG: 1 on the final pass, else 0
bgt RESTORE
|--Need to calculate r
fsubx %fp0,%fp3 | ...A-R
faddx %fp3,%fp1 | ...FP1 is r := (A-R)+a
bra LOOP
| The information on this web page was compiled carefully and to the best
| of our knowledge. However, no guarantee is given as to the completeness,
| correctness, or quality of the information provided.
| Note:
| The colored syntax highlighting and the measurement are still experimental.