|
| slogn.sa 3.1 12/10/90
|
| slogn computes the natural logarithm of an
| input value. slognd does the same except the input value is a
| denormalized number. slognp1 computes log(1+X), and slognp1d
| computes log(1+X) for denormalized X.
|
| Input: Double-extended value in memory location pointed to by address
| register a0.
|
| Output: log(X) or log(1+X) returned in floating-point register Fp0.
|
| Accuracy and Monotonicity: The returned result is within 2 ulps in
| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
| result is subsequently rounded to double precision. The
| result is provably monotonic in double precision.
|
| Speed: The program slogn takes approximately 190 cycles for input
| argument X such that |X-1| >= 1/16, which is the usual
| situation. For those arguments, slognp1 takes approximately
| 210 cycles. For the less common arguments, the program will
| run no worse than 10% slower.
|
| Algorithm:
| LOGN:
| Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in
| u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2.
|
| Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven
| significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base
| 2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7).
|
| Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u,
| log(1+u) = poly.
|
| Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)
| by k*log(2) + (log(F) + poly). The values of log(F) are calculated
| beforehand and stored in the program.
|
| lognp1:
| Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in
| u where u = 2X/(2+X). Otherwise, move on to Step 2.
|
| Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2
| of the algorithm for LOGN and compute log(1+X) as
| k*log(2) + log(F) + poly where poly approximates log(1+u),
| u = (Y-F)/F.
|
| Implementation Notes:
| Note 1. There are 64 different possible values for F, thus 64 log(F)'s
| need to be tabulated. Moreover, the values of 1/F are also
| tabulated so that the division in (Y-F)/F can be performed by a
| multiplication.
|
| Note 2. In Step 2 of lognp1, in order to preserved accuracy, the value
| Y-F has to be calculated carefully when 1/2 <= X < 3/2.
|
| Note 3. To fully exploit the pipeline, polynomials are usually separated
| into two parts evaluated independently before being added up.
|
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| For details on the license for this file, please see the
| file, README, in this same directory.
|slogn idnt 2,1 | Motorola 040 Floating Point Software Package
.global slognd
slognd:
|--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
movel #-100,ADJK(%a6) | ...INPUT = 2^(ADJK) * FP0
|----normalize the input value by left shifting k bits (k to be determined
|----below), adjusting exponent and storing -k to ADJK
|----the value TWOTO100 is no longer needed.
|----Note that this code assumes the denormalized input is NON-ZERO.
moveml %d2-%d7,-(%a7) | ...save some registers
movel #0x00000000,%d3 | ...D3 is exponent of smallest norm. #
movel 4(%a0),%d4
movel 8(%a0),%d5 | ...(D4,D5) is (Hi_X,Lo_X)
clrl %d2 | ...D2 used for holding K
cmpil #0,%d0 | ...CHECK IF X IS NEGATIVE
blt LOGNEG | ...LOG OF NEGATIVE ARGUMENT IS INVALID
cmp2l BOUNDS1,%d0 | ...X IS POSITIVE, CHECK IF X IS NEAR 1
bcc LOGNEAR1 | ...BOUNDS IS ROUGHLY [15/16, 17/16]
LOGMAIN:
|--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
|--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
|--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
|--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
|-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
|--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
|--LOG(1+U) CAN BE VERY EFFICIENT.
|--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
|--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
|--GET K, Y, F, AND ADDRESS OF 1/F.
asrl #8,%d0
asrl #8,%d0 | ...SHIFTED 16 BITS, BIASED EXPO. OF X
subil #0x3FFF,%d0 | ...THIS IS K
addl ADJK(%a6),%d0 | ...ADJUST K, ORIGINAL INPUT MAY BE DENORM.
lea LOGTBL,%a0 | ...BASE ADDRESS OF 1/F AND LOG(F)
fmovel %d0,%fp1 | ...CONVERT K TO FLOATING-POINT FORMAT
|--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
movel #0x3FFF0000,X(%a6) | ...X IS NOW Y, I.E. 2^(-K)*X
movel XFRAC(%a6),FFRAC(%a6)
andil #0xFE000000,FFRAC(%a6) | ...FIRST 7 BITS OF Y
oril #0x01000000,FFRAC(%a6) | ...GET F: ATTACH A 1 AT THE EIGHTH BIT
movel FFRAC(%a6),%d0 | ...READY TO GET ADDRESS OF 1/F
andil #0x7E000000,%d0
asrl #8,%d0
asrl #8,%d0
asrl #4,%d0 | ...SHIFTED 20, D0 IS THE DISPLACEMENT
addal %d0,%a0 | ...A0 IS THE ADDRESS FOR 1/F
fmovex X(%a6),%fp0
movel #0x3fff0000,F(%a6)
clrl F+8(%a6)
fsubx F(%a6),%fp0 | ...Y-F
fmovemx %fp2-%fp2/%fp3,-(%sp) | ...SAVE FP2 WHILE FP0 IS NOT READY
|--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
|--REGISTERS SAVED: FPCR, FP1, FP2
LP1CONT1:
|--AN RE-ENTRY POINT FOR LOGNP1
fmulx (%a0),%fp0 | ...FP0 IS U = (Y-F)/F
fmulx LOGOF2,%fp1 | ...GET K*LOG2 WHILE FP0 IS NOT READY
fmovex %fp0,%fp2
fmulx %fp2,%fp2 | ...FP2 IS V=U*U
fmovex %fp1,KLOG2(%a6) | ...PUT K*LOG2 IN MEMORY, FREE FP1
|--LOG(1+U) IS APPROXIMATED BY
|--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
|--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))]
fmovel %d1,%fpcr
faddx KLOG2(%a6),%fp0 | ...FINAL ADD
bra t_frcinx
LOGNEAR1:
|--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
fmovex %fp0,%fp1
fsubs one,%fp1 | ...FP1 IS X-1
fadds one,%fp0 | ...FP0 IS X+1
faddx %fp1,%fp1 | ...FP1 IS 2(X-1)
|--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
|--IN U, U = 2(X-1)/(X+1) = FP1/FP0
LP1CONT2:
|--THIS IS AN RE-ENTRY POINT FOR LOGNP1
fdivx %fp0,%fp1 | ...FP1 IS U
fmovemx %fp2-%fp2/%fp3,-(%sp) | ...SAVE FP2
|--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
|--LET V=U*U, W=V*V, CALCULATE
|--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
|--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] )
fmovex %fp1,%fp0
fmulx %fp0,%fp0 | ...FP0 IS V
fmovex %fp1,SAVEU(%a6) | ...STORE U IN MEMORY, FREE FP1
fmovex %fp0,%fp1
fmulx %fp1,%fp1 | ...FP1 IS W
fmovel %d1,%fpcr
faddx SAVEU(%a6),%fp0
bra t_frcinx
rts
LOGNEG:
|--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
bra t_operr
.global slognp1d
slognp1d:
|--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
| Simply return the denorm
bra t_extdnrm
.global slognp1
slognp1:
|--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
fmovex (%a0),%fp0 | ...LOAD INPUT
fabsx %fp0 |test magnitude
fcmpx LTHOLD,%fp0 |compare with min threshold
fbgt LP1REAL |if greater, continue
fmovel #0,%fpsr |clr N flag from compare
fmovel %d1,%fpcr
fmovex (%a0),%fp0 |return signed argument
bra t_frcinx
LP1REAL:
fmovex (%a0),%fp0 | ...LOAD INPUT
movel #0x00000000,ADJK(%a6)
fmovex %fp0,%fp1 | ...FP1 IS INPUT Z
fadds one,%fp0 | ...X := ROUND(1+Z)
fmovex %fp0,X(%a6)
movew XFRAC(%a6),XDCARE(%a6)
movel X(%a6),%d0
cmpil #0,%d0
ble LP1NEG0 | ...LOG OF ZERO OR -VE
cmp2l BOUNDS2,%d0
bcs LOGMAIN | ...BOUNDS2 IS [1/2,3/2]
|--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
|--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
|--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
LP1NEAR1:
|--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
cmp2l BOUNDS1,%d0
bcss LP1CARE
LP1ONE16:
|--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
|--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
faddx %fp1,%fp1 | ...FP1 IS 2Z
fadds one,%fp0 | ...FP0 IS 1+X
|--U = FP1/FP0
bra LP1CONT2
LP1CARE:
|--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
|--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
|--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
|--THERE ARE ONLY TWO CASES.
|--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
|--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z
|--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
|--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.