/* ieee754-df.S double-precision floating point support for ARM

   Copyright (C) 2003, 2004  Free Software Foundation, Inc.
   Contributed by Nicolas Pitre (nico@cam.org)

   This file is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2, or (at your option) any
   later version.

   In addition to the permissions in the GNU General Public License, the
   Free Software Foundation gives you unlimited permission to link the
   compiled version of this file into combinations with other programs,
   and to distribute those combinations without any restriction coming
   from the use of this file.  (The General Public License restrictions
   do apply in other respects; for example, they cover modification of
   the file, and distribution when not linked into a combined
   executable.)

   This file is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.  If not, write to
   the Free Software Foundation, 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */

/* We need to know what prefix to add to function names.  */

#define L_muldivdf3
#define L_addsubdf3
#define L_cmpdf2
#define L_fixdfsi
#define __USER_LABEL_PREFIX__

#ifdef __ARMEB__
#define al	r1
#define ah	r0
#else
#define al	r0
#define ah	r1
#endif

#ifndef __USER_LABEL_PREFIX__
#error	__USER_LABEL_PREFIX__ not defined
#endif

/* ANSI concatenation macros.  */

#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b

/* Use the right prefix for global labels.  */

#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)

#ifdef __ELF__
#ifdef __thumb__
#define __PLT__  /* Not supported in Thumb assembler (for now).  */
#else
#define __PLT__ (PLT)
#endif
#define TYPE(x) .type SYM(x),function
#define SIZE(x) .size SYM(x), . - SYM(x)
#define LSYM(x) .x
#else
#define __PLT__
#define TYPE(x)
#define SIZE(x)
#define LSYM(x) x
#endif

/* Function end macros.  Variants for interworking.  */

@ This selects the minimum architecture level required.
#define __ARM_ARCH__ 3

#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
	|| defined(__ARM_ARCH_4T__)
/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
   long multiply instructions.  That includes v3M.  */
# undef __ARM_ARCH__
# define __ARM_ARCH__ 4
#endif

#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
	|| defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
	|| defined(__ARM_ARCH_5TEJ__)
# undef __ARM_ARCH__
# define __ARM_ARCH__ 5
#endif

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
	|| defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
	|| defined(__ARM_ARCH_6ZK__)
# undef __ARM_ARCH__
# define __ARM_ARCH__ 6
#endif

/* How to return from a function call depends on the architecture variant.  */

#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)

# define RET		bx	lr
# define RETc(x)	bx##x	lr

/* Special precautions for interworking on armv4t.  */
# if (__ARM_ARCH__ == 4)

/* Always use bx, not ldr pc.  */
#  if (defined(__thumb__) || defined(__THUMB_INTERWORK__))
#   define __INTERWORKING__
#  endif /* __THUMB__ || __THUMB_INTERWORK__ */

/* Include thumb stub before arm mode code.  */
#  if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
#   define __INTERWORKING_STUBS__
#  endif /* __thumb__ && !__THUMB_INTERWORK__ */

# endif /* __ARM_ARCH__ == 4 */

#else

# define RET		mov	pc, lr
# define RETc(x)	mov##x	pc, lr

#endif

/* Don't pass dirn, it's there just to get token pasting right.  */

.macro	RETLDM	regs=, cond=, dirn=ia
#if defined (__INTERWORKING__)
	.ifc "\regs",""
	ldr\cond	lr, [sp], #4
	.else
	ldm\cond\dirn	sp!, {\regs, lr}
	.endif
	bx\cond	lr
#else
	.ifc "\regs",""
	ldr\cond	pc, [sp], #4
	.else
	ldm\cond\dirn	sp!, {\regs, pc}
	.endif
#endif
.endm
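
@ RETLDM pops the given register list plus the saved return address off
@ the stack and returns, using bx when interworking is in effect.
@ Typical uses in this file: RETLDM alone to pop only the return
@ address, RETLDM "r4, r5" to restore scratch registers as well, and an
@ optional trailing condition as in RETLDM "r4, r5" hi.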

.macro ARM_LDIV0
LSYM(Ldiv0):
	str	lr, [sp, #-4]!
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ About as wrong as it could be.
	RETLDM
.endm


.macro THUMB_LDIV0
LSYM(Ldiv0):
	push	{ lr }
	bl	SYM (__div0)
	mov	r0, #0			@ About as wrong as it could be.
#if defined (__INTERWORKING__)
	pop	{ r1 }
	bx	r1
#else
	pop	{ pc }
#endif
.endm

.macro FUNC_END name
	SIZE (__\name)
.endm

.macro DIV_FUNC_END name
#if 0
	@ How is this supposed to be valid?  Multiply defined label ahoy...
LSYM(Ldiv0):
#endif
#ifdef __thumb__
	THUMB_LDIV0
#else
	ARM_LDIV0
#endif
	FUNC_END \name
.endm

.macro THUMB_FUNC_START name
	.globl	SYM (\name)
	TYPE	(\name)
	.thumb_func
SYM (\name):
.endm

/* Function start macros.  Variants for ARM and Thumb.  */

#ifdef __thumb__
#define THUMB_FUNC .thumb_func
#define THUMB_CODE .force_thumb
#else
#define THUMB_FUNC
#define THUMB_CODE
#endif

.macro FUNC_START name
	.text
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	THUMB_CODE
	THUMB_FUNC
SYM (__\name):
.endm

/* Special function that will always be coded in ARM assembly, even if
   in Thumb-only compilation.  */

#if defined(__INTERWORKING_STUBS__)
.macro	ARM_FUNC_START name
	FUNC_START \name
	bx	pc
	nop
	.arm
/* A hook to tell gdb that we've switched to ARM mode.  Also used to call
   directly from other local arm routines.  */
_L__\name:
.endm
#define EQUIV .thumb_set
/* Branch directly to a function declared with ARM_FUNC_START.
   Must be called in arm mode.  */
.macro	ARM_CALL name
	bl	_L__\name
.endm
#else
.macro	ARM_FUNC_START name
	.text
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	.arm
SYM (__\name):
.endm
#define EQUIV .set
.macro	ARM_CALL name
	bl	__\name
.endm
#endif

.macro	FUNC_ALIAS new old
	.globl	SYM (__\new)
#if defined (__thumb__)
	.thumb_set	SYM (__\new), SYM (__\old)
#else
	.set	SYM (__\new), SYM (__\old)
#endif
.endm

.macro	ARM_FUNC_ALIAS new old
	.globl	SYM (__\new)
	EQUIV	SYM (__\new), SYM (__\old)
#if defined(__INTERWORKING_STUBS__)
	.set	SYM (_L__\new), SYM (_L__\old)
#endif
.endm

#ifdef __thumb__
/* Register aliases.  */

work		.req	r4	@ XXXX is this safe ?
dividend	.req	r0
divisor		.req	r1
overdone	.req	r2
result		.req	r2
curbit		.req	r3
#endif
#if 0
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15
#endif

/*
 * Notes:
 *
 * The goal of this code is to be as fast as possible.  This is
 * not meant to be easy to understand for the casual reader.
 * For slightly simpler code please see the single precision version
 * of this file.
 *
 * Only the default rounding mode is intended for best performance.
 * Exceptions aren't supported yet, but that can be added quite easily
 * if necessary without impacting performance.
 */
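
@ A brief refresher on the IEEE 754 double format handled below, for
@ reference while reading the code: bit 31 of the high word is the sign,
@ bits 30-20 hold the 11-bit exponent (biased by 1023), and the
@ remaining 52 bits across both words hold the fraction, with an
@ implicit leading 1 bit (0x00100000 in the high word) for normalized
@ values.  An all-ones exponent encodes INF (zero fraction) or NAN
@ (non-zero fraction); an all-zeroes exponent encodes zeroes and
@ denormals.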

@ For FPA, float words are always big-endian.
@ For VFP, float words follow the memory system mode.
#if defined(__VFP_FP__) && !defined(__ARMEB__)
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#else
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#endif

#ifdef L_negdf2

ARM_FUNC_START negdf2
ARM_FUNC_ALIAS aeabi_dneg negdf2

	@ flip sign bit
	eor	xh, xh, #0x80000000
	RET

	FUNC_END aeabi_dneg
	FUNC_END negdf2

#endif

#ifdef L_addsubdf3

ARM_FUNC_START aeabi_drsub

	eor	xh, xh, #0x80000000	@ flip sign bit of first arg
	b	1f

ARM_FUNC_START subdf3
ARM_FUNC_ALIAS aeabi_dsub subdf3

	eor	yh, yh, #0x80000000	@ flip sign bit of second arg
#if defined(__INTERWORKING_STUBS__)
	b	1f			@ Skip Thumb-code prologue
#endif

ARM_FUNC_START adddf3
ARM_FUNC_ALIAS aeabi_dadd adddf3

1:	stmfd	sp!, {r4, r5, lr}

	@ Look for zeroes, equal values, INF, or NAN.
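	@ (r4 and r5 get the high words with the sign shifted out, so the
	@ exponent sits in the top 11 bits: "mvnnes ip, r4, asr #21" sets Z
	@ exactly when that exponent is all ones, i.e. INF or NAN, while
	@ the orrs with the low words catch values that are zero.  Any of
	@ those cases, and x == y, take the slow path at Lad_s.)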
	mov	r4, xh, lsl #1
	mov	r5, yh, lsl #1
	teq	r4, r5
	teqeq	xl, yl
	orrnes	ip, r4, xl
	orrnes	ip, r5, yl
	mvnnes	ip, r4, asr #21
	mvnnes	ip, r5, asr #21
	beq	LSYM(Lad_s)

	@ Compute exponent difference.  Make largest exponent in r4,
	@ corresponding arg in xh-xl, and positive exponent difference in r5.
	mov	r4, r4, lsr #21
	rsbs	r5, r4, r5, lsr #21
	rsblt	r5, r5, #0
	ble	1f
	add	r4, r4, r5
	eor	yl, xl, yl
	eor	yh, xh, yh
	eor	xl, yl, xl
	eor	xh, yh, xh
	eor	yl, xl, yl
	eor	yh, xh, yh
1:
	@ If exponent difference is too large, return largest argument
	@ already in xh-xl.  We need up to 54 bits to handle proper rounding
	@ of 0x1p54 - 1.1.
	cmp	r5, #54
	RETLDM	"r4, r5" hi

	@ Convert mantissa to signed integer.
	tst	xh, #0x80000000
	mov	xh, xh, lsl #12
	mov	ip, #0x00100000
	orr	xh, ip, xh, lsr #12
	beq	1f
	rsbs	xl, xl, #0
	rsc	xh, xh, #0
1:
	tst	yh, #0x80000000
	mov	yh, yh, lsl #12
	orr	yh, ip, yh, lsr #12
	beq	1f
	rsbs	yl, yl, #0
	rsc	yh, yh, #0
1:
	@ If exponent == difference, one or both args were denormalized.
	@ Since this is not a common case, rescale them off line.
	teq	r4, r5
	beq	LSYM(Lad_d)
LSYM(Lad_x):

	@ Compensate for the exponent overlapping the mantissa MSB added later
	sub	r4, r4, #1

	@ Shift yh-yl right per r5, add to xh-xl, keep leftover bits in ip.
	rsbs	lr, r5, #32
	blt	1f
	mov	ip, yl, lsl lr
	adds	xl, xl, yl, lsr r5
	adc	xh, xh, #0
	adds	xl, xl, yh, lsl lr
	adcs	xh, xh, yh, asr r5
	b	2f
1:	sub	r5, r5, #32
	add	lr, lr, #32
	cmp	yl, #1
	mov	ip, yh, lsl lr
	orrcs	ip, ip, #2		@ 2 not 1, to allow lsr #1 later
	adds	xl, xl, yh, asr r5
	adcs	xh, xh, yh, asr #31
2:
	@ We now have a result in xh-xl-ip.
	@ Keep absolute value in xh-xl-ip, sign in r5 (the n bit was set above)
	and	r5, xh, #0x80000000
	bpl	LSYM(Lad_p)
	rsbs	ip, ip, #0
	rscs	xl, xl, #0
	rsc	xh, xh, #0

	@ Determine how to normalize the result.
LSYM(Lad_p):
	cmp	xh, #0x00100000
	bcc	LSYM(Lad_a)
	cmp	xh, #0x00200000
	bcc	LSYM(Lad_e)

	@ Result needs to be shifted right.
	movs	xh, xh, lsr #1
	movs	xl, xl, rrx
	mov	ip, ip, rrx
	add	r4, r4, #1

	@ Make sure we did not bust our exponent.
	mov	r2, r4, lsl #21
	cmn	r2, #(2 << 21)
	bcs	LSYM(Lad_o)

	@ Our result is now properly aligned into xh-xl, remaining bits in ip.
	@ Round with MSB of ip.  If halfway between two numbers, round towards
	@ LSB of xl = 0.
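	@ (That is IEEE round-to-nearest-even: "cmp ip, #0x80000000" sets C
	@ when the discarded bits are at least half an ulp, and in the exact
	@ halfway case "moveqs ip, xl, lsr #1" replaces C with the LSB of
	@ xl, so the increment happens only when it makes xl even.)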
	@ Pack final result together.
LSYM(Lad_e):
	cmp	ip, #0x80000000
	moveqs	ip, xl, lsr #1
	adcs	xl, xl, #0
	adc	xh, xh, r4, lsl #20
	orr	xh, xh, r5
	RETLDM	"r4, r5"

	@ Result must be shifted left and exponent adjusted.
LSYM(Lad_a):
	movs	ip, ip, lsl #1
	adcs	xl, xl, xl
	adc	xh, xh, xh
	tst	xh, #0x00100000
	sub	r4, r4, #1
	bne	LSYM(Lad_e)

	@ No rounding necessary since ip will always be 0 at this point.
LSYM(Lad_l):
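	@ (This label doubles as the normalization entry point for the
	@ int-to-double conversions below.  The job is to find how far the
	@ MSB of xh-xl is from bit 20 of xh.  Without clz, the pre-ARMv5
	@ sequence narrows the bit position down with 16/8/4-bit threshold
	@ tests and settles the last couple of bits with shift arithmetic
	@ instead of more compares; from ARMv5 on a single clz plus an
	@ adjustment of 11 for the exponent field width does the same.)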

#if __ARM_ARCH__ < 5

	teq	xh, #0
	movne	r3, #20
	moveq	r3, #52
	moveq	xh, xl
	moveq	xl, #0
	mov	r2, xh
	cmp	r2, #(1 << 16)
	movhs	r2, r2, lsr #16
	subhs	r3, r3, #16
	cmp	r2, #(1 << 8)
	movhs	r2, r2, lsr #8
	subhs	r3, r3, #8
	cmp	r2, #(1 << 4)
	movhs	r2, r2, lsr #4
	subhs	r3, r3, #4
	cmp	r2, #(1 << 2)
	subhs	r3, r3, #2
	sublo	r3, r3, r2, lsr #1
	sub	r3, r3, r2, lsr #3

#else

	teq	xh, #0
	moveq	xh, xl
	moveq	xl, #0
	clz	r3, xh
	addeq	r3, r3, #32
	sub	r3, r3, #11

#endif

	@ determine how to shift the value.
	subs	r2, r3, #32
	bge	2f
	adds	r2, r2, #12
	ble	1f

	@ shift value left 21 to 31 bits, or actually right 11 to 1 bits
	@ since a register switch happened above.
	add	ip, r2, #20
	rsb	r2, r2, #12
	mov	xl, xh, lsl ip
	mov	xh, xh, lsr r2
	b	3f

	@ actually shift value left 1 to 20 bits, which might also represent
	@ 32 to 52 bits if counting the register switch that happened earlier.
1:	add	r2, r2, #20
2:	rsble	ip, r2, #32
	mov	xh, xh, lsl r2
	orrle	xh, xh, xl, lsr ip
	movle	xl, xl, lsl r2

	@ adjust exponent accordingly.
3:	subs	r4, r4, r3
	addge	xh, xh, r4, lsl #20
	orrge	xh, xh, r5
	RETLDM	"r4, r5" ge

	@ Exponent too small, denormalize result.
	@ Find out proper shift value.
	mvn	r4, r4
	subs	r4, r4, #31
	bge	2f
	adds	r4, r4, #12
	bgt	1f

	@ shift result right of 1 to 20 bits, sign is in r5.
	add	r4, r4, #20
	rsb	r2, r4, #32
	mov	xl, xl, lsr r4
	orr	xl, xl, xh, lsl r2
	orr	xh, r5, xh, lsr r4
	RETLDM	"r4, r5"

	@ shift result right of 21 to 31 bits, or left 11 to 1 bits after
	@ a register switch from xh to xl.
1:	rsb	r4, r4, #12
	rsb	r2, r4, #32
	mov	xl, xl, lsr r2
	orr	xl, xl, xh, lsl r4
	mov	xh, r5
	RETLDM	"r4, r5"

	@ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
	@ from xh to xl.
2:	mov	xl, xh, lsr r4
	mov	xh, r5
	RETLDM	"r4, r5"

	@ Adjust exponents for denormalized arguments.
	@ Note that r4 must not remain equal to 0.
LSYM(Lad_d):
	teq	r4, #0
	eor	yh, yh, #0x00100000
	eoreq	xh, xh, #0x00100000
	addeq	r4, r4, #1
	subne	r5, r5, #1
	b	LSYM(Lad_x)


LSYM(Lad_s):
	mvns	ip, r4, asr #21
	mvnnes	ip, r5, asr #21
	beq	LSYM(Lad_i)

	teq	r4, r5
	teqeq	xl, yl
	beq	1f

	@ Result is x + 0.0 = x or 0.0 + y = y.
	teq	r4, #0
	moveq	xh, yh
	moveq	xl, yl
	RETLDM	"r4, r5"

1:	teq	xh, yh

	@ Result is x - x = 0.
	movne	xh, #0
	movne	xl, #0
	RETLDM	"r4, r5" ne

	@ Result is x + x = 2x.
	movs	ip, r4, lsr #21
	bne	2f
	movs	xl, xl, lsl #1
	adcs	xh, xh, xh
	orrcs	xh, xh, #0x80000000
	RETLDM	"r4, r5"
2:	adds	r4, r4, #(2 << 21)
	addcc	xh, xh, #(1 << 20)
	RETLDM	"r4, r5" cc
	and	r5, xh, #0x80000000

	@ Overflow: return INF.
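	@ (0x7ff00000 cannot be encoded as a single ARM immediate, which is
	@ an 8-bit value rotated by an even amount; hence INF is built with
	@ two orr instructions, 0x7f000000 then 0x00f00000.)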
LSYM(Lad_o):
	orr	xh, r5, #0x7f000000
	orr	xh, xh, #0x00f00000
	mov	xl, #0
	RETLDM	"r4, r5"

	@ At least one of x or y is INF/NAN.
	@   if xh-xl != INF/NAN: return yh-yl (which is INF/NAN)
	@   if yh-yl != INF/NAN: return xh-xl (which is INF/NAN)
	@   if either is NAN: return NAN
	@   if opposite sign: return NAN
	@   otherwise return xh-xl (which is INF or -INF)
LSYM(Lad_i):
	mvns	ip, r4, asr #21
	movne	xh, yh
	movne	xl, yl
	mvneqs	ip, r5, asr #21
	movne	yh, xh
	movne	yl, xl
	orrs	r4, xl, xh, lsl #12
	orreqs	r5, yl, yh, lsl #12
	teqeq	xh, yh
	orrne	xh, xh, #0x00080000	@ quiet NAN
	RETLDM	"r4, r5"

	FUNC_END aeabi_dsub
	FUNC_END subdf3
	FUNC_END aeabi_dadd
	FUNC_END adddf3

ARM_FUNC_START floatunsidf
ARM_FUNC_ALIAS aeabi_ui2d floatunsidf

	teq	r0, #0
	moveq	r1, #0
	RETc(eq)
	stmfd	sp!, {r4, r5, lr}
	mov	r4, #0x400		@ initial exponent
	add	r4, r4, #(52-1 - 1)
	mov	r5, #0			@ sign bit is 0
	.ifnc	xl, r0
	mov	xl, r0
	.endif
	mov	xh, #0
	b	LSYM(Lad_l)

	FUNC_END aeabi_ui2d
	FUNC_END floatunsidf

ARM_FUNC_START floatsidf
ARM_FUNC_ALIAS aeabi_i2d floatsidf

	teq	r0, #0
	moveq	r1, #0
	RETc(eq)
	stmfd	sp!, {r4, r5, lr}
	mov	r4, #0x400		@ initial exponent
	add	r4, r4, #(52-1 - 1)
	ands	r5, r0, #0x80000000	@ sign bit in r5
	rsbmi	r0, r0, #0		@ absolute value
	.ifnc	xl, r0
	mov	xl, r0
	.endif
	mov	xh, #0
	b	LSYM(Lad_l)

	FUNC_END aeabi_i2d
	FUNC_END floatsidf

ARM_FUNC_START extendsfdf2
ARM_FUNC_ALIAS aeabi_f2d extendsfdf2
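
	@ (Widening works by stretching the 8-bit exponent field into the
	@ 11-bit one and then flipping bits with 0x38000000: for a normal
	@ number this amounts to adding 1023 - 127 = 896 = 0x380 to the
	@ biased exponent, saving a separate add.)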
	movs	r2, r0, lsl #1		@ toss sign bit
	mov	xh, r2, asr #3		@ stretch exponent
	mov	xh, xh, rrx		@ retrieve sign bit
	mov	xl, r2, lsl #28		@ retrieve remaining bits
	andnes	r3, r2, #0xff000000	@ isolate exponent
	teqne	r3, #0xff000000		@ if not 0, check if INF or NAN
	eorne	xh, xh, #0x38000000	@ fixup exponent otherwise.
	RETc(ne)			@ and return it.

	teq	r2, #0			@ if actually 0
	teqne	r3, #0xff000000		@ or INF or NAN
	RETc(eq)			@ we are done already.

	@ value was denormalized.  We can normalize it now.
	stmfd	sp!, {r4, r5, lr}
	mov	r4, #0x380		@ setup corresponding exponent
	and	r5, xh, #0x80000000	@ move sign bit in r5
	bic	xh, xh, #0x80000000
	b	LSYM(Lad_l)

	FUNC_END aeabi_f2d
	FUNC_END extendsfdf2

ARM_FUNC_START floatundidf
ARM_FUNC_ALIAS aeabi_ul2d floatundidf

	orrs	r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	mvfeqd	f0, #0.0
#endif
	RETc(eq)

#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	@ For hard FPA code we want to return via the tail below so that
	@ we can return the result in f0 as well as in r0/r1 for backwards
	@ compatibility.
	adr	ip, LSYM(f0_ret)
	stmfd	sp!, {r4, r5, ip, lr}
#else
	stmfd	sp!, {r4, r5, lr}
#endif

	mov	r5, #0
	b	2f

ARM_FUNC_START floatdidf
ARM_FUNC_ALIAS aeabi_l2d floatdidf

	orrs	r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	mvfeqd	f0, #0.0
#endif
	RETc(eq)

#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	@ For hard FPA code we want to return via the tail below so that
	@ we can return the result in f0 as well as in r0/r1 for backwards
	@ compatibility.
	adr	ip, LSYM(f0_ret)
	stmfd	sp!, {r4, r5, ip, lr}
#else
	stmfd	sp!, {r4, r5, lr}
#endif

	ands	r5, ah, #0x80000000	@ sign bit in r5
	bpl	2f
	rsbs	al, al, #0
	rsc	ah, ah, #0
2:
	mov	r4, #0x400		@ initial exponent
	add	r4, r4, #(52-1 - 1)

	@ FPA little-endian: must swap the word order.
	.ifnc	xh, ah
	mov	ip, al
	mov	xh, ah
	mov	xl, ip
	.endif

	movs	ip, xh, lsr #22
	beq	LSYM(Lad_p)

	@ The value is too big.  Scale it down a bit...
	mov	r2, #3
	movs	ip, ip, lsr #3
	addne	r2, r2, #3
	movs	ip, ip, lsr #3
	addne	r2, r2, #3
	add	r2, r2, ip, lsr #3

	rsb	r3, r2, #32
	mov	ip, xl, lsl r3
	mov	xl, xl, lsr r2
	orr	xl, xl, xh, lsl r3
	mov	xh, xh, lsr r2
	add	r4, r4, r2
	b	LSYM(Lad_p)

#if !defined (__VFP_FP__) && !defined(__SOFTFP__)

	@ Legacy code expects the result to be returned in f0.  Copy it
	@ there as well.
LSYM(f0_ret):
	stmfd	sp!, {r0, r1}
	ldfd	f0, [sp], #8
	RETLDM

#endif

	FUNC_END floatdidf
	FUNC_END aeabi_l2d
	FUNC_END floatundidf
	FUNC_END aeabi_ul2d

#endif /* L_addsubdf3 */

#ifdef L_muldivdf3

ARM_FUNC_START muldf3
ARM_FUNC_ALIAS aeabi_dmul muldf3
	stmfd	sp!, {r4, r5, r6, lr}

	@ Mask out exponents, trap any zero/denormal/INF/NAN.
	mov	ip, #0xff
	orr	ip, ip, #0x700
	ands	r4, ip, xh, lsr #20
	andnes	r5, ip, yh, lsr #20
	teqne	r4, ip
	teqne	r5, ip
	bleq	LSYM(Lml_s)

	@ Add exponents together
	add	r4, r4, r5

	@ Determine final sign.
	eor	r6, xh, yh

	@ Convert mantissa to unsigned integer.
	@ If power of two, branch to a separate path.
	bic	xh, xh, ip, lsl #21
	bic	yh, yh, ip, lsl #21
	orrs	r5, xl, xh, lsl #12
	orrnes	r5, yl, yh, lsl #12
	orr	xh, xh, #0x00100000
	orr	yh, yh, #0x00100000
	beq	LSYM(Lml_1)

#if __ARM_ARCH__ < 4

	@ Put sign bit in r6, which will be restored in yl later.
	and	r6, r6, #0x80000000

	@ Well, no way to make it shorter without the umull instruction.
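	@ (Each 64-bit mantissa image is split into four 16-bit halves, and
	@ the 128-bit product is accumulated from the sixteen 32-bit
	@ partial products below, propagating carries by hand through
	@ ip-lr-r5-r6.)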
	stmfd	sp!, {r6, r7, r8, r9, sl, fp}
	mov	r7, xl, lsr #16
	mov	r8, yl, lsr #16
	mov	r9, xh, lsr #16
	mov	sl, yh, lsr #16
	bic	xl, xl, r7, lsl #16
	bic	yl, yl, r8, lsl #16
	bic	xh, xh, r9, lsl #16
	bic	yh, yh, sl, lsl #16
	mul	ip, xl, yl
	mul	fp, xl, r8
	mov	lr, #0
	adds	ip, ip, fp, lsl #16
	adc	lr, lr, fp, lsr #16
	mul	fp, r7, yl
	adds	ip, ip, fp, lsl #16
	adc	lr, lr, fp, lsr #16
	mul	fp, xl, sl
	mov	r5, #0
	adds	lr, lr, fp, lsl #16
	adc	r5, r5, fp, lsr #16
	mul	fp, r7, yh
	adds	lr, lr, fp, lsl #16
	adc	r5, r5, fp, lsr #16
	mul	fp, xh, r8
	adds	lr, lr, fp, lsl #16
	adc	r5, r5, fp, lsr #16
	mul	fp, r9, yl
	adds	lr, lr, fp, lsl #16
	adc	r5, r5, fp, lsr #16
	mul	fp, xh, sl
	mul	r6, r9, sl
	adds	r5, r5, fp, lsl #16
	adc	r6, r6, fp, lsr #16
	mul	fp, r9, yh
	adds	r5, r5, fp, lsl #16
	adc	r6, r6, fp, lsr #16
	mul	fp, xl, yh
	adds	lr, lr, fp
	mul	fp, r7, sl
	adcs	r5, r5, fp
	mul	fp, xh, yl
	adc	r6, r6, #0
	adds	lr, lr, fp
	mul	fp, r9, r8
	adcs	r5, r5, fp
	mul	fp, r7, r8
	adc	r6, r6, #0
	adds	lr, lr, fp
	mul	fp, xh, yh
	adcs	r5, r5, fp
	adc	r6, r6, #0
	ldmfd	sp!, {yl, r7, r8, r9, sl, fp}

#else

	@ Here is the actual multiplication.
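	@ (The umull/umlal sequence computes the full 128-bit product of
	@ the two mantissa images, accumulating into r6-r5-lr-ip, most
	@ significant word first; meanwhile the final sign bit is parked
	@ in yl between the multiplies.)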
	umull	ip, lr, xl, yl
	mov	r5, #0
	umlal	lr, r5, xh, yl
	and	yl, r6, #0x80000000
	umlal	lr, r5, xl, yh
	mov	r6, #0
	umlal	r5, r6, xh, yh

#endif

	@ The LSBs in ip are only significant for the final rounding.
	@ Fold them into lr.
	teq	ip, #0
	orrne	lr, lr, #1

	@ Adjust result upon the MSB position.
	sub	r4, r4, #0xff
	cmp	r6, #(1 << (20-11))
	sbc	r4, r4, #0x300
	bcs	1f
	movs	lr, lr, lsl #1
	adcs	r5, r5, r5
	adc	r6, r6, r6
1:
	@ Shift to final position, add sign to result.
	orr	xh, yl, r6, lsl #11
	orr	xh, xh, r5, lsr #21
	mov	xl, r5, lsl #11
	orr	xl, xl, lr, lsr #21
	mov	lr, lr, lsl #11

	@ Check exponent range for under/overflow.
	subs	ip, r4, #(254 - 1)
	cmphi	ip, #0x700
	bhi	LSYM(Lml_u)

	@ Round the result, merge final exponent.
	cmp	lr, #0x80000000
	moveqs	lr, xl, lsr #1
	adcs	xl, xl, #0
	adc	xh, xh, r4, lsl #20
	RETLDM	"r4, r5, r6"

	@ Multiplication by 0x1p*: let's shortcut a lot of code.
LSYM(Lml_1):
	and	r6, r6, #0x80000000
	orr	xh, r6, xh
	orr	xl, xl, yl
	eor	xh, xh, yh
	subs	r4, r4, ip, lsr #1
	rsbgts	r5, r4, ip
	orrgt	xh, xh, r4, lsl #20
	RETLDM	"r4, r5, r6" gt

	@ Under/overflow: fix things up for the code below.
	orr	xh, xh, #0x00100000
	mov	lr, #0
	subs	r4, r4, #1

LSYM(Lml_u):
	@ Overflow?
	bgt	LSYM(Lml_o)

	@ Check if denormalized result is possible, otherwise return signed 0.
	cmn	r4, #(53 + 1)
	movle	xl, #0
	bicle	xh, xh, #0x7fffffff
	RETLDM	"r4, r5, r6" le

	@ Find out proper shift value.
	rsb	r4, r4, #0
	subs	r4, r4, #32
	bge	2f
	adds	r4, r4, #12
	bgt	1f

	@ shift result right of 1 to 20 bits, preserve sign bit, round, etc.
	add	r4, r4, #20
	rsb	r5, r4, #32
	mov	r3, xl, lsl r5
	mov	xl, xl, lsr r4
	orr	xl, xl, xh, lsl r5
	and	r2, xh, #0x80000000
	bic	xh, xh, #0x80000000
	adds	xl, xl, r3, lsr #31
	adc	xh, r2, xh, lsr r4
	orrs	lr, lr, r3, lsl #1
	biceq	xl, xl, r3, lsr #31
	RETLDM	"r4, r5, r6"

	@ shift result right of 21 to 31 bits, or left 11 to 1 bits after
	@ a register switch from xh to xl.  Then round.
1:	rsb	r4, r4, #12
	rsb	r5, r4, #32
	mov	r3, xl, lsl r4
	mov	xl, xl, lsr r5
	orr	xl, xl, xh, lsl r4
	bic	xh, xh, #0x7fffffff
	adds	xl, xl, r3, lsr #31
	adc	xh, xh, #0
	orrs	lr, lr, r3, lsl #1
	biceq	xl, xl, r3, lsr #31
	RETLDM	"r4, r5, r6"

	@ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
	@ from xh to xl.  Leftover bits are in r3-r6-lr for rounding.
2:	rsb	r5, r4, #32
	orr	lr, lr, xl, lsl r5
	mov	r3, xl, lsr r4
	orr	r3, r3, xh, lsl r5
	mov	xl, xh, lsr r4
	bic	xh, xh, #0x7fffffff
	bic	xl, xl, xh, lsr r4
	add	xl, xl, r3, lsr #31
	orrs	lr, lr, r3, lsl #1
	biceq	xl, xl, r3, lsr #31
	RETLDM	"r4, r5, r6"

	@ One or both arguments are denormalized.
	@ Scale them leftwards and preserve sign bit.
LSYM(Lml_d):
	teq	r4, #0
	bne	2f
	and	r6, xh, #0x80000000
1:	movs	xl, xl, lsl #1
	adc	xh, xh, xh
	tst	xh, #0x00100000
	subeq	r4, r4, #1
	beq	1b
	orr	xh, xh, r6
	teq	r5, #0
	movne	pc, lr
2:	and	r6, yh, #0x80000000
3:	movs	yl, yl, lsl #1
	adc	yh, yh, yh
	tst	yh, #0x00100000
	subeq	r5, r5, #1
	beq	3b
	orr	yh, yh, r6
	mov	pc, lr

LSYM(Lml_s):
	@ Isolate the INF and NAN cases away
	teq	r4, ip
	and	r5, ip, yh, lsr #20
	teqne	r5, ip
	beq	1f

	@ Here, one or more arguments are either denormalized or zero.
	orrs	r6, xl, xh, lsl #1
	orrnes	r6, yl, yh, lsl #1
	bne	LSYM(Lml_d)

	@ Result is 0, but determine sign anyway.
LSYM(Lml_z):
	eor	xh, xh, yh
	bic	xh, xh, #0x7fffffff
	mov	xl, #0
	RETLDM	"r4, r5, r6"

1:	@ One or both args are INF or NAN.
	orrs	r6, xl, xh, lsl #1
	moveq	xl, yl
	moveq	xh, yh
	orrnes	r6, yl, yh, lsl #1
	beq	LSYM(Lml_n)		@ 0 * INF or INF * 0 -> NAN
	teq	r4, ip
	bne	1f
	orrs	r6, xl, xh, lsl #12
	bne	LSYM(Lml_n)		@ NAN * <anything> -> NAN
1:	teq	r5, ip
	bne	LSYM(Lml_i)
	orrs	r6, yl, yh, lsl #12
	movne	xl, yl
	movne	xh, yh
	bne	LSYM(Lml_n)		@ <anything> * NAN -> NAN

	@ Result is INF, but we need to determine its sign.
LSYM(Lml_i):
	eor	xh, xh, yh

	@ Overflow: return INF (sign already in xh).
LSYM(Lml_o):
	and	xh, xh, #0x80000000
	orr	xh, xh, #0x7f000000
	orr	xh, xh, #0x00f00000
	mov	xl, #0
	RETLDM	"r4, r5, r6"

	@ Return a quiet NAN.
LSYM(Lml_n):
	orr	xh, xh, #0x7f000000
	orr	xh, xh, #0x00f80000
	RETLDM	"r4, r5, r6"

	FUNC_END aeabi_dmul
	FUNC_END muldf3

ARM_FUNC_START divdf3
ARM_FUNC_ALIAS aeabi_ddiv divdf3

	stmfd	sp!, {r4, r5, r6, lr}

	@ Mask out exponents, trap any zero/denormal/INF/NAN.
	mov	ip, #0xff
	orr	ip, ip, #0x700
	ands	r4, ip, xh, lsr #20
	andnes	r5, ip, yh, lsr #20
	teqne	r4, ip
	teqne	r5, ip
	bleq	LSYM(Ldv_s)

	@ Subtract divisor exponent from dividend's.
	sub	r4, r4, r5

	@ Preserve final sign into lr.
	eor	lr, xh, yh

|
|
@ Dividend -> r5-r6, divisor -> yh-yl.
|
|
orrs r5, yl, yh, lsl #12
|
|
mov xh, xh, lsl #12
|
|
beq LSYM(Ldv_1)
|
|
mov yh, yh, lsl #12
|
|
mov r5, #0x10000000
|
|
orr yh, r5, yh, lsr #4
|
|
orr yh, yh, yl, lsr #24
|
|
mov yl, yl, lsl #8
|
|
orr r5, r5, xh, lsr #4
|
|
orr r5, r5, xl, lsr #24
|
|
mov r6, xl, lsl #8
|
|
|
|
@ Initialize xh with final sign bit.
|
|
and xh, lr, #0x80000000
|
|
|
|
@ Ensure result will land to known bit position.
|
|
@ Apply exponent bias accordingly.
|
|
cmp r5, yh
|
|
cmpeq r6, yl
|
|
adc r4, r4, #(255 - 2)
|
|
add r4, r4, #0x300
|
|
bcs 1f
|
|
movs yh, yh, lsr #1
|
|
mov yl, yl, rrx
|
|
1:
|
|
@ Perform first substraction to align result to a nibble.
|
|
subs r6, r6, yl
|
|
sbc r5, r5, yh
|
|
movs yh, yh, lsr #1
|
|
mov yl, yl, rrx
|
|
mov xl, #0x00100000
|
|
mov ip, #0x00080000
|
|
|
|
@ The actual division loop.
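	@ (This is restoring long division producing four quotient bits per
	@ pass: each of the four steps below trial-subtracts the divisor
	@ from the remainder in r5-r6 and sets the quotient bit marked by
	@ ip, shifting the divisor right one between steps.  At the end of
	@ a pass the remainder is renormalized left by 4 and the divisor by
	@ 3 so the next pass starts in the same alignment, with ip having
	@ moved right by 4.)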
1:	subs	lr, r6, yl
	sbcs	lr, r5, yh
	subcs	r6, r6, yl
	movcs	r5, lr
	orrcs	xl, xl, ip
	movs	yh, yh, lsr #1
	mov	yl, yl, rrx
	subs	lr, r6, yl
	sbcs	lr, r5, yh
	subcs	r6, r6, yl
	movcs	r5, lr
	orrcs	xl, xl, ip, lsr #1
	movs	yh, yh, lsr #1
	mov	yl, yl, rrx
	subs	lr, r6, yl
	sbcs	lr, r5, yh
	subcs	r6, r6, yl
	movcs	r5, lr
	orrcs	xl, xl, ip, lsr #2
	movs	yh, yh, lsr #1
	mov	yl, yl, rrx
	subs	lr, r6, yl
	sbcs	lr, r5, yh
	subcs	r6, r6, yl
	movcs	r5, lr
	orrcs	xl, xl, ip, lsr #3

	orrs	lr, r5, r6
	beq	2f
	mov	r5, r5, lsl #4
	orr	r5, r5, r6, lsr #28
	mov	r6, r6, lsl #4
	mov	yh, yh, lsl #3
	orr	yh, yh, yl, lsr #29
	mov	yl, yl, lsl #3
	movs	ip, ip, lsr #4
	bne	1b

	@ We are done with a word of the result.
	@ Loop again for the low word if this pass was for the high word.
	tst	xh, #0x00100000
	bne	3f
	orr	xh, xh, xl
	mov	xl, #0
	mov	ip, #0x80000000
	b	1b
2:
	@ Be sure result starts in the high word.
	tst	xh, #0x00100000
	orreq	xh, xh, xl
	moveq	xl, #0
3:
	@ Check exponent range for under/overflow.
	subs	ip, r4, #(254 - 1)
	cmphi	ip, #0x700
	bhi	LSYM(Lml_u)

	@ Round the result, merge final exponent.
	subs	ip, r5, yh
	subeqs	ip, r6, yl
	moveqs	ip, xl, lsr #1
	adcs	xl, xl, #0
	adc	xh, xh, r4, lsl #20
	RETLDM	"r4, r5, r6"

	@ Division by 0x1p*: shortcut a lot of code.
LSYM(Ldv_1):
	and	lr, lr, #0x80000000
	orr	xh, lr, xh, lsr #12
	adds	r4, r4, ip, lsr #1
	rsbgts	r5, r4, ip
	orrgt	xh, xh, r4, lsl #20
	RETLDM	"r4, r5, r6" gt

	orr	xh, xh, #0x00100000
	mov	lr, #0
	subs	r4, r4, #1
	b	LSYM(Lml_u)

	@ Result might need to be denormalized: put remainder bits
	@ in lr for rounding considerations.
LSYM(Ldv_u):
	orr	lr, r5, r6
	b	LSYM(Lml_u)

	@ One or both arguments are either INF, NAN or zero.
LSYM(Ldv_s):
	and	r5, ip, yh, lsr #20
	teq	r4, ip
	teqeq	r5, ip
	beq	LSYM(Lml_n)		@ INF/NAN / INF/NAN -> NAN
	teq	r4, ip
	bne	1f
	orrs	r4, xl, xh, lsl #12
	bne	LSYM(Lml_n)		@ NAN / <anything> -> NAN
	teq	r5, ip
	bne	LSYM(Lml_i)		@ INF / <anything> -> INF
	mov	xl, yl
	mov	xh, yh
	b	LSYM(Lml_n)		@ INF / (INF or NAN) -> NAN
1:	teq	r5, ip
	bne	2f
	orrs	r5, yl, yh, lsl #12
	beq	LSYM(Lml_z)		@ <anything> / INF -> 0
	mov	xl, yl
	mov	xh, yh
	b	LSYM(Lml_n)		@ <anything> / NAN -> NAN
2:	@ If both are non-zero, we need to normalize and resume above.
	orrs	r6, xl, xh, lsl #1
	orrnes	r6, yl, yh, lsl #1
	bne	LSYM(Lml_d)
	@ One or both arguments are 0.
	orrs	r4, xl, xh, lsl #1
	bne	LSYM(Lml_i)		@ <non_zero> / 0 -> INF
	orrs	r5, yl, yh, lsl #1
	bne	LSYM(Lml_z)		@ 0 / <non_zero> -> 0
	b	LSYM(Lml_n)		@ 0 / 0 -> NAN

	FUNC_END aeabi_ddiv
	FUNC_END divdf3

#endif /* L_muldivdf3 */

#ifdef L_cmpdf2

@ Note: only r0 (return value) and ip are clobbered here.

ARM_FUNC_START gtdf2
ARM_FUNC_ALIAS gedf2 gtdf2
	mov	ip, #-1
	b	1f

ARM_FUNC_START ltdf2
ARM_FUNC_ALIAS ledf2 ltdf2
	mov	ip, #1
	b	1f

ARM_FUNC_START cmpdf2
ARM_FUNC_ALIAS nedf2 cmpdf2
ARM_FUNC_ALIAS eqdf2 cmpdf2
	mov	ip, #1			@ how should we specify unordered here?

1:	str	ip, [sp, #-4]

	@ Trap any INF/NAN first.
	mov	ip, xh, lsl #1
	mvns	ip, ip, asr #21
	mov	ip, yh, lsl #1
	mvnnes	ip, ip, asr #21
	beq	3f

	@ Test for equality.
	@ Note that 0.0 is equal to -0.0.
2:	orrs	ip, xl, xh, lsl #1	@ if x == 0.0 or -0.0
	orreqs	ip, yl, yh, lsl #1	@ and y == 0.0 or -0.0
	teqne	xh, yh			@ or xh == yh
	teqeq	xl, yl			@ and xl == yl
	moveq	r0, #0			@ then equal.
	RETc(eq)

	@ Clear C flag
	cmn	r0, #0
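
	@ (With equality out of the way, same-sign doubles order like
	@ unsigned 64-bit integers, with the order reversed when both are
	@ negative; the teq below sets N when the signs differ, in which
	@ case C stays cleared and the sign of y alone decides the result.)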
	@ Compare sign,
	teq	xh, yh

	@ Compare values if same sign
	cmppl	xh, yh
	cmpeq	xl, yl

	@ Result:
	movcs	r0, yh, asr #31
	mvncc	r0, yh, asr #31
	orr	r0, r0, #1
	RET

	@ Look for a NAN.
3:	mov	ip, xh, lsl #1
	mvns	ip, ip, asr #21
	bne	4f
	orrs	ip, xl, xh, lsl #12
	bne	5f			@ x is NAN
4:	mov	ip, yh, lsl #1
	mvns	ip, ip, asr #21
	bne	2b
	orrs	ip, yl, yh, lsl #12
	beq	2b			@ y is not NAN
5:	ldr	r0, [sp, #-4]		@ unordered return code
	RET

	FUNC_END gedf2
	FUNC_END gtdf2
	FUNC_END ledf2
	FUNC_END ltdf2
	FUNC_END nedf2
	FUNC_END eqdf2
	FUNC_END cmpdf2

ARM_FUNC_START aeabi_cdrcmple

	mov	ip, r0
	mov	r0, r2
	mov	r2, ip
	mov	ip, r1
	mov	r1, r3
	mov	r3, ip
	b	6f

ARM_FUNC_START aeabi_cdcmpeq
ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq

	@ The status-returning routines are required to preserve all
	@ registers except ip, lr, and cpsr.
6:	stmfd	sp!, {r0, lr}
	ARM_CALL cmpdf2
	@ Set the Z flag correctly, and the C flag unconditionally.
	cmp	r0, #0
	@ Clear the C flag if the return value was -1, indicating
	@ that the first operand was smaller than the second.
	cmnmi	r0, #0
	RETLDM	"r0"

	FUNC_END aeabi_cdcmple
	FUNC_END aeabi_cdcmpeq
	FUNC_END aeabi_cdrcmple

ARM_FUNC_START aeabi_dcmpeq

	str	lr, [sp, #-4]!
	ARM_CALL aeabi_cdcmple
	moveq	r0, #1			@ Equal to.
	movne	r0, #0			@ Less than, greater than, or unordered.
	RETLDM

	FUNC_END aeabi_dcmpeq

ARM_FUNC_START aeabi_dcmplt

	str	lr, [sp, #-4]!
	ARM_CALL aeabi_cdcmple
	movcc	r0, #1			@ Less than.
	movcs	r0, #0			@ Equal to, greater than, or unordered.
	RETLDM

	FUNC_END aeabi_dcmplt

ARM_FUNC_START aeabi_dcmple

	str	lr, [sp, #-4]!
	ARM_CALL aeabi_cdcmple
	movls	r0, #1			@ Less than or equal to.
	movhi	r0, #0			@ Greater than or unordered.
	RETLDM

	FUNC_END aeabi_dcmple

ARM_FUNC_START aeabi_dcmpge

	str	lr, [sp, #-4]!
	ARM_CALL aeabi_cdrcmple
	movls	r0, #1			@ Operand 2 is less than or equal to operand 1.
	movhi	r0, #0			@ Operand 2 greater than operand 1, or unordered.
	RETLDM

	FUNC_END aeabi_dcmpge

ARM_FUNC_START aeabi_dcmpgt

	str	lr, [sp, #-4]!
	ARM_CALL aeabi_cdrcmple
	movcc	r0, #1			@ Operand 2 is less than operand 1.
	movcs	r0, #0			@ Operand 2 is greater than or equal to operand 1,
					@ or they are unordered.
	RETLDM

	FUNC_END aeabi_dcmpgt

#endif /* L_cmpdf2 */

#ifdef L_unorddf2

ARM_FUNC_START unorddf2
ARM_FUNC_ALIAS aeabi_dcmpun unorddf2

	mov	ip, xh, lsl #1
	mvns	ip, ip, asr #21
	bne	1f
	orrs	ip, xl, xh, lsl #12
	bne	3f			@ x is NAN
1:	mov	ip, yh, lsl #1
	mvns	ip, ip, asr #21
	bne	2f
	orrs	ip, yl, yh, lsl #12
	bne	3f			@ y is NAN
2:	mov	r0, #0			@ arguments are ordered.
	RET

3:	mov	r0, #1			@ arguments are unordered.
	RET

	FUNC_END aeabi_dcmpun
	FUNC_END unorddf2

#endif /* L_unorddf2 */

#ifdef L_fixdfsi

ARM_FUNC_START fixdfsi
ARM_FUNC_ALIAS aeabi_d2iz fixdfsi

	@ check exponent range.
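	@ (Adding 1 << 21 to xh shifted left by one bumps the exponent
	@ field so that INF/NAN, exponent 0x7ff, carries out into C, while
	@ any value below 1.0 leaves the top bit clear; r2 then ends up as
	@ 31 minus the unbiased exponent, the right shift needed on the
	@ 32-bit mantissa image built in r3 below.)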
	mov	r2, xh, lsl #1
	adds	r2, r2, #(1 << 21)
	bcs	2f			@ value is INF or NAN
	bpl	1f			@ value is too small
	mov	r3, #(0xfffffc00 + 31)
	subs	r2, r3, r2, asr #21
	bls	3f			@ value is too large

	@ scale value
	mov	r3, xh, lsl #11
	orr	r3, r3, #0x80000000
	orr	r3, r3, xl, lsr #21
	tst	xh, #0x80000000		@ the sign bit
	mov	r0, r3, lsr r2
	rsbne	r0, r0, #0
	RET

1:	mov	r0, #0
	RET

2:	orrs	xl, xl, xh, lsl #12
	bne	4f			@ x is NAN.
3:	ands	r0, xh, #0x80000000	@ the sign bit
	moveq	r0, #0x7fffffff		@ maximum signed positive si
	RET

4:	mov	r0, #0			@ How should we convert NAN?
	RET

	FUNC_END aeabi_d2iz
	FUNC_END fixdfsi

#endif /* L_fixdfsi */

#ifdef L_fixunsdfsi

ARM_FUNC_START fixunsdfsi
ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi

	@ check exponent range.
	movs	r2, xh, lsl #1
	bcs	1f			@ value is negative
	adds	r2, r2, #(1 << 21)
	bcs	2f			@ value is INF or NAN
	bpl	1f			@ value is too small
	mov	r3, #(0xfffffc00 + 31)
	subs	r2, r3, r2, asr #21
	bmi	3f			@ value is too large

	@ scale value
	mov	r3, xh, lsl #11
	orr	r3, r3, #0x80000000
	orr	r3, r3, xl, lsr #21
	mov	r0, r3, lsr r2
	RET

1:	mov	r0, #0
	RET

2:	orrs	xl, xl, xh, lsl #12
	bne	4f			@ value is NAN.
3:	mov	r0, #0xffffffff		@ maximum unsigned si
	RET

4:	mov	r0, #0			@ How should we convert NAN?
	RET

	FUNC_END aeabi_d2uiz
	FUNC_END fixunsdfsi

#endif /* L_fixunsdfsi */

#ifdef L_truncdfsf2

ARM_FUNC_START truncdfsf2
ARM_FUNC_ALIAS aeabi_d2f truncdfsf2

	@ check exponent range.
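	@ (The subtraction of (1023 - 127) << 21 rebiases the exponent for
	@ single precision; the two conditional subtractions that follow
	@ fall through to "bls 2f" whenever the rebiased exponent lands
	@ outside the normalized single-precision range [1, 254].)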
	mov	r2, xh, lsl #1
	subs	r3, r2, #((1023 - 127) << 21)
	subcss	ip, r3, #(1 << 21)
	rsbcss	ip, ip, #(254 << 21)
	bls	2f			@ value is out of range

1:	@ shift and round mantissa
	and	ip, xh, #0x80000000
	mov	r2, xl, lsl #3
	orr	xl, ip, xl, lsr #29
	cmp	r2, #0x80000000
	adc	r0, xl, r3, lsl #2
	biceq	r0, r0, #1
	RET

2:	@ either overflow or underflow
	tst	xh, #0x40000000
	bne	3f			@ overflow

	@ check if denormalized value is possible
	adds	r2, r3, #(23 << 21)
	andlt	r0, xh, #0x80000000	@ too small, return signed 0.
	RETc(lt)

	@ denormalize value so we can resume with the code above afterwards.
	orr	xh, xh, #0x00100000
	mov	r2, r2, lsr #21
	rsb	r2, r2, #24
	rsb	ip, r2, #32
	movs	r3, xl, lsl ip
	mov	xl, xl, lsr r2
	orrne	xl, xl, #1		@ fold r3 for rounding considerations.
	mov	r3, xh, lsl #11
	mov	r3, r3, lsr #11
	orr	xl, xl, r3, lsl ip
	mov	r3, r3, lsr r2
	mov	r3, r3, lsl #1
	b	1b

3:	@ check for NAN
	mvns	r3, r2, asr #21
	bne	5f			@ simple overflow
	orrs	r3, xl, xh, lsl #12
	movne	r0, #0x7f000000
	orrne	r0, r0, #0x00c00000
	RETc(ne)			@ return NAN

5:	@ return INF with sign
	and	r0, xh, #0x80000000
	orr	r0, r0, #0x7f000000
	orr	r0, r0, #0x00800000
	RET

	FUNC_END aeabi_d2f
	FUNC_END truncdfsf2

#endif /* L_truncdfsf2 */