/* ieee754-df.S double-precision floating point support for ARM

   Copyright (C) 2003, 2004  Free Software Foundation, Inc.
   Contributed by Nicolas Pitre (nico@cam.org)

   This file is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2, or (at your option) any
   later version.

   In addition to the permissions in the GNU General Public License, the
   Free Software Foundation gives you unlimited permission to link the
   compiled version of this file into combinations with other programs,
   and to distribute those combinations without any restriction coming
   from the use of this file.  (The General Public License restrictions
   do apply in other respects; for example, they cover modification of
   the file, and distribution when not linked into a combined
   executable.)

   This file is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.  If not, write to
   the Free Software Foundation, 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */

/* We need to know what prefix to add to function names.  */

#define L_muldivdf3
#define L_addsubdf3
#define L_cmpdf2
#define L_fixdfsi
#define __USER_LABEL_PREFIX__

#ifdef __ARMEB__
#define al	r1
#define ah	r0
#else
#define al	r0
#define ah	r1
#endif

#ifndef __USER_LABEL_PREFIX__
#error  __USER_LABEL_PREFIX__ not defined
#endif

/* ANSI concatenation macros.  */

#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b

/* Use the right prefix for global labels.  */

#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)

#ifdef __ELF__
#ifdef __thumb__
#define __PLT__  /* Not supported in Thumb assembler (for now).  */
#else
#define __PLT__ (PLT)
#endif
#define TYPE(x) .type SYM(x),function
#define SIZE(x) .size SYM(x), . - SYM(x)
#define LSYM(x) .x
#else
#define __PLT__
#define TYPE(x)
#define SIZE(x)
#define LSYM(x) x
#endif

/* Function end macros.  Variants for interworking.  */

@ This selects the minimum architecture level required.
#define __ARM_ARCH__ 3

#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
        || defined(__ARM_ARCH_4T__)
/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
   long multiply instructions.  That includes v3M.  */
# undef __ARM_ARCH__
# define __ARM_ARCH__ 4
#endif

#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
        || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
        || defined(__ARM_ARCH_5TEJ__)
# undef __ARM_ARCH__
# define __ARM_ARCH__ 5
#endif

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
        || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
        || defined(__ARM_ARCH_6ZK__)
# undef __ARM_ARCH__
# define __ARM_ARCH__ 6
#endif

/* How to return from a function call depends on the architecture variant.  */

#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)

# define RET            bx      lr
# define RETc(x)        bx##x   lr

/* Special precautions for interworking on armv4t.  */
# if (__ARM_ARCH__ == 4)

/* Always use bx, not ldr pc.  */
#  if (defined(__thumb__) || defined(__THUMB_INTERWORK__))
#    define __INTERWORKING__
#   endif /* __THUMB__ || __THUMB_INTERWORK__ */

/* Include thumb stub before arm mode code.  */
#  if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
#   define __INTERWORKING_STUBS__
#  endif /* __thumb__ && !__THUMB_INTERWORK__ */

#endif /* __ARM_ARCH == 4 */

#else

# define RET            mov     pc, lr
# define RETc(x)        mov##x  pc, lr

#endif

/* Don't pass dirn, it's there just to get token pasting right.  */

.macro  RETLDM  regs=, cond=, dirn=ia
#if defined (__INTERWORKING__)
        .ifc "\regs",""
        ldr\cond        lr, [sp], #4
        .else
        ldm\cond\dirn   sp!, {\regs, lr}
        .endif
        bx\cond lr
#else
        .ifc "\regs",""
        ldr\cond        pc, [sp], #4
        .else
        ldm\cond\dirn   sp!, {\regs, pc}
        .endif
#endif
.endm
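
@ For reference, RETLDM "r4, r5" expands to "ldmia sp!, {r4, r5, pc}"
@ without interworking, and to "ldmia sp!, {r4, r5, lr}" followed by
@ "bx lr" when __INTERWORKING__ is defined; with an empty register list
@ only the return address itself is popped.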

.macro ARM_LDIV0
LSYM(Ldiv0):
        str     lr, [sp, #-4]!
        bl      SYM (__div0) __PLT__
        mov     r0, #0                  @ About as wrong as it could be.
        RETLDM
.endm

.macro THUMB_LDIV0
LSYM(Ldiv0):
        push    { lr }
        bl      SYM (__div0)
        mov     r0, #0                  @ About as wrong as it could be.
#if defined (__INTERWORKING__)
        pop     { r1 }
        bx      r1
#else
        pop     { pc }
#endif
.endm

.macro FUNC_END name
        SIZE (__\name)
.endm

.macro DIV_FUNC_END name
#if 0
@ How is this supposed to be valid? Multiply defined label ahoy...
LSYM(Ldiv0):
#endif
#ifdef __thumb__
        THUMB_LDIV0
#else
        ARM_LDIV0
#endif
        FUNC_END \name
.endm

.macro THUMB_FUNC_START name
        .globl  SYM (\name)
        TYPE    (\name)
        .thumb_func
SYM (\name):
.endm

/* Function start macros.  Variants for ARM and Thumb.  */

#ifdef __thumb__
#define THUMB_FUNC .thumb_func
#define THUMB_CODE .force_thumb
#else
#define THUMB_FUNC
#define THUMB_CODE
#endif

.macro FUNC_START name
        .text
        .globl SYM (__\name)
        TYPE (__\name)
        .align 0
        THUMB_CODE
        THUMB_FUNC
SYM (__\name):
.endm

/* Special function that will always be coded in ARM assembly, even if
   in Thumb-only compilation.  */

#if defined(__INTERWORKING_STUBS__)
.macro  ARM_FUNC_START name
        FUNC_START \name
        bx      pc
        nop
        .arm
/* A hook to tell gdb that we've switched to ARM mode.  Also used to call
   directly from other local arm routines.  */
_L__\name:
.endm
#define EQUIV .thumb_set
/* Branch directly to a function declared with ARM_FUNC_START.
   Must be called in arm mode.  */
.macro  ARM_CALL name
        bl      _L__\name
.endm
#else
.macro  ARM_FUNC_START name
        .text
        .globl SYM (__\name)
        TYPE (__\name)
        .align 0
        .arm
SYM (__\name):
.endm
#define EQUIV .set
.macro  ARM_CALL name
        bl      __\name
.endm
#endif

.macro  FUNC_ALIAS new old
        .globl  SYM (__\new)
#if defined (__thumb__)
        .thumb_set      SYM (__\new), SYM (__\old)
#else
        .set    SYM (__\new), SYM (__\old)
#endif
.endm

.macro  ARM_FUNC_ALIAS new old
        .globl  SYM (__\new)
        EQUIV   SYM (__\new), SYM (__\old)
#if defined(__INTERWORKING_STUBS__)
        .set    SYM (_L__\new), SYM (_L__\old)
#endif
.endm

#ifdef __thumb__
/* Register aliases.  */

work            .req    r4      @ XXXX is this safe?
dividend        .req    r0
divisor         .req    r1
overdone        .req    r2
result          .req    r2
curbit          .req    r3
#endif
#if 0
ip              .req    r12
sp              .req    r13
lr              .req    r14
pc              .req    r15
#endif

/*
 * Notes:
 *
 * The goal of this code is to be as fast as possible.  This is
 * not meant to be easy to understand for the casual reader.
 * For slightly simpler code please see the single precision version
 * of this file.
 *
 * Only the default rounding mode is supported, for best performance.
 * Exceptions aren't supported yet, but they could be added quite easily
 * if necessary without impacting performance.
 */

@ For FPA, float words are always big-endian.
@ For VFP, float words follow the memory system mode.
#if defined(__VFP_FP__) && !defined(__ARMEB__)
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#else
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#endif
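
@ Reminder of the IEEE-754 double layout in the xh-xl pair:
@   xh[31]    sign
@   xh[30:20] biased exponent (bias 1023)
@   xh[19:0]  high 20 bits of the mantissa
@   xl[31:0]  low 32 bits of the mantissa
@ The implicit leading 1 of a normalized value corresponds to the
@ 0x00100000 bit of xh, which is why that constant recurs below.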

#ifdef L_negdf2

ARM_FUNC_START negdf2
ARM_FUNC_ALIAS aeabi_dneg negdf2

	@ flip sign bit
	eor	xh, xh, #0x80000000
	RET

	FUNC_END aeabi_dneg
	FUNC_END negdf2

#endif

#ifdef L_addsubdf3

ARM_FUNC_START aeabi_drsub

	eor	xh, xh, #0x80000000	@ flip sign bit of first arg
	b	1f

ARM_FUNC_START subdf3
ARM_FUNC_ALIAS aeabi_dsub subdf3

	eor	yh, yh, #0x80000000	@ flip sign bit of second arg
#if defined(__INTERWORKING_STUBS__)
	b	1f			@ Skip Thumb-code prologue
#endif

ARM_FUNC_START adddf3
ARM_FUNC_ALIAS aeabi_dadd adddf3

1:	stmfd	sp!, {r4, r5, lr}

	@ Look for zeroes, equal values, INF, or NAN.
	mov	r4, xh, lsl #1
	mov	r5, yh, lsl #1
	teq	r4, r5
	teqeq	xl, yl
	orrnes	ip, r4, xl
	orrnes	ip, r5, yl
	mvnnes	ip, r4, asr #21
	mvnnes	ip, r5, asr #21
	beq	LSYM(Lad_s)

	@ Compute exponent difference.  Make largest exponent in r4,
	@ corresponding arg in xh-xl, and positive exponent difference in r5.
	mov	r4, r4, lsr #21
	rsbs	r5, r4, r5, lsr #21
	rsblt	r5, r5, #0
	ble	1f
	add	r4, r4, r5
	eor	yl, xl, yl
	eor	yh, xh, yh
	eor	xl, yl, xl
	eor	xh, yh, xh
	eor	yl, xl, yl
	eor	yh, xh, yh
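	@ (The six EORs above perform the classic XOR swap on the 64-bit
	@ pair, exchanging xh-xl with yh-yl without a scratch register.)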
1:
	@ If exponent difference is too large, return largest argument
	@ already in xh-xl.  We need up to 54 bits to handle proper rounding
	@ of 0x1p54 - 1.1.
	cmp	r5, #54
	RETLDM	"r4, r5" hi

	@ Convert mantissa to signed integer.
	tst	xh, #0x80000000
	mov	xh, xh, lsl #12
	mov	ip, #0x00100000
	orr	xh, ip, xh, lsr #12
	beq	1f
	rsbs	xl, xl, #0
	rsc	xh, xh, #0
1:
	tst	yh, #0x80000000
	mov	yh, yh, lsl #12
	orr	yh, ip, yh, lsr #12
	beq	1f
	rsbs	yl, yl, #0
	rsc	yh, yh, #0
1:
	@ If exponent == difference, one or both args were denormalized.
	@ Since this is not the common case, rescale them off line.
	teq	r4, r5
	beq	LSYM(Lad_d)
LSYM(Lad_x):

	@ Compensate for the exponent overlapping the mantissa MSB added later
	sub	r4, r4, #1

	@ Shift yh-yl right per r5, add to xh-xl, keep leftover bits into ip.
	rsbs	lr, r5, #32
	blt	1f
	mov	ip, yl, lsl lr
	adds	xl, xl, yl, lsr r5
	adc	xh, xh, #0
	adds	xl, xl, yh, lsl lr
	adcs	xh, xh, yh, asr r5
	b	2f
1:	sub	r5, r5, #32
	add	lr, lr, #32
	cmp	yl, #1
	mov	ip, yh, lsl lr
	orrcs	ip, ip, #2		@ 2 not 1, to allow lsr #1 later
	adds	xl, xl, yh, asr r5
	adcs	xh, xh, yh, asr #31
2:
	@ We now have a result in xh-xl-ip.
	@ Keep absolute value in xh-xl-ip, sign in r5 (the n bit was set above)
	and	r5, xh, #0x80000000
	bpl	LSYM(Lad_p)
	rsbs	ip, ip, #0
	rscs	xl, xl, #0
	rsc	xh, xh, #0

	@ Determine how to normalize the result.
LSYM(Lad_p):
	cmp	xh, #0x00100000
	bcc	LSYM(Lad_a)
	cmp	xh, #0x00200000
	bcc	LSYM(Lad_e)

	@ Result needs to be shifted right.
	movs	xh, xh, lsr #1
	movs	xl, xl, rrx
	mov	ip, ip, rrx
	add	r4, r4, #1

	@ Make sure we did not bust our exponent.
	mov	r2, r4, lsl #21
	cmn	r2, #(2 << 21)
	bcs	LSYM(Lad_o)

	@ Our result is now properly aligned into xh-xl, remaining bits in ip.
	@ Round with MSB of ip.  If halfway between two numbers, round towards
	@ LSB of xl = 0.
	@ Pack final result together.
LSYM(Lad_e):
	cmp	ip, #0x80000000
	moveqs	ip, xl, lsr #1
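	@ (C is now set iff the discarded bits exceed half an ulp, or equal
	@ exactly half with xl odd: round to nearest, ties to even.)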
	adcs	xl, xl, #0
	adc	xh, xh, r4, lsl #20
	orr	xh, xh, r5
	RETLDM	"r4, r5"

	@ Result must be shifted left and exponent adjusted.
LSYM(Lad_a):
	movs	ip, ip, lsl #1
	adcs	xl, xl, xl
	adc	xh, xh, xh
	tst	xh, #0x00100000
	sub	r4, r4, #1
	bne	LSYM(Lad_e)

	@ No rounding necessary since ip will always be 0 at this point.
LSYM(Lad_l):

#if __ARM_ARCH__ < 5

	teq	xh, #0
	movne	r3, #20
	moveq	r3, #52
	moveq	xh, xl
	moveq	xl, #0
	mov	r2, xh
	cmp	r2, #(1 << 16)
	movhs	r2, r2, lsr #16
	subhs	r3, r3, #16
	cmp	r2, #(1 << 8)
	movhs	r2, r2, lsr #8
	subhs	r3, r3, #8
	cmp	r2, #(1 << 4)
	movhs	r2, r2, lsr #4
	subhs	r3, r3, #4
	cmp	r2, #(1 << 2)
	subhs	r3, r3, #2
	sublo	r3, r3, r2, lsr #1
	sub	r3, r3, r2, lsr #3

#else

	teq	xh, #0
	moveq	xh, xl
	moveq	xl, #0
	clz	r3, xh
	addeq	r3, r3, #32
	sub	r3, r3, #11

#endif
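
	@ (Both paths compute r3 = clz64(xh-xl) - 11: the left shift that
	@ brings the leading 1 up to bit 20 of xh.  The pre-v5 version does
	@ it with a branchless binary search instead of the CLZ instruction.)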

	@ determine how to shift the value.
	subs	r2, r3, #32
	bge	2f
	adds	r2, r2, #12
	ble	1f

	@ shift value left 21 to 31 bits, or actually right 11 to 1 bits
	@ since a register switch happened above.
	add	ip, r2, #20
	rsb	r2, r2, #12
	mov	xl, xh, lsl ip
	mov	xh, xh, lsr r2
	b	3f

	@ actually shift value left 1 to 20 bits, which might also represent
	@ 32 to 52 bits if counting the register switch that happened earlier.
1:	add	r2, r2, #20
2:	rsble	ip, r2, #32
	mov	xh, xh, lsl r2
	orrle	xh, xh, xl, lsr ip
	movle	xl, xl, lsl r2

	@ adjust exponent accordingly.
3:	subs	r4, r4, r3
	addge	xh, xh, r4, lsl #20
	orrge	xh, xh, r5
	RETLDM	"r4, r5" ge

	@ Exponent too small, denormalize result.
	@ Find out proper shift value.
	mvn	r4, r4
	subs	r4, r4, #31
	bge	2f
	adds	r4, r4, #12
	bgt	1f

	@ shift result right by 1 to 20 bits, sign is in r5.
	add	r4, r4, #20
	rsb	r2, r4, #32
	mov	xl, xl, lsr r4
	orr	xl, xl, xh, lsl r2
	orr	xh, r5, xh, lsr r4
	RETLDM	"r4, r5"

	@ shift result right by 21 to 31 bits, or left by 11 to 1 bits after
	@ a register switch from xh to xl.
1:	rsb	r4, r4, #12
	rsb	r2, r4, #32
	mov	xl, xl, lsr r2
	orr	xl, xl, xh, lsl r4
	mov	xh, r5
	RETLDM	"r4, r5"

	@ Shift value right by 32 to 64 bits, or by 0 to 32 bits after a switch
	@ from xh to xl.
2:	mov	xl, xh, lsr r4
	mov	xh, r5
	RETLDM	"r4, r5"

	@ Adjust exponents for denormalized arguments.
	@ Note that r4 must not remain equal to 0.
LSYM(Lad_d):
	teq	r4, #0
	eor	yh, yh, #0x00100000
	eoreq	xh, xh, #0x00100000
	addeq	r4, r4, #1
	subne	r5, r5, #1
	b	LSYM(Lad_x)

LSYM(Lad_s):
	mvns	ip, r4, asr #21
	mvnnes	ip, r5, asr #21
	beq	LSYM(Lad_i)

	teq	r4, r5
	teqeq	xl, yl
	beq	1f

	@ Result is x + 0.0 = x or 0.0 + y = y.
	teq	r4, #0
	moveq	xh, yh
	moveq	xl, yl
	RETLDM	"r4, r5"

1:	teq	xh, yh

	@ Result is x - x = 0.
	movne	xh, #0
	movne	xl, #0
	RETLDM	"r4, r5" ne

	@ Result is x + x = 2x.
	movs	ip, r4, lsr #21
	bne	2f
	movs	xl, xl, lsl #1
	adcs	xh, xh, xh
	orrcs	xh, xh, #0x80000000
	RETLDM	"r4, r5"
2:	adds	r4, r4, #(2 << 21)
	addcc	xh, xh, #(1 << 20)
	RETLDM	"r4, r5" cc
	and	r5, xh, #0x80000000

	@ Overflow: return INF.
LSYM(Lad_o):
	orr	xh, r5, #0x7f000000
	orr	xh, xh, #0x00f00000
	mov	xl, #0
	RETLDM	"r4, r5"

	@ At least one of x or y is INF/NAN.
	@   if xh-xl != INF/NAN: return yh-yl (which is INF/NAN)
	@   if yh-yl != INF/NAN: return xh-xl (which is INF/NAN)
	@   if either is NAN: return NAN
	@   if opposite sign: return NAN
	@   otherwise return xh-xl (which is INF or -INF)
LSYM(Lad_i):
	mvns	ip, r4, asr #21
	movne	xh, yh
	movne	xl, yl
	mvneqs	ip, r5, asr #21
	movne	yh, xh
	movne	yl, xl
	orrs	r4, xl, xh, lsl #12
	orreqs	r5, yl, yh, lsl #12
	teqeq	xh, yh
	orrne	xh, xh, #0x00080000	@ quiet NAN
	RETLDM	"r4, r5"

	FUNC_END aeabi_dsub
	FUNC_END subdf3
	FUNC_END aeabi_dadd
	FUNC_END adddf3

ARM_FUNC_START floatunsidf
ARM_FUNC_ALIAS aeabi_ui2d floatunsidf

	teq	r0, #0
	moveq	r1, #0
	RETc(eq)
	stmfd	sp!, {r4, r5, lr}
	mov	r4, #0x400		@ initial exponent
	add	r4, r4, #(52-1 - 1)
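	@ (Why 0x400 + 50: 1074 = 1023 + 52 - 1.  Lad_l subtracts the
	@ normalization shift and the implicit bit carries one more into the
	@ exponent field, leaving a stored exponent of 1023 plus the MSB
	@ position of the original integer.)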
	mov	r5, #0			@ sign bit is 0
	.ifnc	xl, r0
	mov	xl, r0
	.endif
	mov	xh, #0
	b	LSYM(Lad_l)

	FUNC_END aeabi_ui2d
	FUNC_END floatunsidf

ARM_FUNC_START floatsidf
ARM_FUNC_ALIAS aeabi_i2d floatsidf

	teq	r0, #0
	moveq	r1, #0
	RETc(eq)
	stmfd	sp!, {r4, r5, lr}
	mov	r4, #0x400		@ initial exponent
	add	r4, r4, #(52-1 - 1)
	ands	r5, r0, #0x80000000	@ sign bit in r5
	rsbmi	r0, r0, #0		@ absolute value
	.ifnc	xl, r0
	mov	xl, r0
	.endif
	mov	xh, #0
	b	LSYM(Lad_l)

	FUNC_END aeabi_i2d
	FUNC_END floatsidf

ARM_FUNC_START extendsfdf2
ARM_FUNC_ALIAS aeabi_f2d extendsfdf2

	movs	r2, r0, lsl #1		@ toss sign bit
	mov	xh, r2, asr #3		@ stretch exponent
	mov	xh, xh, rrx		@ retrieve sign bit
	mov	xl, r2, lsl #28		@ retrieve remaining bits
	andnes	r3, r2, #0xff000000	@ isolate exponent
	teqne	r3, #0xff000000		@ if not 0, check if INF or NAN
	eorne	xh, xh, #0x38000000	@ fixup exponent otherwise.
	RETc(ne)			@ and return it.
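	@ (The bias adjustment is 1023 - 127 = 896 = 0x380 at bit 20, hence
	@ the EOR with 0x38000000; EOR suffices because the ASR above
	@ replicated the exponent's top bit for every normal input.)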

	teq	r2, #0			@ if actually 0
	teqne	r3, #0xff000000		@ or INF or NAN
	RETc(eq)			@ we are done already.

	@ value was denormalized.  We can normalize it now.
	stmfd	sp!, {r4, r5, lr}
	mov	r4, #0x380		@ setup corresponding exponent
	and	r5, xh, #0x80000000	@ move sign bit in r5
	bic	xh, xh, #0x80000000
	b	LSYM(Lad_l)

	FUNC_END aeabi_f2d
	FUNC_END extendsfdf2

ARM_FUNC_START floatundidf
ARM_FUNC_ALIAS aeabi_ul2d floatundidf

	orrs	r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	mvfeqd	f0, #0.0
#endif
	RETc(eq)

#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	@ For hard FPA code we want to return via the tail below so that
	@ we can return the result in f0 as well as in r0/r1 for backwards
	@ compatibility.
	adr	ip, LSYM(f0_ret)
	stmfd	sp!, {r4, r5, ip, lr}
#else
	stmfd	sp!, {r4, r5, lr}
#endif

	mov	r5, #0
	b	2f

ARM_FUNC_START floatdidf
ARM_FUNC_ALIAS aeabi_l2d floatdidf

	orrs	r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	mvfeqd	f0, #0.0
#endif
	RETc(eq)

#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	@ For hard FPA code we want to return via the tail below so that
	@ we can return the result in f0 as well as in r0/r1 for backwards
	@ compatibility.
	adr	ip, LSYM(f0_ret)
	stmfd	sp!, {r4, r5, ip, lr}
#else
	stmfd	sp!, {r4, r5, lr}
#endif

	ands	r5, ah, #0x80000000	@ sign bit in r5
	bpl	2f
	rsbs	al, al, #0
	rsc	ah, ah, #0
2:
	mov	r4, #0x400		@ initial exponent
	add	r4, r4, #(52-1 - 1)

	@ FPA little-endian: must swap the word order.
	.ifnc	xh, ah
	mov	ip, al
	mov	xh, ah
	mov	xl, ip
	.endif

	movs	ip, xh, lsr #22
	beq	LSYM(Lad_p)

	@ The value is too big.  Scale it down a bit...
	mov	r2, #3
	movs	ip, ip, lsr #3
	addne	r2, r2, #3
	movs	ip, ip, lsr #3
	addne	r2, r2, #3
	add	r2, r2, ip, lsr #3
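	@ (r2 is a quick, slightly conservative estimate of how many bits
	@ the value extends above bit 21 of xh; exact normalization is then
	@ left to the common Lad_p path.)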

	rsb	r3, r2, #32
	mov	ip, xl, lsl r3
	mov	xl, xl, lsr r2
	orr	xl, xl, xh, lsl r3
	mov	xh, xh, lsr r2
	add	r4, r4, r2
	b	LSYM(Lad_p)

#if !defined (__VFP_FP__) && !defined(__SOFTFP__)

	@ Legacy code expects the result to be returned in f0.  Copy it
	@ there as well.
LSYM(f0_ret):
	stmfd	sp!, {r0, r1}
	ldfd	f0, [sp], #8
	RETLDM

#endif

	FUNC_END floatdidf
	FUNC_END aeabi_l2d
	FUNC_END floatundidf
	FUNC_END aeabi_ul2d

#endif /* L_addsubdf3 */

#ifdef L_muldivdf3

ARM_FUNC_START muldf3
ARM_FUNC_ALIAS aeabi_dmul muldf3
	stmfd	sp!, {r4, r5, r6, lr}

	@ Mask out exponents, trap any zero/denormal/INF/NAN.
	mov	ip, #0xff
	orr	ip, ip, #0x700
	ands	r4, ip, xh, lsr #20
	andnes	r5, ip, yh, lsr #20
	teqne	r4, ip
	teqne	r5, ip
	bleq	LSYM(Lml_s)

	@ Add exponents together
	add	r4, r4, r5

	@ Determine final sign.
	eor	r6, xh, yh

	@ Convert mantissa to unsigned integer.
	@ If power of two, branch to a separate path.
	bic	xh, xh, ip, lsl #21
	bic	yh, yh, ip, lsl #21
	orrs	r5, xl, xh, lsl #12
	orrnes	r5, yl, yh, lsl #12
	orr	xh, xh, #0x00100000
	orr	yh, yh, #0x00100000
	beq	LSYM(Lml_1)

#if __ARM_ARCH__ < 4

	@ Put sign bit in r6, which will be restored in yl later.
	and   r6, r6, #0x80000000

	@ Well, no way to make it shorter without the umull instruction.
	stmfd	sp!, {r6, r7, r8, r9, sl, fp}
	mov	r7, xl, lsr #16
	mov	r8, yl, lsr #16
	mov	r9, xh, lsr #16
	mov	sl, yh, lsr #16
	bic	xl, xl, r7, lsl #16
	bic	yl, yl, r8, lsl #16
	bic	xh, xh, r9, lsl #16
	bic	yh, yh, sl, lsl #16
	mul	ip, xl, yl
	mul	fp, xl, r8
	mov	lr, #0
	adds	ip, ip, fp, lsl #16
	adc	lr, lr, fp, lsr #16
	mul	fp, r7, yl
	adds	ip, ip, fp, lsl #16
	adc	lr, lr, fp, lsr #16
	mul	fp, xl, sl
	mov	r5, #0
	adds	lr, lr, fp, lsl #16
	adc	r5, r5, fp, lsr #16
	mul	fp, r7, yh
	adds	lr, lr, fp, lsl #16
	adc	r5, r5, fp, lsr #16
	mul	fp, xh, r8
	adds	lr, lr, fp, lsl #16
	adc	r5, r5, fp, lsr #16
	mul	fp, r9, yl
	adds	lr, lr, fp, lsl #16
	adc	r5, r5, fp, lsr #16
	mul	fp, xh, sl
	mul	r6, r9, sl
	adds	r5, r5, fp, lsl #16
	adc	r6, r6, fp, lsr #16
	mul	fp, r9, yh
	adds	r5, r5, fp, lsl #16
	adc	r6, r6, fp, lsr #16
	mul	fp, xl, yh
	adds	lr, lr, fp
	mul	fp, r7, sl
	adcs	r5, r5, fp
	mul	fp, xh, yl
	adc	r6, r6, #0
	adds	lr, lr, fp
	mul	fp, r9, r8
	adcs	r5, r5, fp
	mul	fp, r7, r8
	adc	r6, r6, #0
	adds	lr, lr, fp
	mul	fp, xh, yh
	adcs	r5, r5, fp
	adc	r6, r6, #0
	ldmfd	sp!, {yl, r7, r8, r9, sl, fp}

#else

	@ Here is the actual multiplication.
	umull	ip, lr, xl, yl
	mov	r5, #0
	umlal	lr, r5, xh, yl
	and	yl, r6, #0x80000000
	umlal	lr, r5, xl, yh
	mov	r6, #0
	umlal	r5, r6, xh, yh

#endif
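
	@ (Either way, the 106-bit product of the two 53-bit mantissas now
	@ sits in r6:r5:lr:ip, most significant word first, with the final
	@ sign parked in yl; the pre-v4 path builds it out of 16x16-bit
	@ partial products.)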

	@ The LSBs in ip are only significant for the final rounding.
	@ Fold them into lr.
	teq	ip, #0
	orrne	lr, lr, #1

	@ Adjust result upon the MSB position.
	sub	r4, r4, #0xff
	cmp	r6, #(1 << (20-11))
	sbc	r4, r4, #0x300
	bcs	1f
	movs	lr, lr, lsl #1
	adcs	r5, r5, r5
	adc	r6, r6, r6
1:
	@ Shift to final position, add sign to result.
	orr	xh, yl, r6, lsl #11
	orr	xh, xh, r5, lsr #21
	mov	xl, r5, lsl #11
	orr	xl, xl, lr, lsr #21
	mov	lr, lr, lsl #11

	@ Check exponent range for under/overflow.
	subs	ip, r4, #(254 - 1)
	cmphi	ip, #0x700
	bhi	LSYM(Lml_u)

	@ Round the result, merge final exponent.
	cmp	lr, #0x80000000
	moveqs	lr, xl, lsr #1
	adcs	xl, xl, #0
	adc	xh, xh, r4, lsl #20
	RETLDM	"r4, r5, r6"

	@ Multiplication by 0x1p*: let's shortcut a lot of code.
LSYM(Lml_1):
	and	r6, r6, #0x80000000
	orr	xh, r6, xh
	orr	xl, xl, yl
	eor	xh, xh, yh
	subs	r4, r4, ip, lsr #1
	rsbgts	r5, r4, ip
	orrgt	xh, xh, r4, lsl #20
	RETLDM	"r4, r5, r6" gt

	@ Under/overflow: fix things up for the code below.
	orr	xh, xh, #0x00100000
	mov	lr, #0
	subs	r4, r4, #1

LSYM(Lml_u):
	@ Overflow?
	bgt	LSYM(Lml_o)

	@ Check if denormalized result is possible, otherwise return signed 0.
	cmn	r4, #(53 + 1)
	movle	xl, #0
	bicle	xh, xh, #0x7fffffff
	RETLDM	"r4, r5, r6" le

	@ Find out proper shift value.
	rsb	r4, r4, #0
	subs	r4, r4, #32
	bge	2f
	adds	r4, r4, #12
	bgt	1f

	@ shift result right by 1 to 20 bits, preserve sign bit, round, etc.
	add	r4, r4, #20
	rsb	r5, r4, #32
	mov	r3, xl, lsl r5
	mov	xl, xl, lsr r4
	orr	xl, xl, xh, lsl r5
	and	r2, xh, #0x80000000
	bic	xh, xh, #0x80000000
	adds	xl, xl, r3, lsr #31
	adc	xh, r2, xh, lsr r4
	orrs	lr, lr, r3, lsl #1
	biceq	xl, xl, r3, lsr #31
	RETLDM	"r4, r5, r6"

	@ shift result right by 21 to 31 bits, or left by 11 to 1 bits after
	@ a register switch from xh to xl.  Then round.
1:	rsb	r4, r4, #12
	rsb	r5, r4, #32
	mov	r3, xl, lsl r4
	mov	xl, xl, lsr r5
	orr	xl, xl, xh, lsl r4
	bic	xh, xh, #0x7fffffff
	adds	xl, xl, r3, lsr #31
	adc	xh, xh, #0
	orrs	lr, lr, r3, lsl #1
	biceq	xl, xl, r3, lsr #31
	RETLDM	"r4, r5, r6"

	@ Shift value right by 32 to 64 bits, or by 0 to 32 bits after a switch
	@ from xh to xl.  Leftover bits are in r3-r6-lr for rounding.
2:	rsb	r5, r4, #32
	orr	lr, lr, xl, lsl r5
	mov	r3, xl, lsr r4
	orr	r3, r3, xh, lsl r5
	mov	xl, xh, lsr r4
	bic	xh, xh, #0x7fffffff
	bic	xl, xl, xh, lsr r4
	add	xl, xl, r3, lsr #31
	orrs	lr, lr, r3, lsl #1
	biceq	xl, xl, r3, lsr #31
	RETLDM	"r4, r5, r6"

	@ One or both arguments are denormalized.
	@ Scale them leftwards and preserve sign bit.
LSYM(Lml_d):
	teq	r4, #0
	bne	2f
	and	r6, xh, #0x80000000
1:	movs	xl, xl, lsl #1
	adc	xh, xh, xh
	tst	xh, #0x00100000
	subeq	r4, r4, #1
	beq	1b
	orr	xh, xh, r6
	teq	r5, #0
	movne	pc, lr
2:	and	r6, yh, #0x80000000
3:	movs	yl, yl, lsl #1
	adc	yh, yh, yh
	tst	yh, #0x00100000
	subeq	r5, r5, #1
	beq	3b
	orr	yh, yh, r6
	mov	pc, lr

LSYM(Lml_s):
	@ Isolate the INF and NAN cases away
	teq	r4, ip
	and	r5, ip, yh, lsr #20
	teqne	r5, ip
	beq	1f

	@ Here, one or more arguments are either denormalized or zero.
	orrs	r6, xl, xh, lsl #1
	orrnes	r6, yl, yh, lsl #1
	bne	LSYM(Lml_d)

	@ Result is 0, but determine sign anyway.
LSYM(Lml_z):
	eor	xh, xh, yh
	bic	xh, xh, #0x7fffffff
	mov	xl, #0
	RETLDM	"r4, r5, r6"

1:	@ One or both args are INF or NAN.
	orrs	r6, xl, xh, lsl #1
	moveq	xl, yl
	moveq	xh, yh
	orrnes	r6, yl, yh, lsl #1
	beq	LSYM(Lml_n)		@ 0 * INF or INF * 0 -> NAN
	teq	r4, ip
	bne	1f
	orrs	r6, xl, xh, lsl #12
	bne	LSYM(Lml_n)		@ NAN * <anything> -> NAN
1:	teq	r5, ip
	bne	LSYM(Lml_i)
	orrs	r6, yl, yh, lsl #12
	movne	xl, yl
	movne	xh, yh
	bne	LSYM(Lml_n)		@ <anything> * NAN -> NAN

	@ Result is INF, but we need to determine its sign.
LSYM(Lml_i):
	eor	xh, xh, yh

	@ Overflow: return INF (sign already in xh).
LSYM(Lml_o):
	and	xh, xh, #0x80000000
	orr	xh, xh, #0x7f000000
	orr	xh, xh, #0x00f00000
	mov	xl, #0
	RETLDM	"r4, r5, r6"

	@ Return a quiet NAN.
LSYM(Lml_n):
	orr	xh, xh, #0x7f000000
	orr	xh, xh, #0x00f80000
	RETLDM	"r4, r5, r6"

	FUNC_END aeabi_dmul
	FUNC_END muldf3

ARM_FUNC_START divdf3
ARM_FUNC_ALIAS aeabi_ddiv divdf3

	stmfd	sp!, {r4, r5, r6, lr}

	@ Mask out exponents, trap any zero/denormal/INF/NAN.
	mov	ip, #0xff
	orr	ip, ip, #0x700
	ands	r4, ip, xh, lsr #20
	andnes	r5, ip, yh, lsr #20
	teqne	r4, ip
	teqne	r5, ip
	bleq	LSYM(Ldv_s)

	@ Subtract divisor exponent from dividend's.
	sub	r4, r4, r5

	@ Preserve final sign into lr.
	eor	lr, xh, yh

	@ Convert mantissa to unsigned integer.
	@ Dividend -> r5-r6, divisor -> yh-yl.
	orrs	r5, yl, yh, lsl #12
	mov	xh, xh, lsl #12
	beq	LSYM(Ldv_1)
	mov	yh, yh, lsl #12
	mov	r5, #0x10000000
	orr	yh, r5, yh, lsr #4
	orr	yh, yh, yl, lsr #24
	mov	yl, yl, lsl #8
	orr	r5, r5, xh, lsr #4
	orr	r5, r5, xl, lsr #24
	mov	r6, xl, lsl #8

	@ Initialize xh with final sign bit.
	and	xh, lr, #0x80000000

	@ Ensure result will land in a known bit position.
	@ Apply exponent bias accordingly.
	cmp	r5, yh
	cmpeq	r6, yl
	adc	r4, r4, #(255 - 2)
	add	r4, r4, #0x300
	bcs	1f
	movs	yh, yh, lsr #1
	mov	yl, yl, rrx
1:
	@ Perform first subtraction to align result to a nibble.
	subs	r6, r6, yl
	sbc	r5, r5, yh
	movs	yh, yh, lsr #1
	mov	yl, yl, rrx
	mov	xl, #0x00100000
	mov	ip, #0x00080000

	@ The actual division loop.
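	@ (Plain shift-and-subtract, unrolled four quotient bits per pass:
	@ ip masks the quotient bit being resolved, r5-r6 hold the running
	@ remainder, and yh-yl is shifted right one position per trial.)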
1:	subs	lr, r6, yl
	sbcs	lr, r5, yh
	subcs	r6, r6, yl
	movcs	r5, lr
	orrcs	xl, xl, ip
	movs	yh, yh, lsr #1
	mov	yl, yl, rrx
	subs	lr, r6, yl
	sbcs	lr, r5, yh
	subcs	r6, r6, yl
	movcs	r5, lr
	orrcs	xl, xl, ip, lsr #1
	movs	yh, yh, lsr #1
	mov	yl, yl, rrx
	subs	lr, r6, yl
	sbcs	lr, r5, yh
	subcs	r6, r6, yl
	movcs	r5, lr
	orrcs	xl, xl, ip, lsr #2
	movs	yh, yh, lsr #1
	mov	yl, yl, rrx
	subs	lr, r6, yl
	sbcs	lr, r5, yh
	subcs	r6, r6, yl
	movcs	r5, lr
	orrcs	xl, xl, ip, lsr #3

	orrs	lr, r5, r6
	beq	2f
	mov	r5, r5, lsl #4
	orr	r5, r5, r6, lsr #28
	mov	r6, r6, lsl #4
	mov	yh, yh, lsl #3
	orr	yh, yh, yl, lsr #29
	mov	yl, yl, lsl #3
	movs	ip, ip, lsr #4
	bne	1b

	@ We are done with a word of the result.
	@ Loop again for the low word if this pass was for the high word.
	tst	xh, #0x00100000
	bne	3f
	orr	xh, xh, xl
	mov	xl, #0
	mov	ip, #0x80000000
	b	1b
2:
	@ Be sure result starts in the high word.
	tst	xh, #0x00100000
	orreq	xh, xh, xl
	moveq	xl, #0
3:
	@ Check exponent range for under/overflow.
	subs	ip, r4, #(254 - 1)
	cmphi	ip, #0x700
	bhi	LSYM(Lml_u)

	@ Round the result, merge final exponent.
	subs	ip, r5, yh
	subeqs	ip, r6, yl
	moveqs	ip, xl, lsr #1
	adcs	xl, xl, #0
	adc	xh, xh, r4, lsl #20
	RETLDM	"r4, r5, r6"

	@ Division by 0x1p*: shortcut a lot of code.
LSYM(Ldv_1):
	and	lr, lr, #0x80000000
	orr	xh, lr, xh, lsr #12
	adds	r4, r4, ip, lsr #1
	rsbgts	r5, r4, ip
	orrgt	xh, xh, r4, lsl #20
	RETLDM	"r4, r5, r6" gt

	orr	xh, xh, #0x00100000
	mov	lr, #0
	subs	r4, r4, #1
	b	LSYM(Lml_u)

	@ Result might need to be denormalized: put remainder bits
	@ in lr for rounding considerations.
LSYM(Ldv_u):
	orr	lr, r5, r6
	b	LSYM(Lml_u)

	@ One or both arguments are either INF, NAN or zero.
LSYM(Ldv_s):
	and	r5, ip, yh, lsr #20
	teq	r4, ip
	teqeq	r5, ip
	beq	LSYM(Lml_n)		@ INF/NAN / INF/NAN -> NAN
	teq	r4, ip
	bne	1f
	orrs	r4, xl, xh, lsl #12
	bne	LSYM(Lml_n)		@ NAN / <anything> -> NAN
	teq	r5, ip
	bne	LSYM(Lml_i)		@ INF / <anything> -> INF
	mov	xl, yl
	mov	xh, yh
	b	LSYM(Lml_n)		@ INF / (INF or NAN) -> NAN
1:	teq	r5, ip
	bne	2f
	orrs	r5, yl, yh, lsl #12
	beq	LSYM(Lml_z)		@ <anything> / INF -> 0
	mov	xl, yl
	mov	xh, yh
	b	LSYM(Lml_n)		@ <anything> / NAN -> NAN
2:	@ If both are non-zero, we need to normalize and resume above.
	orrs	r6, xl, xh, lsl #1
	orrnes	r6, yl, yh, lsl #1
	bne	LSYM(Lml_d)
	@ One or both arguments are 0.
	orrs	r4, xl, xh, lsl #1
	bne	LSYM(Lml_i)		@ <non_zero> / 0 -> INF
	orrs	r5, yl, yh, lsl #1
	bne	LSYM(Lml_z)		@ 0 / <non_zero> -> 0
	b	LSYM(Lml_n)		@ 0 / 0 -> NAN

	FUNC_END aeabi_ddiv
	FUNC_END divdf3

#endif /* L_muldivdf3 */

#ifdef L_cmpdf2

@ Note: only r0 (return value) and ip are clobbered here.

ARM_FUNC_START gtdf2
ARM_FUNC_ALIAS gedf2 gtdf2
	mov	ip, #-1
	b	1f

ARM_FUNC_START ltdf2
ARM_FUNC_ALIAS ledf2 ltdf2
	mov	ip, #1
	b	1f

ARM_FUNC_START cmpdf2
ARM_FUNC_ALIAS nedf2 cmpdf2
ARM_FUNC_ALIAS eqdf2 cmpdf2
	mov	ip, #1			@ how should we specify unordered here?

1:	str	ip, [sp, #-4]
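	@ (ip is the value returned when the operands are unordered: -1
	@ makes gt/ge tests fail, +1 makes lt/le tests fail, and any
	@ non-zero value reads as "not equal" for eq/ne.)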

	@ Trap any INF/NAN first.
	mov	ip, xh, lsl #1
	mvns	ip, ip, asr #21
	mov	ip, yh, lsl #1
	mvnnes	ip, ip, asr #21
	beq	3f

	@ Test for equality.
	@ Note that 0.0 is equal to -0.0.
2:	orrs	ip, xl, xh, lsl #1	@ if x == 0.0 or -0.0
	orreqs	ip, yl, yh, lsl #1	@ and y == 0.0 or -0.0
	teqne	xh, yh			@ or xh == yh
	teqeq	xl, yl			@ and xl == yl
	moveq	r0, #0			@ then equal.
	RETc(eq)

	@ Clear C flag
	cmn	r0, #0

	@ Compare sign,
	teq	xh, yh

	@ Compare values if same sign
	cmppl	xh, yh
	cmpeq	xl, yl

	@ Result:
	movcs	r0, yh, asr #31
	mvncc	r0, yh, asr #31
	orr	r0, r0, #1
	RET

	@ Look for a NAN.
3:	mov	ip, xh, lsl #1
	mvns	ip, ip, asr #21
	bne	4f
	orrs	ip, xl, xh, lsl #12
	bne	5f			@ x is NAN
4:	mov	ip, yh, lsl #1
	mvns	ip, ip, asr #21
	bne	2b
	orrs	ip, yl, yh, lsl #12
	beq	2b			@ y is not NAN
5:	ldr	r0, [sp, #-4]		@ unordered return code
	RET

	FUNC_END gedf2
	FUNC_END gtdf2
	FUNC_END ledf2
	FUNC_END ltdf2
	FUNC_END nedf2
	FUNC_END eqdf2
	FUNC_END cmpdf2

ARM_FUNC_START aeabi_cdrcmple

	mov	ip, r0
	mov	r0, r2
	mov	r2, ip
	mov	ip, r1
	mov	r1, r3
	mov	r3, ip
	b	6f

ARM_FUNC_START aeabi_cdcmpeq
ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq

	@ The status-returning routines are required to preserve all
	@ registers except ip, lr, and cpsr.
6:	stmfd	sp!, {r0, lr}
	ARM_CALL cmpdf2
	@ Set the Z flag correctly, and the C flag unconditionally.
	cmp	 r0, #0
	@ Clear the C flag if the return value was -1, indicating
	@ that the first operand was smaller than the second.
	cmnmi	 r0, #0
	RETLDM   "r0"

	FUNC_END aeabi_cdcmple
	FUNC_END aeabi_cdcmpeq
	FUNC_END aeabi_cdrcmple

ARM_FUNC_START	aeabi_dcmpeq

	str	lr, [sp, #-4]!
	ARM_CALL aeabi_cdcmple
	moveq	r0, #1	@ Equal to.
	movne	r0, #0	@ Less than, greater than, or unordered.
	RETLDM

	FUNC_END aeabi_dcmpeq

ARM_FUNC_START	aeabi_dcmplt

	str	lr, [sp, #-4]!
	ARM_CALL aeabi_cdcmple
	movcc	r0, #1	@ Less than.
	movcs	r0, #0	@ Equal to, greater than, or unordered.
	RETLDM

	FUNC_END aeabi_dcmplt

ARM_FUNC_START	aeabi_dcmple

	str	lr, [sp, #-4]!
	ARM_CALL aeabi_cdcmple
	movls	r0, #1	@ Less than or equal to.
	movhi	r0, #0	@ Greater than or unordered.
	RETLDM

	FUNC_END aeabi_dcmple

ARM_FUNC_START	aeabi_dcmpge

	str	lr, [sp, #-4]!
	ARM_CALL aeabi_cdrcmple
	movls	r0, #1	@ Operand 2 is less than or equal to operand 1.
	movhi	r0, #0	@ Operand 2 greater than operand 1, or unordered.
	RETLDM

	FUNC_END aeabi_dcmpge

ARM_FUNC_START	aeabi_dcmpgt

	str	lr, [sp, #-4]!
	ARM_CALL aeabi_cdrcmple
	movcc	r0, #1	@ Operand 2 is less than operand 1.
	movcs	r0, #0	@ Operand 2 is greater than or equal to operand 1,
			@ or they are unordered.
	RETLDM

	FUNC_END aeabi_dcmpgt

#endif /* L_cmpdf2 */

#ifdef L_unorddf2

ARM_FUNC_START unorddf2
ARM_FUNC_ALIAS aeabi_dcmpun unorddf2

	mov	ip, xh, lsl #1
	mvns	ip, ip, asr #21
	bne	1f
	orrs	ip, xl, xh, lsl #12
	bne	3f			@ x is NAN
1:	mov	ip, yh, lsl #1
	mvns	ip, ip, asr #21
	bne	2f
	orrs	ip, yl, yh, lsl #12
	bne	3f			@ y is NAN
2:	mov	r0, #0			@ arguments are ordered.
	RET

3:	mov	r0, #1			@ arguments are unordered.
	RET

	FUNC_END aeabi_dcmpun
	FUNC_END unorddf2

#endif /* L_unorddf2 */

#ifdef L_fixdfsi

ARM_FUNC_START fixdfsi
ARM_FUNC_ALIAS aeabi_d2iz fixdfsi

	@ check exponent range.
	mov	r2, xh, lsl #1
	adds	r2, r2, #(1 << 21)
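	@ (With the sign shifted out, the biased exponent sits in r2 bits
	@ 31:21.  Adding 1 << 21 makes exponent 0x7ff carry out (INF/NAN)
	@ and leaves N clear when the exponent is below 1023, i.e. |x| < 1.)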
	bcs	2f			@ value is INF or NAN
	bpl	1f			@ value is too small
	mov	r3, #(0xfffffc00 + 31)
	subs	r2, r3, r2, asr #21
	bls	3f			@ value is too large

	@ scale value
	mov	r3, xh, lsl #11
	orr	r3, r3, #0x80000000
	orr	r3, r3, xl, lsr #21
	tst	xh, #0x80000000		@ the sign bit
	mov	r0, r3, lsr r2
	rsbne	r0, r0, #0
	RET

1:	mov	r0, #0
	RET

2:	orrs	xl, xl, xh, lsl #12
	bne	4f			@ x is NAN.
3:	ands	r0, xh, #0x80000000	@ the sign bit
	moveq	r0, #0x7fffffff		@ maximum signed positive si
	RET

4:	mov	r0, #0			@ How should we convert NAN?
	RET

	FUNC_END aeabi_d2iz
	FUNC_END fixdfsi

#endif /* L_fixdfsi */

#ifdef L_fixunsdfsi

ARM_FUNC_START fixunsdfsi
ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi

	@ check exponent range.
	movs	r2, xh, lsl #1
	bcs	1f			@ value is negative
	adds	r2, r2, #(1 << 21)
	bcs	2f			@ value is INF or NAN
	bpl	1f			@ value is too small
	mov	r3, #(0xfffffc00 + 31)
	subs	r2, r3, r2, asr #21
	bmi	3f			@ value is too large

	@ scale value
	mov	r3, xh, lsl #11
	orr	r3, r3, #0x80000000
	orr	r3, r3, xl, lsr #21
	mov	r0, r3, lsr r2
	RET

1:	mov	r0, #0
	RET

2:	orrs	xl, xl, xh, lsl #12
	bne	4f			@ value is NAN.
3:	mov	r0, #0xffffffff		@ maximum unsigned si
	RET

4:	mov	r0, #0			@ How should we convert NAN?
	RET

	FUNC_END aeabi_d2uiz
	FUNC_END fixunsdfsi

#endif /* L_fixunsdfsi */

#ifdef L_truncdfsf2

ARM_FUNC_START truncdfsf2
ARM_FUNC_ALIAS aeabi_d2f truncdfsf2

	@ check exponent range.
	mov	r2, xh, lsl #1
	subs	r3, r2, #((1023 - 127) << 21)
	subcss	ip, r3, #(1 << 21)
	rsbcss	ip, ip, #(254 << 21)
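	@ (The first subtract re-biases the exponent from 1023 to 127; the
	@ two conditional subtracts then range-check it against 1..254, the
	@ range of normal single-precision exponents.)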
	bls	2f			@ value is out of range

1:	@ shift and round mantissa
	and	ip, xh, #0x80000000
	mov	r2, xl, lsl #3
	orr	xl, ip, xl, lsr #29
	cmp	r2, #0x80000000
	adc	r0, xl, r3, lsl #2
	biceq	r0, r0, #1
	RET

2:	@ either overflow or underflow
	tst	xh, #0x40000000
	bne	3f			@ overflow

	@ check if denormalized value is possible
	adds	r2, r3, #(23 << 21)
	andlt	r0, xh, #0x80000000	@ too small, return signed 0.
	RETc(lt)

	@ denormalize value so we can resume with the code above afterwards.
	orr	xh, xh, #0x00100000
	mov	r2, r2, lsr #21
	rsb	r2, r2, #24
	rsb	ip, r2, #32
	movs	r3, xl, lsl ip
	mov	xl, xl, lsr r2
	orrne	xl, xl, #1		@ fold r3 for rounding considerations.
	mov	r3, xh, lsl #11
	mov	r3, r3, lsr #11
	orr	xl, xl, r3, lsl ip
	mov	r3, r3, lsr r2
	mov	r3, r3, lsl #1
	b	1b

3:	@ check for NAN
	mvns	r3, r2, asr #21
	bne	5f			@ simple overflow
	orrs	r3, xl, xh, lsl #12
	movne	r0, #0x7f000000
	orrne	r0, r0, #0x00c00000
	RETc(ne)			@ return NAN

5:	@ return INF with sign
	and	r0, xh, #0x80000000
	orr	r0, r0, #0x7f000000
	orr	r0, r0, #0x00800000
	RET

	FUNC_END aeabi_d2f
	FUNC_END truncdfsf2

#endif /* L_truncdfsf2 */