/* memcpy.S: optimised assembly memcpy
 *
 * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */


        .text					; everything below is code
        .p2align	4			; align the start to 2^4 = 16 bytes

###############################################################################
#
# void *memcpy(void *to, const char *from, size_t count)
#
# - NOTE: must not use any stack. exception detection performs function return
#         to caller's fixup routine, aborting the remainder of the copy
#
# Register usage, as read from the code below:
#   GR8     - to; never written here, so it still holds the destination
#             address when the routine returns
#   GR9     - from; turned into the pre-incremented source cursor
#   GR10    - count; bytes still to copy, counted down to zero
#   GR11    - step; bytes moved per load/store and the index register used
#             by every load/store in the loops
#   GR3     - pre-incremented destination cursor
#   GR4-GR7 - scratch: alignment test first, then the data in flight
#
# - the copy unit (16, 8, 4, 2 or 1 bytes) is picked once at entry as the
#   widest one that divides to, from AND count; there is no head or tail
#   fix-up, and because count is a multiple of the unit each countdown lands
#   exactly on zero
# - a .p suffix packs an instruction into the same issue group as the
#   instruction that follows it, so the statement order here is significant
#
###############################################################################
        .globl		memcpy,__memcpy_end
        .type		memcpy,@function
memcpy:
	or.p		gr8,gr9,gr4		; GR4 = to | from
	orcc		gr10,gr0,gr0,icc3	; test count against zero (result discarded)
	or.p		gr10,gr4,gr4		; GR4 = to | from | count
	beqlr		icc3,#0			; count == 0: return with nothing copied

	# optimise based on best common alignment for to, from & count
	# - a set low bit in GR4 means at least one of the three values is not
	#   a multiple of that power of two
	andicc.p	gr4,#0x0f,gr0,icc0	; icc0: all multiples of 16?
	setlos		#8,gr11			; step shared by the 8 and 16 byte loops
	andicc.p	gr4,#0x07,gr0,icc1	; icc1: all multiples of 8?
	beq		icc0,#0,memcpy_16
	andicc.p	gr4,#0x03,gr0,icc0	; icc0: all multiples of 4?
	beq		icc1,#0,memcpy_8
	andicc.p	gr4,#0x01,gr0,icc1	; icc1: all multiples of 2?
	beq		icc0,#0,memcpy_4
	setlos.p	#1,gr11			; byte step; memcpy_2 reloads GR11 itself
	beq		icc1,#0,memcpy_2

	# do byte by byte copy
	# - both cursors start one step short of the data because each access
	#   below advances its base register by GR11 before using it
	sub.p		gr8,gr11,gr3		; GR3 = to - step (GR8 is left intact)
	sub		gr9,gr11,gr9		; GR9 = from - step
0:	ldubu.p		@(gr9,gr11),gr4		; GR9 += step, load one byte
	subicc		gr10,#1,gr10,icc0	; count -= 1
	stbu.p		gr4,@(gr3,gr11)		; GR3 += step, store the byte
	bne		icc0,#2,0b		; loop until count reaches zero
	bralr

	# do halfword by halfword copy
memcpy_2:
	setlos		#2,gr11			; step = 2
	sub.p		gr8,gr11,gr3		; GR3 = to - step
	sub		gr9,gr11,gr9		; GR9 = from - step
0:	lduhu.p		@(gr9,gr11),gr4		; GR9 += step, load one halfword
	subicc		gr10,#2,gr10,icc0	; count -= 2
	sthu.p		gr4,@(gr3,gr11)		; GR3 += step, store the halfword
	bne		icc0,#2,0b		; loop until count reaches zero
	bralr

	# do word by word copy
memcpy_4:
	setlos		#4,gr11			; step = 4
	sub.p		gr8,gr11,gr3		; GR3 = to - step
	sub		gr9,gr11,gr9		; GR9 = from - step
0:	ldu.p		@(gr9,gr11),gr4		; GR9 += step, load one word
	subicc		gr10,#4,gr10,icc0	; count -= 4
	stu.p		gr4,@(gr3,gr11)		; GR3 += step, store the word
	bne		icc0,#2,0b		; loop until count reaches zero
	bralr

	# do double-word by double-word copy
	# - GR11 already holds 8 from the dispatch code above
memcpy_8:
	sub.p		gr8,gr11,gr3		; GR3 = to - step
	sub		gr9,gr11,gr9		; GR9 = from - step
0:	lddu.p		@(gr9,gr11),gr4		; GR9 += step, load GR4/GR5
	subicc		gr10,#8,gr10,icc0	; count -= 8
	stdu.p		gr4,@(gr3,gr11)		; GR3 += step, store GR4/GR5
	bne		icc0,#2,0b		; loop until count reaches zero
	bralr

	# do quad-word by quad-word copy
	# - GR11 is still 8: each pass moves two double-words, 16 bytes in all
memcpy_16:
	sub.p		gr8,gr11,gr3		; GR3 = to - step
	sub		gr9,gr11,gr9		; GR9 = from - step
0:	lddu		@(gr9,gr11),gr4		; GR9 += step, load GR4/GR5
	lddu.p		@(gr9,gr11),gr6		; GR9 += step, load GR6/GR7
	subicc		gr10,#16,gr10,icc0	; count -= 16
	stdu		gr4,@(gr3,gr11)		; GR3 += step, store GR4/GR5
	stdu.p		gr6,@(gr3,gr11)		; GR3 += step, store GR6/GR7
	bne		icc0,#2,0b		; loop until count reaches zero
	bralr
	# end-of-routine marker, exported alongside memcpy; presumably used by
	# fault handling code to test for an address inside memcpy - TODO confirm
__memcpy_end:

	.size		memcpy, __memcpy_end-memcpy

###############################################################################
#
# copy to/from userspace
# - return the number of bytes that could not be copied (0 on complete success)
#
# long __memcpy_user(void *dst, const void *src, size_t count)
#
# - GR8 = dst, GR9 = src and GR10 = count are handed to memcpy untouched
# - an 8-byte stack frame carries two words across the call:
#     offset 0 - dst + count, the address one past the end of the destination
#     offset 4 - the return address read from LR on entry
# - __memcpy_user_error_lr labels the instruction memcpy returns to. It and
#   __memcpy_user_error_handler are exported, presumably so that the fault
#   fixup code can recognise this call site and divert to the handler;
#   that code is not in this file - TODO confirm
#
###############################################################################
        .globl		__memcpy_user, __memcpy_user_error_lr, __memcpy_user_error_handler
        .type		__memcpy_user,@function
__memcpy_user:
	movsg		lr,gr7			; GR7 = return address
	subi.p		sp,#8,sp		; open the 8-byte frame
	add		gr8,gr10,gr6		; calculate expected end address
	stdi		gr6,@(sp,#0)		; save GR6 (end addr) and GR7 (ret addr)

	# abuse memcpy to do the dirty work
	call		memcpy
__memcpy_user_error_lr:
	# memcpy ran to completion: report zero bytes left over
	ldi.p		@(sp,#4),gr7		; GR7 = saved return address
	setlos		#0,gr8			; return value = 0
	jmpl.p		@(gr7,gr0)		; return to the caller ...
	addi		sp,#8,sp		; ... closing the frame in the same packet

	# deal with any exception generated by memcpy
	# GR3 - destination cursor kept by memcpy; GR3 + GR11 is the address the
	#       next store would have written
	# GR11 - memcpy's step value (index register for store insns)
	# result in GR8 = (dst + count) - (GR3 + GR11), the bytes left uncopied
__memcpy_user_error_handler:
	lddi.p		@(sp,#0),gr4		; load GR4 with dst+count, GR5 with ret addr
	add		gr11,gr3,gr7		; GR7 = next destination address
	sub.p		gr4,gr7,gr8		; GR8 = bytes that were not copied

	addi		sp,#8,sp		; close the frame
	jmpl		@(gr5,gr0)		; return to the original caller

	.size		__memcpy_user, .-__memcpy_user