/*
 * Copyright 2002,2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/errno.h>

/*
 * Checksum copy with exception handling.
 * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
 * destination is zeroed.
 *
 * Input
 * rdi  source
 * rsi  destination
 * edx  len (32bit)
 * ecx  sum (32bit)
 * r8   src_err_ptr (int)
 * r9   dst_err_ptr (int)
 *
 * Output
 * eax  64bit sum. undefined in case of exception.
 *
 * Wrappers need to take care of valid exception sum and zeroing.
 * They also should align source or destination to 8 bytes.
 */
 | |
| 	.macro source
 | |
| 10:
 | |
| 	.section __ex_table,"a"
 | |
| 	.align 8
 | |
| 	.quad 10b,.Lbad_source
 | |
| 	.previous
 | |
| 	.endm
 | |
| 		
 | |
| 	.macro dest
 | |
| 20:
 | |
| 	.section __ex_table,"a"
 | |
| 	.align 8
 | |
| 	.quad 20b,.Lbad_dest
 | |
| 	.previous
 | |
| 	.endm
 | |
| 			
 | |
| 	.macro ignore L=.Lignore
 | |
| 30:
 | |
| 	.section __ex_table,"a"
 | |
| 	.align 8
 | |
| 	.quad 30b,\L
 | |
| 	.previous
 | |
| 	.endm
 | |
| 	
 | |
| 				
 | |
| ENTRY(csum_partial_copy_generic)
 | |
| 	CFI_STARTPROC
 | |
| 	cmpl	 $3*64,%edx
 | |
| 	jle	 .Lignore
 | |
| 
 | |
| .Lignore:		
 | |
| 	subq  $7*8,%rsp
 | |
| 	CFI_ADJUST_CFA_OFFSET 7*8
 | |
| 	movq  %rbx,2*8(%rsp)
 | |
| 	CFI_REL_OFFSET rbx, 2*8
 | |
| 	movq  %r12,3*8(%rsp)
 | |
| 	CFI_REL_OFFSET r12, 3*8
 | |
| 	movq  %r14,4*8(%rsp)
 | |
| 	CFI_REL_OFFSET r14, 4*8
 | |
| 	movq  %r13,5*8(%rsp)
 | |
| 	CFI_REL_OFFSET r13, 5*8
 | |
| 	movq  %rbp,6*8(%rsp)
 | |
| 	CFI_REL_OFFSET rbp, 6*8
 | |
| 
 | |
| 	movq  %r8,(%rsp)
 | |
| 	movq  %r9,1*8(%rsp)
 | |
| 	
 | |
| 	movl  %ecx,%eax
 | |
| 	movl  %edx,%ecx
 | |
| 
 | |
| 	xorl  %r9d,%r9d
 | |
| 	movq  %rcx,%r12
 | |
| 
 | |
| 	shrq  $6,%r12
 | |
| 	jz    .Lhandle_tail       /* < 64 */
 | |
| 
 | |
| 	clc
 | |
| 	
 | |
| 	/* main loop. clear in 64 byte blocks */
 | |
| 	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
 | |
| 	/* r11:	temp3, rdx: temp4, r12 loopcnt */
 | |
| 	/* r10:	temp5, rbp: temp6, r14 temp7, r13 temp8 */
 | |
| 	.p2align 4
 | |
| .Lloop:
 | |
| 	source
 | |
| 	movq  (%rdi),%rbx
 | |
| 	source
 | |
| 	movq  8(%rdi),%r8
 | |
| 	source
 | |
| 	movq  16(%rdi),%r11
 | |
| 	source
 | |
| 	movq  24(%rdi),%rdx
 | |
| 
 | |
| 	source
 | |
| 	movq  32(%rdi),%r10
 | |
| 	source
 | |
| 	movq  40(%rdi),%rbp
 | |
| 	source
 | |
| 	movq  48(%rdi),%r14
 | |
| 	source
 | |
| 	movq  56(%rdi),%r13
 | |
| 		
 | |
| 	ignore 2f
 | |
| 	prefetcht0 5*64(%rdi)
 | |
| 2:							
 | |
| 	adcq  %rbx,%rax
 | |
| 	adcq  %r8,%rax
 | |
| 	adcq  %r11,%rax
 | |
| 	adcq  %rdx,%rax
 | |
| 	adcq  %r10,%rax
 | |
| 	adcq  %rbp,%rax
 | |
| 	adcq  %r14,%rax
 | |
| 	adcq  %r13,%rax
 | |
| 
 | |
| 	decl %r12d
 | |
| 	
 | |
| 	dest
 | |
| 	movq %rbx,(%rsi)
 | |
| 	dest
 | |
| 	movq %r8,8(%rsi)
 | |
| 	dest
 | |
| 	movq %r11,16(%rsi)
 | |
| 	dest
 | |
| 	movq %rdx,24(%rsi)
 | |
| 
 | |
| 	dest
 | |
| 	movq %r10,32(%rsi)
 | |
| 	dest
 | |
| 	movq %rbp,40(%rsi)
 | |
| 	dest
 | |
| 	movq %r14,48(%rsi)
 | |
| 	dest
 | |
| 	movq %r13,56(%rsi)
 | |
| 	
 | |
| 3:
 | |
| 	
 | |
| 	leaq 64(%rdi),%rdi
 | |
| 	leaq 64(%rsi),%rsi
 | |
| 
 | |
| 	jnz   .Lloop
 | |
| 
 | |
| 	adcq  %r9,%rax
 | |
| 
 | |
| 	/* do last upto 56 bytes */
 | |
| .Lhandle_tail:
 | |
| 	/* ecx:	count */
 | |
| 	movl %ecx,%r10d
 | |
| 	andl $63,%ecx
 | |
| 	shrl $3,%ecx
 | |
| 	jz 	 .Lfold
 | |
| 	clc
 | |
| 	.p2align 4
 | |
| .Lloop_8:	
 | |
| 	source
 | |
| 	movq (%rdi),%rbx
 | |
| 	adcq %rbx,%rax
 | |
| 	decl %ecx
 | |
| 	dest
 | |
| 	movq %rbx,(%rsi)
 | |
| 	leaq 8(%rsi),%rsi /* preserve carry */
 | |
| 	leaq 8(%rdi),%rdi
 | |
| 	jnz	.Lloop_8
 | |
| 	adcq %r9,%rax	/* add in carry */
 | |
| 
 | |
| .Lfold:
 | |
| 	/* reduce checksum to 32bits */
 | |
| 	movl %eax,%ebx
 | |
| 	shrq $32,%rax
 | |
| 	addl %ebx,%eax
 | |
| 	adcl %r9d,%eax
 | |
| 
 | |
| 	/* do last upto 6 bytes */	
 | |
| .Lhandle_7:
 | |
| 	movl %r10d,%ecx
 | |
| 	andl $7,%ecx
 | |
| 	shrl $1,%ecx
 | |
| 	jz   .Lhandle_1
 | |
| 	movl $2,%edx
 | |
| 	xorl %ebx,%ebx
 | |
| 	clc  
 | |
| 	.p2align 4
 | |
| .Lloop_1:	
 | |
| 	source
 | |
| 	movw (%rdi),%bx
 | |
| 	adcl %ebx,%eax
 | |
| 	decl %ecx
 | |
| 	dest
 | |
| 	movw %bx,(%rsi)
 | |
| 	leaq 2(%rdi),%rdi
 | |
| 	leaq 2(%rsi),%rsi
 | |
| 	jnz .Lloop_1
 | |
| 	adcl %r9d,%eax	/* add in carry */
 | |
| 	
 | |
| 	/* handle last odd byte */
 | |
| .Lhandle_1:
 | |
| 	testl $1,%r10d
 | |
| 	jz    .Lende
 | |
| 	xorl  %ebx,%ebx
 | |
| 	source
 | |
| 	movb (%rdi),%bl
 | |
| 	dest
 | |
| 	movb %bl,(%rsi)
 | |
| 	addl %ebx,%eax
 | |
| 	adcl %r9d,%eax		/* carry */
 | |
| 			
 | |
| 	CFI_REMEMBER_STATE
 | |
| .Lende:
 | |
| 	movq 2*8(%rsp),%rbx
 | |
| 	CFI_RESTORE rbx
 | |
| 	movq 3*8(%rsp),%r12
 | |
| 	CFI_RESTORE r12
 | |
| 	movq 4*8(%rsp),%r14
 | |
| 	CFI_RESTORE r14
 | |
| 	movq 5*8(%rsp),%r13
 | |
| 	CFI_RESTORE r13
 | |
| 	movq 6*8(%rsp),%rbp
 | |
| 	CFI_RESTORE rbp
 | |
| 	addq $7*8,%rsp
 | |
| 	CFI_ADJUST_CFA_OFFSET -7*8
 | |
| 	ret
 | |
| 	CFI_RESTORE_STATE
 | |
| 
 | |
| 	/* Exception handlers. Very simple, zeroing is done in the wrappers */
 | |
| .Lbad_source:
 | |
| 	movq (%rsp),%rax
 | |
| 	testq %rax,%rax
 | |
| 	jz   .Lende
 | |
| 	movl $-EFAULT,(%rax)
 | |
| 	jmp  .Lende
 | |
| 	
 | |
| .Lbad_dest:
 | |
| 	movq 8(%rsp),%rax
 | |
| 	testq %rax,%rax
 | |
| 	jz   .Lende	
 | |
| 	movl $-EFAULT,(%rax)
 | |
| 	jmp .Lende
 | |
| 	CFI_ENDPROC
 | |
| ENDPROC(csum_partial_copy_generic)
 |