2
0
mirror of https://github.com/xcat2/xNBA.git synced 2025-01-18 21:43:14 +00:00

Accelerate memcpy() by around 32% on large, dword-aligned copies.

This commit is contained in:
Michael Brown 2007-01-18 15:18:02 +00:00
parent c8b3e969a0
commit bd95927386
2 changed files with 70 additions and 0 deletions

View File

@ -0,0 +1,63 @@
/*
* Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/** @file
*
* Optimised string operations
*
*/
#include <string.h>
/**
 * Copy memory area
 *
 * Copies @c len bytes from @c src to @c dest using the x86 string-move
 * instructions: as many whole dwords as possible with "rep movsl",
 * followed by one word and/or one byte as selected by the low two
 * bits of @c len.
 *
 * @v dest		Destination address
 * @v src		Source address
 * @v len		Length
 * @ret dest		Destination address
 */
__attribute__ (( regparm ( 3 ) )) void * __memcpy ( void *dest,
						    const void *src,
						    size_t len ) {
	/* Running cursors, pinned to %edi/%esi by the asm constraints
	 * below.  Each asm statement both consumes and updates them,
	 * so that the next statement carries on where the previous
	 * one stopped.
	 */
	void *edi = dest;
	const void *esi = src;
	/* Scratch output for the count register consumed by "rep".
	 * It is tied to the size_t input ( len >> 2 ) via a matching
	 * constraint, so it must have the same type as that input.
	 */
	size_t discard_ecx;

	/* NOTE(review): no "cld" is issued here, so the code
	 * presumably relies on the direction flag already being clear
	 * on entry - confirm against the calling convention in use.
	 */

	/* We often do large dword-aligned and dword-length block
	 * moves.  Using movsl rather than movsb speeds these up by
	 * around 32%.
	 */
	if ( len >> 2 ) {
		__asm__ __volatile__ ( "rep movsl"
				       : "=&D" ( edi ), "=&S" ( esi ),
					 "=&c" ( discard_ecx )
				       : "0" ( edi ), "1" ( esi ),
					 "2" ( len >> 2 )
				       : "memory" );
	}

	/* Copy a trailing word, if bit 1 of the length is set */
	if ( len & 0x02 ) {
		__asm__ __volatile__ ( "movsw" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	}

	/* Copy a trailing byte, if bit 0 of the length is set */
	if ( len & 0x01 ) {
		__asm__ __volatile__ ( "movsb" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	}

	return dest;
}

View File

@ -22,6 +22,12 @@
*/
#define __HAVE_ARCH_MEMCPY
/* Declaration only: no body for __memcpy() is supplied at this point.
 * The regparm(3) attribute is part of the calling convention, so the
 * definition must carry the same attribute as this prototype.
 */
extern __attribute__ (( regparm ( 3 ) )) void * __memcpy ( void *dest,
const void *src,
size_t len );
#if 0
static inline __attribute__ (( always_inline )) void *
__memcpy ( void *dest, const void *src, size_t len ) {
int d0, d1, d2;
@ -31,6 +37,7 @@ __memcpy ( void *dest, const void *src, size_t len ) {
: "memory" );
return dest;
}
#endif
static inline __attribute__ (( always_inline )) void *
__constant_memcpy ( void *dest, const void *src, size_t len ) {