From bd959273867cb9432504b6a5d11d4ea4d3b41843 Mon Sep 17 00:00:00 2001
From: Michael Brown
Date: Thu, 18 Jan 2007 15:18:02 +0000
Subject: [PATCH] Accelerate memcpy() by around 32% on large, dword-aligned copies.

---
 src/arch/i386/core/i386_string.c    | 73 +++++++++++++++++++++++++++++
 src/arch/i386/include/bits/string.h |  7 ++++
 2 files changed, 80 insertions(+)
 create mode 100644 src/arch/i386/core/i386_string.c

diff --git a/src/arch/i386/core/i386_string.c b/src/arch/i386/core/i386_string.c
new file mode 100644
index 00000000..9917363a
--- /dev/null
+++ b/src/arch/i386/core/i386_string.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2007 Michael Brown .
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/** @file
+ *
+ * Optimised string operations
+ *
+ */
+
+#include <string.h>
+
+/**
+ * Copy memory area
+ *
+ * @v dest		Destination address
+ * @v src		Source address
+ * @v len		Length, in bytes
+ * @ret dest		Destination address
+ *
+ * Whole dwords are copied with a single "rep movsl"; the remaining
+ * zero to three bytes are then copied with at most one "movsw" and
+ * one "movsb".
+ */
+__attribute__ (( regparm ( 3 ) )) void * __memcpy ( void *dest,
+						     const void *src,
+						     size_t len ) {
+	void *edi = dest;
+	const void *esi = src;
+	int discard_ecx;
+
+	/* We often do large dword-aligned and dword-length block
+	 * moves.  Using movsl rather than movsb speeds these up by
+	 * around 32%.
+	 */
+	if ( len >> 2 ) {
+		/* Copy ( len / 4 ) dwords.  The updated pointers are
+		 * written back to edi and esi for the tail copies.
+		 */
+		__asm__ __volatile__ ( "rep movsl"
+				       : "=&D" ( edi ), "=&S" ( esi ),
+				         "=&c" ( discard_ecx )
+				       : "0" ( edi ), "1" ( esi ),
+				         "2" ( len >> 2 )
+				       : "memory" );
+	}
+	if ( len & 0x02 ) {
+		/* Copy one trailing word */
+		__asm__ __volatile__ ( "movsw" : "=&D" ( edi ), "=&S" ( esi )
+				       : "0" ( edi ), "1" ( esi ) : "memory" );
+	}
+	if ( len & 0x01 ) {
+		/* Copy the final odd byte */
+		__asm__ __volatile__ ( "movsb" : "=&D" ( edi ), "=&S" ( esi )
+				       : "0" ( edi ), "1" ( esi ) : "memory" );
+	}
+	/* Return the caller's pointer, not the advanced copy in edi */
+	return dest;
+}
diff --git a/src/arch/i386/include/bits/string.h b/src/arch/i386/include/bits/string.h
index 46772260..c05a7df8 100644
--- a/src/arch/i386/include/bits/string.h
+++ b/src/arch/i386/include/bits/string.h
@@ -22,6 +22,12 @@
  */
 
 #define __HAVE_ARCH_MEMCPY
+
+extern __attribute__ (( regparm ( 3 ) )) void * __memcpy ( void *dest,
+							    const void *src,
+							    size_t len );
+
+#if 0
 static inline __attribute__ (( always_inline )) void *
 __memcpy ( void *dest, const void *src, size_t len ) {
 	int d0, d1, d2;
@@ -31,6 +37,7 @@ __memcpy ( void *dest, const void *src, size_t len ) {
 			       : "memory" );
 	return dest;
 }
+#endif
 
 static inline __attribute__ (( always_inline )) void *
 __constant_memcpy ( void *dest, const void *src, size_t len ) {