122 lines
1.7 KiB
ArmAsm
122 lines
1.7 KiB
ArmAsm
|
/* Copyright 2002 Andi Kleen */
|
||
|
|
||
|
#include <asm/cpufeature.h>
|
||
|
/*
 * memcpy - Copy a memory block.
 *
 * Syntax: GNU as, AT&T operand order (source, destination), x86-64.
 *
 * Input:
 *	rdi	destination
 *	rsi	source
 *	rdx	count in bytes (all 64 bits are honoured)
 *
 * Output:
 *	rax	original destination
 *
 * Clobbers: rcx, rsi, rdi, r8-r11 and the flags.  rdx is only read.
 * The stack is not touched and no callee-saved register is used.
 *
 * Bytes are copied from low to high addresses: first whole 64-byte blocks,
 * then whole 8-byte words, then the remaining 0-7 single bytes.
 */
	.globl __memcpy
	.globl memcpy
	.p2align 4
__memcpy:
memcpy:
	movq %rdi,%rax			/* return value: original destination */

	/*
	 * rcx = number of whole 64-byte blocks.  The count is kept in a
	 * 64-bit register so that copies of 4 GiB and more are not truncated.
	 */
	movq %rdx,%rcx
	shrq $6,%rcx
	jz .Lhandle_tail

	.p2align 4
.Lloop_64:
	/*
	 * The loop counter is decremented up front.  None of the movq/leaq
	 * instructions below modify the flags, so the jnz at the bottom of
	 * the loop still tests the zero flag produced by this decq.
	 */
	decq %rcx

	movq (%rsi),%r11
	movq 8(%rsi),%r8

	movq %r11,(%rdi)
	movq %r8,1*8(%rdi)

	movq 2*8(%rsi),%r9
	movq 3*8(%rsi),%r10

	movq %r9,2*8(%rdi)
	movq %r10,3*8(%rdi)

	movq 4*8(%rsi),%r11
	movq 5*8(%rsi),%r8

	movq %r11,4*8(%rdi)
	movq %r8,5*8(%rdi)

	movq 6*8(%rsi),%r9
	movq 7*8(%rsi),%r10

	movq %r9,6*8(%rdi)
	movq %r10,7*8(%rdi)

	/* leaq advances the pointers without disturbing the flags. */
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	jnz .Lloop_64

.Lhandle_tail:
	/*
	 * ecx = number of whole 8-byte words left (0-7).  Only the low six
	 * bits of the count matter here, so 32-bit arithmetic is sufficient.
	 */
	movl %edx,%ecx
	andl $63,%ecx
	shrl $3,%ecx
	jz .Lhandle_7

	.p2align 4
.Lloop_8:
	/* Same trick as above: movq/leaq keep the flags set by decl. */
	decl %ecx
	movq (%rsi),%r8
	movq %r8,(%rdi)
	leaq 8(%rdi),%rdi
	leaq 8(%rsi),%rsi
	jnz .Lloop_8

.Lhandle_7:
	/* ecx = number of trailing single bytes (0-7). */
	movl %edx,%ecx
	andl $7,%ecx
	jz .Lende

	.p2align 4
.Lloop_1:
	movb (%rsi),%r8b
	movb %r8b,(%rdi)
	incq %rdi
	incq %rsi
	decl %ecx
	jnz .Lloop_1

.Lende:
	ret

	/*
	 * End-of-routine marker.  The .altinstructions record in this file
	 * stores .Lfinal-memcpy in a single byte, so the routine has to stay
	 * shorter than 256 bytes.
	 */
.Lfinal:
|
||
|
|
||
|
/*
 * C-stepping K8 CPUs run faster with the string copy instructions, and that
 * variant is also a lot simpler, so use it when possible.
 *
 * The record emitted below pairs memcpy with memcpy_c.  Its fields are, in
 * order: the address of memcpy, the address of memcpy_c, X86_FEATURE_K8_C as
 * one byte, the byte distance from memcpy to .Lfinal, and the byte length of
 * memcpy_c.
 *
 * NOTE(review): the code that consumes .altinstructions is not part of this
 * file; presumably it patches memcpy_c over memcpy when the feature is
 * present - confirm the field layout against that consumer.
 */
	.section .altinstructions,"a"
	.balign 8				/* 8-byte alignment for the record */
	.quad memcpy				/* original code */
	.quad memcpy_c				/* replacement code */
	.byte X86_FEATURE_K8_C			/* feature id */
	.byte .Lfinal-memcpy			/* length of memcpy in bytes */
	.byte memcpy_c_end-memcpy_c		/* length of memcpy_c in bytes */
	.previous
|
||
|
|
||
|
/*
 * memcpy_c - memcpy variant built on the string copy instructions.
 *
 * It lives in .altinstr_replacement and is referenced from the
 * .altinstructions record in this file together with memcpy.
 *
 * Input:
 *	rdi	destination
 *	rsi	source
 *	rdx	count in bytes (all 64 bits are honoured)
 *
 * Output:
 *	rax	original destination
 *
 * Clobbers: rcx, rdx, rsi, rdi and the flags.
 * NOTE(review): assumes the direction flag is clear on entry so that the
 * string moves run from low to high addresses - confirm against callers.
 */
	.section .altinstr_replacement,"ax"
memcpy_c:
	movq %rdi,%rax			/* return value: original destination */

	/*
	 * rcx = number of whole 8-byte words.  Computed in 64 bits so that
	 * counts of 4 GiB and more are not truncated before rep movsq.
	 */
	movq %rdx,%rcx
	shrq $3,%rcx
	andl $7,%edx			/* edx = trailing bytes (0-7) */
	rep
	movsq

	movl %edx,%ecx			/* rcx = trailing byte count, zero-extended */
	rep
	movsb
	ret
	/*
	 * memcpy_c_end-memcpy_c is stored in a single byte by the
	 * .altinstructions record, so this routine must stay short.
	 */
memcpy_c_end:
	.previous
|