6ebbf2ce43
ARMv6 and greater introduced a new instruction ("bx") which can be used to return from function calls. Recent CPUs perform better when the "bx lr" instruction is used rather than the "mov pc, lr" instruction, and this sequence is strongly recommended to be used by the ARM architecture manual (section A.4.1.1). We provide a new macro "ret" with all its variants for the condition code which will resolve to the appropriate instruction. Rather than doing this piecemeal, and missing some instances, change all the "mov pc" instances to use the new macro, with the exception of the "movs" instruction and the kprobes code. This allows us to detect the "mov pc, lr" case and fix it up - and also gives us the possibility of deploying this for other registers depending on the CPU selection. Reported-by: Will Deacon <will.deacon@arm.com> Tested-by: Stephen Warren <swarren@nvidia.com> # Tegra Jetson TK1 Tested-by: Robert Jarzmik <robert.jarzmik@free.fr> # mioa701_bootresume.S Tested-by: Andrew Lunn <andrew@lunn.ch> # Kirkwood Tested-by: Shawn Guo <shawn.guo@freescale.com> Tested-by: Tony Lindgren <tony@atomide.com> # OMAPs Tested-by: Gregory CLEMENT <gregory.clement@free-electrons.com> # Armada XP, 375, 385 Acked-by: Sekhar Nori <nsekhar@ti.com> # DaVinci Acked-by: Christoffer Dall <christoffer.dall@linaro.org> # kvm/hyp Acked-by: Haojian Zhuang <haojian.zhuang@gmail.com> # PXA3xx Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> # Xen Tested-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de> # ARMv7M Tested-by: Simon Horman <horms+renesas@verge.net.au> # Shmobile Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
213 lines
4.0 KiB
ArmAsm
213 lines
4.0 KiB
ArmAsm
/*
|
|
* linux/arch/arm/lib/div64.S
|
|
*
|
|
* Optimized computation of 64-bit dividend / 32-bit divisor
|
|
*
|
|
* Author: Nicolas Pitre
|
|
* Created: Oct 5, 2003
|
|
* Copyright: Monta Vista Software, Inc.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
* published by the Free Software Foundation.
|
|
*/
|
|
|
|
#include <linux/linkage.h>
|
|
#include <asm/assembler.h>
|
|
#include <asm/unwind.h>
|
|
|
|
/*
 * Register aliases for the two 64-bit quantities used below.
 *
 * xh:xl live in r0/r1 and yh:yl in r2/r3.  Which register of each pair
 * carries the "h" (upper) word depends on endianness: with __ARMEB__
 * defined the upper word is the lower-numbered register (r0, r2),
 * otherwise it is the higher-numbered one (r1, r3).
 */
#ifdef __ARMEB__
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#else
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#endif
|
|
|
|
/*
 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
 *
 * Note: Calling convention is totally non-standard for optimal code.
 *       This is meant to be used by do_div() from include/asm/div64.h only.
 *
 * Input parameters:
 * 	xh-xl	= dividend (clobbered)
 * 	r4	= divisor (preserved)
 *
 * Output values:
 * 	yh-yl	= result
 * 	xh	= remainder
 *
 * Clobbered regs: xl, ip
 */
|
|
|
|
/*
 * Layout of __do_div64 (numeric labels are local to this routine):
 *
 *   entry  quick tests: divisor 0 or 1 -> 9, power of two -> 8
 *   1/2    upper quotient word (yh), only entered when xh >= divisor
 *   3      early exit when what is left of the dividend is below the divisor
 *   4/5/6  lower quotient word (yl), one quotient bit per iteration
 *   7      pre-ARMv5 loop standing in for clz when skipping zero bits
 *   8      divisor is a power of two: plain shifts
 *   9      divisor is 1; a divisor of 0 falls through to Ldiv0_64
 *
 * Many instructions here are conditional on flags produced several lines
 * earlier, and plain (non flag-setting) mov instructions sit between a
 * compare and its consumer.  Keep the instruction order as it is.
 */
ENTRY(__do_div64)
UNWIND(.fnstart)

	@ Test for easy paths first.
	subs	ip, r4, #1		@ ip = divisor - 1
	bls	9f			@ divisor is 0 or 1
	tst	ip, r4			@ (divisor - 1) & divisor
	beq	8f			@ divisor is power of 2

	@ See if we need to handle upper 32-bit result.
	@ yh is cleared between the compare and the branch; mov leaves
	@ the flags alone.
	cmp	xh, r4
	mov	yh, #0
	blo	3f			@ xh < divisor: upper result stays 0

	@ Align divisor with upper part of dividend.
	@ The aligned divisor is stored in yl preserving the original.
	@ The bit position is stored in ip.

#if __LINUX_ARM_ARCH__ >= 5

	clz	yl, r4
	clz	ip, xh
	sub	yl, yl, ip		@ shift = clz(divisor) - clz(xh)
	mov	ip, #1
	mov	ip, ip, lsl yl		@ ip = 1 << shift
	mov	yl, r4, lsl yl		@ yl = divisor << shift

#else

	@ Without clz: double yl and ip until yl has its top bit set
	@ or is no longer below xh.
	mov	yl, r4
	mov	ip, #1
1:	cmp	yl, #0x80000000
	cmpcc	yl, xh
	movcc	yl, yl, lsl #1
	movcc	ip, ip, lsl #1
	bcc	1b

#endif

	@ The division loop for needed upper bit positions.
	@ Break out early if dividend reaches 0.
	@ ip holds the quotient bit being tried and yl the divisor shifted
	@ to that position.  The loop ends when the subtraction leaves
	@ xh at zero or when ip has been shifted down to zero.
2:	cmp	xh, yl
	orrcs	yh, yh, ip		@ xh >= yl: set this quotient bit
	subcss	xh, xh, yl		@ ... and subtract, updating Z
	movnes	ip, ip, lsr #1		@ next bit; Z set once ip is 0
	mov	yl, yl, lsr #1
	bne	2b

	@ See if we need to handle lower 32-bit result.
	@ With xh == 0 and xl < divisor the low quotient word is 0 and
	@ xl already is the remainder.
3:	cmp	xh, #0
	mov	yl, #0
	cmpeq	xl, r4
	movlo	xh, xl
	retlo	lr

	@ The division loop for lower bit positions.
	@ Here we shift remainder bits leftwards rather than moving the
	@ divisor for comparisons, considering the carry-out bit as well.
	mov	ip, #0x80000000		@ start with the top bit of yl
4:	movs	xl, xl, lsl #1		@ C = bit shifted out of xl
	adcs	xh, xh, xh		@ xh = 2 * xh + C, new C = bit 32
	beq	6f			@ xh is now zero: find out why
	cmpcc	xh, r4			@ no carry out: C = (xh >= divisor)
5:	orrcs	yl, yl, ip		@ C set: quotient bit is 1
	subcs	xh, xh, r4
	movs	ip, ip, lsr #1		@ next bit; Z set once ip is 0
	bne	4b
	ret	lr

	@ The top part of remainder became zero.  If carry is set
	@ (the 33rd bit) this is a false positive so resume the loop.
	@ Otherwise, if lower part is also null then we are done.
6:	bcs	5b
	cmp	xl, #0
	reteq	lr

	@ We still have remainder bits in the low part.  Bring them up.
	@ xl is shifted left until its first set bit has been pushed out,
	@ and ip is moved right by the same number of positions.

#if __LINUX_ARM_ARCH__ >= 5

	clz	xh, xl			@ we know xh is zero here so...
	add	xh, xh, #1
	mov	xl, xl, lsl xh
	mov	ip, ip, lsr xh

#else

7:	movs	xl, xl, lsl #1
	mov	ip, ip, lsr #1
	bcc	7b

#endif

	@ Current remainder is now 1.  It is worthless to compare with
	@ divisor at this point since divisor can not be smaller than 3 here.
	@ If possible, branch for another shift in the division loop.
	@ If no bit position left then we are done.
	movs	ip, ip, lsr #1
	mov	xh, #1
	bne	4b
	ret	lr

8:	@ Division by a power of 2: determine what that divisor order is
	@ then simply shift values around

#if __LINUX_ARM_ARCH__ >= 5

	clz	ip, r4
	rsb	ip, ip, #31		@ ip = 31 - clz(divisor)

#else

	@ Without clz: build the order in ip by testing 16, 8 and 4 bit
	@ wide halves of a working copy of the divisor in yl.
	mov	yl, r4
	cmp	r4, #(1 << 16)
	mov	ip, #0
	movhs	yl, yl, lsr #16
	movhs	ip, #16

	cmp	yl, #(1 << 8)
	movhs	yl, yl, lsr #8
	addhs	ip, ip, #8

	cmp	yl, #(1 << 4)
	movhs	yl, yl, lsr #4
	addhs	ip, ip, #4

	@ yl is now 1, 2, 4 or 8: add 3 for 8, else yl >> 1 (0, 1 or 2).
	cmp	yl, #(1 << 2)
	addhi	ip, ip, #3
	addls	ip, ip, yl, lsr #1

#endif

	mov	yh, xh, lsr ip
	mov	yl, xl, lsr ip
	rsb	ip, ip, #32
	@ Merge the bits shifted out of xh into the top of yl.  The Thumb
	@ variant does it in two steps and overwrites xh, which is reloaded
	@ from xl right below.
 ARM(	orr	yl, yl, xh, lsl ip	)
 THUMB(	lsl	xh, xh, ip		)
 THUMB(	orr	yl, yl, xh		)
	mov	xh, xl, lsl ip		@ remainder: keep only the low
	mov	xh, xh, lsr ip		@ order bits of xl
	ret	lr

	@ eq -> division by 1: obvious enough...
9:	moveq	yl, xl
	moveq	yh, xh
	moveq	xh, #0
	reteq	lr
	@ Not eq: the divisor is 0, fall through into Ldiv0_64.
UNWIND(.fnend)

UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
Ldiv0_64:
	@ Division by 0:
	str	lr, [sp, #-8]!		@ push lr; sp moves down by 8
	bl	__div0

	@ as wrong as it could be...
	mov	yl, #0
	mov	yh, #0
	mov	xh, #0
	ldr	pc, [sp], #8		@ return by popping saved lr into pc

UNWIND(.fnend)
ENDPROC(__do_div64)
|