019cd46984
Most crypto drivers involving kernel mode NEON take care to put the code that actually touches the NEON register file in a separate compilation unit, to prevent the compiler from reordering code that preserves or restores the NEON context with code that may corrupt it. This is necessary because we currently have no way to express the restrictions imposed upon use of the NEON in kernel mode in a way that the compiler understands. However, in the case of aes-ce-cipher, it did not seem unreasonable to deviate from this rule, given how it does not seem possible for the compiler to reorder cross object function calls with asm blocks whose in- and output constraints reflect that it reads from and writes to memory. Now that LTO is being proposed for the arm64 kernel, it is time to revisit this. The link time optimization may replace the function calls to kernel_neon_begin() and kernel_neon_end() with instantiations of the IR that make up its implementation, allowing further reordering with the asm block. So let's clean this up, and move the asm() blocks into a separate .S file. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Reviewed-By: Nick Desaulniers <ndesaulniers@google.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
88 lines
1.8 KiB
ArmAsm
88 lines
1.8 KiB
ArmAsm
/*
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
|
|
|
|
#include <linux/linkage.h>
|
|
#include <asm/assembler.h>
|
|
|
|
/* Enable the Crypto Extension so the assembler accepts the aese/aesd/aesmc/aesimc mnemonics used in this file. */
.arch armv8-a+crypto
|
|
|
|
/*
 * __aes_ce_encrypt() - transform one 16-byte block with AESE/AESMC
 *
 * Register roles, as read off the code below (the C prototype is not part
 * of this file - confirm against the caller):
 *   x0 - address of the round keys; read 16 bytes at a time, post-incremented
 *   x1 - address the 16-byte result is stored to
 *   x2 - address the 16-byte input is loaded from
 *   w3 - round count; presumably 10, 12 or 14 (AES-128/192/256) - the
 *        entry selection below only balances out for those values
 *
 * Clobbers x0, w3, v0-v3 and the condition flags.
 *
 * The body is a loop of three AESE/AESMC stages that rotates the round key
 * through v2, v3 and v1. Comparing (w3 - 2) against 10 selects the stage at
 * which the loop is entered; the first round key is moved into whichever
 * register that stage consumes.
 */
ENTRY(__aes_ce_encrypt)
	sub		w3, w3, #2
	ld1		{v0.16b}, [x2]			// v0 = block being transformed
	ld1		{v1.4s}, [x0], #16		// first round key
	cmp		w3, #10
	b.mi		.Lenc_from_stage1		// negative: enter at stage 1
	b.ne		.Lenc_stage3			// not equal: enter at stage 3
	mov		v3.16b, v1.16b			// equal: stage 2 takes its key from v3
	b		.Lenc_stage2

.Lenc_from_stage1:
	mov		v2.16b, v1.16b			// stage 1 takes its key from v2
	ld1		{v3.4s}, [x0], #16

.Lenc_stage1:
	aese		v0.16b, v2.16b
	aesmc		v0.16b, v0.16b

.Lenc_stage2:
	ld1		{v1.4s}, [x0], #16
	aese		v0.16b, v3.16b
	aesmc		v0.16b, v0.16b

.Lenc_stage3:
	ld1		{v2.4s}, [x0], #16
	subs		w3, w3, #3			// one full trip covers three stages
	aese		v0.16b, v1.16b
	aesmc		v0.16b, v0.16b
	ld1		{v3.4s}, [x0], #16
	b.pl		.Lenc_stage1			// keep looping while the count is >= 0

	aese		v0.16b, v2.16b			// last AESE is not followed by AESMC
	eor		v0.16b, v0.16b, v3.16b		// XOR in the last key that was loaded
	st1		{v0.16b}, [x1]
	ret
ENDPROC(__aes_ce_encrypt)
|
|
|
|
/*
 * __aes_ce_decrypt() - transform one 16-byte block with AESD/AESIMC
 *
 * Register roles, as read off the code below (the C prototype is not part
 * of this file - confirm against the caller):
 *   x0 - address of the round keys; read 16 bytes at a time, post-incremented
 *        (presumably the decryption key schedule - verify against caller)
 *   x1 - address the 16-byte result is stored to
 *   x2 - address the 16-byte input is loaded from
 *   w3 - round count; presumably 10, 12 or 14 (AES-128/192/256) - the
 *        entry selection below only balances out for those values
 *
 * Clobbers x0, w3, v0-v3 and the condition flags.
 *
 * The body is a loop of three AESD/AESIMC stages that rotates the round key
 * through v2, v3 and v1. Comparing (w3 - 2) against 10 selects the stage at
 * which the loop is entered; the first round key is moved into whichever
 * register that stage consumes.
 */
ENTRY(__aes_ce_decrypt)
	sub		w3, w3, #2
	ld1		{v0.16b}, [x2]			// v0 = block being transformed
	ld1		{v1.4s}, [x0], #16		// first round key
	cmp		w3, #10
	b.mi		.Ldec_from_stage1		// negative: enter at stage 1
	b.ne		.Ldec_stage3			// not equal: enter at stage 3
	mov		v3.16b, v1.16b			// equal: stage 2 takes its key from v3
	b		.Ldec_stage2

.Ldec_from_stage1:
	mov		v2.16b, v1.16b			// stage 1 takes its key from v2
	ld1		{v3.4s}, [x0], #16

.Ldec_stage1:
	aesd		v0.16b, v2.16b
	aesimc		v0.16b, v0.16b

.Ldec_stage2:
	ld1		{v1.4s}, [x0], #16
	aesd		v0.16b, v3.16b
	aesimc		v0.16b, v0.16b

.Ldec_stage3:
	ld1		{v2.4s}, [x0], #16
	subs		w3, w3, #3			// one full trip covers three stages
	aesd		v0.16b, v1.16b
	aesimc		v0.16b, v0.16b
	ld1		{v3.4s}, [x0], #16
	b.pl		.Ldec_stage1			// keep looping while the count is >= 0

	aesd		v0.16b, v2.16b			// last AESD is not followed by AESIMC
	eor		v0.16b, v0.16b, v3.16b		// XOR in the last key that was loaded
	st1		{v0.16b}, [x1]
	ret
ENDPROC(__aes_ce_decrypt)
|
|
|
|
/*
 * __aes_ce_sub() - apply the AES S-box to each of the four bytes of a word
 *
 *   w0 - 32-bit input; on return w0 holds the substituted word
 *
 * AESE XORs its two operands before substituting bytes and shifting rows.
 * With an all-zero state the XOR just yields the replicated input, and
 * because every 32-bit lane then holds the same word, the row shift leaves
 * the contents of lane 0 unchanged.
 *
 * Clobbers v0 and v1.
 */
ENTRY(__aes_ce_sub)
	movi		v0.16b, #0			// zero state: the XOR inside AESE becomes a copy
	dup		v1.4s, w0			// input word replicated into all four lanes
	aese		v0.16b, v1.16b
	umov		w0, v0.s[0]			// return lane 0
	ret
ENDPROC(__aes_ce_sub)
|
|
|
|
/*
 * __aes_ce_invert() - run AESIMC (AES inverse MixColumns) over one 16-byte
 *                     vector
 *
 *   x0 - address the 16-byte result is stored to
 *   x1 - address the 16-byte input is loaded from
 *
 * Clobbers v0 and v1.
 */
ENTRY(__aes_ce_invert)
	ld1		{v0.4s}, [x1]			// fetch input
	aesimc		v1.16b, v0.16b
	st1		{v1.4s}, [x0]			// write result
	ret
ENDPROC(__aes_ce_invert)
|