0e833e697b
This implements XOR syndrome calculation using NEON intrinsics. As before, the module can be built for ARM and arm64 from the same source. Relative performance on a Cortex-A57 based system: raid6: int64x1 gen() 905 MB/s raid6: int64x1 xor() 881 MB/s raid6: int64x2 gen() 1343 MB/s raid6: int64x2 xor() 1286 MB/s raid6: int64x4 gen() 1896 MB/s raid6: int64x4 xor() 1321 MB/s raid6: int64x8 gen() 1773 MB/s raid6: int64x8 xor() 1165 MB/s raid6: neonx1 gen() 1834 MB/s raid6: neonx1 xor() 1278 MB/s raid6: neonx2 gen() 2528 MB/s raid6: neonx2 xor() 1942 MB/s raid6: neonx4 gen() 2888 MB/s raid6: neonx4 xor() 2334 MB/s raid6: neonx8 gen() 2957 MB/s raid6: neonx8 xor() 2232 MB/s raid6: using algorithm neonx8 gen() 2957 MB/s raid6: .... xor() 2232 MB/s, rmw enabled Cc: Markus Stockhausen <stockhausen@collogia.de> Cc: Neil Brown <neilb@suse.de> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: NeilBrown <neilb@suse.com>
71 lines
2.1 KiB
C
71 lines
2.1 KiB
C
/*
|
|
* linux/lib/raid6/neon.c - RAID6 syndrome calculation using ARM NEON intrinsics
|
|
*
|
|
* Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
* published by the Free Software Foundation.
|
|
*/
|
|
|
|
#include <linux/raid/pq.h>
|
|
|
|
#ifdef __KERNEL__
|
|
#include <asm/neon.h>
|
|
#else
|
|
#define kernel_neon_begin()
|
|
#define kernel_neon_end()
|
|
#define cpu_has_neon() (1)
|
|
#endif
|
|
|
|
/*
 * There are two reasons these wrappers are kept in a separate compilation unit
 * from the actual implementations in neonN.c (generated from neon.uc by
 * unroll.awk):
 * - the actual implementations use NEON intrinsics, and the GCC support header
 *   (arm_neon.h) is not fully compatible (type-wise) with the kernel;
 * - the neonN.c files are compiled with -mfpu=neon and optimization enabled,
 *   and we have to make sure that we never use *any* NEON/VFP instructions
 *   outside a kernel_neon_begin()/kernel_neon_end() pair.
 */
|
|
|
|
/*
 * RAID6_NEON_WRAPPER(_n) - emit the glue code for NEON variant _n.
 *
 * Each expansion produces, at file scope:
 *  - prototypes for raid6_neon<_n>_gen_syndrome_real() and
 *    raid6_neon<_n>_xor_syndrome_real(), which are only declared here and
 *    must be defined in another compilation unit;
 *  - two static wrappers, raid6_neon<_n>_gen_syndrome() and
 *    raid6_neon<_n>_xor_syndrome(), that convert the size_t length to
 *    unsigned long and call the matching *_real() routine between
 *    kernel_neon_begin() and kernel_neon_end();
 *  - the raid6_neonx<_n> descriptor, initialised positionally with the two
 *    wrappers, raid6_have_neon, the string "neonx<_n>" and a trailing 0.
 *
 * The expansion ends without a semicolon, so the invocation supplies it.
 */
#define RAID6_NEON_WRAPPER(_n)						\
	void raid6_neon ## _n ## _gen_syndrome_real(int,		\
				unsigned long, void**);			\
	void raid6_neon ## _n ## _xor_syndrome_real(int,		\
				int, int, unsigned long, void**);	\
									\
	static void raid6_neon ## _n ## _gen_syndrome(int disks,	\
					size_t bytes, void **ptrs)	\
	{								\
		const unsigned long len = (unsigned long)bytes;		\
									\
		kernel_neon_begin();					\
		raid6_neon ## _n ## _gen_syndrome_real(disks, len,	\
						       ptrs);		\
		kernel_neon_end();					\
	}								\
									\
	static void raid6_neon ## _n ## _xor_syndrome(int disks,	\
					int start, int stop,		\
					size_t bytes, void **ptrs)	\
	{								\
		const unsigned long len = (unsigned long)bytes;		\
									\
		kernel_neon_begin();					\
		raid6_neon ## _n ## _xor_syndrome_real(disks, start,	\
						       stop, len, ptrs); \
		kernel_neon_end();					\
	}								\
									\
	struct raid6_calls const raid6_neonx ## _n = {			\
		raid6_neon ## _n ## _gen_syndrome,			\
		raid6_neon ## _n ## _xor_syndrome,			\
		raid6_have_neon,					\
		"neonx" #_n,						\
		0							\
	}
|
|
|
|
/*
 * Availability callback placed in every raid6_neonx<N> descriptor by
 * RAID6_NEON_WRAPPER().
 *
 * Returns the result of cpu_has_neon(). When __KERNEL__ is not defined,
 * cpu_has_neon() is the local macro that expands to (1), so this always
 * returns 1 in that build.
 */
static int raid6_have_neon(void)
{
	int neon_present = cpu_has_neon();

	return neon_present;
}
|
|
|
|
/*
 * Expand the wrapper pair and the raid6_neonx<N> descriptor for
 * N = 1, 2, 4 and 8.
 */
RAID6_NEON_WRAPPER(1);
RAID6_NEON_WRAPPER(2);
RAID6_NEON_WRAPPER(4);
RAID6_NEON_WRAPPER(8);
|