166a0f780a
There is a slightly faster way (in terms of the number of instructions being used) to calculate the position of a middle element, preserving integer overflow safeness. ./scripts/bloat-o-meter lib/bsearch.o.old lib/bsearch.o.new add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-24 (-24) function old new delta bsearch 122 98 -24 TEST INT array of size 100001, elements [0..100000]. gcc 7.1, Os, x86_64. a) bsearch() of existing key "100001 - 2": BASE ==== $ perf stat ./a.out Performance counter stats for './a.out': 619.445196 task-clock:u (msec) # 0.999 CPUs utilized 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 133 page-faults:u # 0.215 K/sec 1,949,517,279 cycles:u # 3.147 GHz (83.06%) 181,017,938 stalled-cycles-frontend:u # 9.29% frontend cycles idle (83.05%) 82,959,265 stalled-cycles-backend:u # 4.26% backend cycles idle (67.02%) 4,355,706,383 instructions:u # 2.23 insn per cycle # 0.04 stalled cycles per insn (83.54%) 1,051,539,242 branches:u # 1697.550 M/sec (83.54%) 15,263,381 branch-misses:u # 1.45% of all branches (83.43%) 0.620082548 seconds time elapsed PATCHED ======= $ perf stat ./a.out Performance counter stats for './a.out': 475.097316 task-clock:u (msec) # 0.999 CPUs utilized 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 135 page-faults:u # 0.284 K/sec 1,487,467,717 cycles:u # 3.131 GHz (82.95%) 186,537,162 stalled-cycles-frontend:u # 12.54% frontend cycles idle (82.93%) 28,797,869 stalled-cycles-backend:u # 1.94% backend cycles idle (67.10%) 3,807,564,203 instructions:u # 2.56 insn per cycle # 0.05 stalled cycles per insn (83.57%) 1,049,344,291 branches:u # 2208.693 M/sec (83.60%) 5,485 branch-misses:u # 0.00% of all branches (83.58%) 0.475760235 seconds time elapsed b) bsearch() of un-existing key "100001 + 2": BASE ==== $ perf stat ./a.out Performance counter stats for './a.out': 499.244480 task-clock:u (msec) # 0.999 CPUs utilized 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 132 page-faults:u # 0.264 K/sec 1,571,194,855 cycles:u # 3.147 GHz (83.18%) 13,450,980 stalled-cycles-frontend:u # 0.86% frontend cycles idle (83.18%) 21,256,072 stalled-cycles-backend:u # 1.35% backend cycles idle (66.78%) 4,171,197,909 instructions:u # 2.65 insn per cycle # 0.01 stalled cycles per insn (83.68%) 1,009,175,281 branches:u # 2021.405 M/sec (83.79%) 3,122 branch-misses:u # 0.00% of all branches (83.37%) 0.499871144 seconds time elapsed PATCHED ======= $ perf stat ./a.out Performance counter stats for './a.out': 399.023499 task-clock:u (msec) # 0.998 CPUs utilized 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 134 page-faults:u # 0.336 K/sec 1,245,793,991 cycles:u # 3.122 GHz (83.39%) 11,529,273 stalled-cycles-frontend:u # 0.93% frontend cycles idle (83.46%) 12,116,311 stalled-cycles-backend:u # 0.97% backend cycles idle (66.92%) 3,679,710,005 instructions:u # 2.95 insn per cycle # 0.00 stalled cycles per insn (83.47%) 1,009,792,625 branches:u # 2530.660 M/sec (83.46%) 2,590 branch-misses:u # 0.00% of all branches (83.12%) 0.399733539 seconds time elapsed Link: http://lkml.kernel.org/r/20170607150457.5905-1-sergey.senozhatsky@gmail.com Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> Cc: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> |
||
---|---|---|
.. | ||
842 | ||
fonts | ||
lz4 | ||
lzo | ||
mpi | ||
raid6 | ||
reed_solomon | ||
xz | ||
zlib_deflate | ||
zlib_inflate | ||
.gitignore | ||
argv_split.c | ||
asn1_decoder.c | ||
assoc_array.c | ||
atomic64_test.c | ||
atomic64.c | ||
audit.c | ||
bcd.c | ||
bch.c | ||
bitmap.c | ||
bitrev.c | ||
bsearch.c | ||
btree.c | ||
bug.c | ||
build_OID_registry | ||
bust_spinlocks.c | ||
chacha20.c | ||
check_signature.c | ||
checksum.c | ||
clz_ctz.c | ||
clz_tab.c | ||
cmdline.c | ||
compat_audit.c | ||
cordic.c | ||
cpu_rmap.c | ||
cpumask.c | ||
crc4.c | ||
crc7.c | ||
crc8.c | ||
crc16.c | ||
crc32.c | ||
crc32defs.h | ||
crc32test.c | ||
crc-ccitt.c | ||
crc-itu-t.c | ||
crc-t10dif.c | ||
ctype.c | ||
debug_info.c | ||
debug_locks.c | ||
debugobjects.c | ||
dec_and_lock.c | ||
decompress_bunzip2.c | ||
decompress_inflate.c | ||
decompress_unlz4.c | ||
decompress_unlzma.c | ||
decompress_unlzo.c | ||
decompress_unxz.c | ||
decompress.c | ||
devres.c | ||
digsig.c | ||
div64.c | ||
dma-debug.c | ||
dma-noop.c | ||
dma-virt.c | ||
dump_stack.c | ||
dynamic_debug.c | ||
dynamic_queue_limits.c | ||
earlycpio.c | ||
errseq.c | ||
extable.c | ||
fault-inject.c | ||
fdt_empty_tree.c | ||
fdt_ro.c | ||
fdt_rw.c | ||
fdt_strerror.c | ||
fdt_sw.c | ||
fdt_wip.c | ||
fdt.c | ||
find_bit.c | ||
flex_array.c | ||
flex_proportions.c | ||
gcd.c | ||
gen_crc32table.c | ||
genalloc.c | ||
glob.c | ||
globtest.c | ||
hexdump.c | ||
hweight.c | ||
idr.c | ||
inflate.c | ||
int_sqrt.c | ||
interval_tree_test.c | ||
interval_tree.c | ||
iomap_copy.c | ||
iomap.c | ||
iommu-common.c | ||
iommu-helper.c | ||
ioremap.c | ||
iov_iter.c | ||
irq_poll.c | ||
irq_regs.c | ||
is_single_threaded.c | ||
jedec_ddr_data.c | ||
kasprintf.c | ||
Kconfig | ||
Kconfig.debug | ||
Kconfig.kasan | ||
Kconfig.kgdb | ||
Kconfig.kmemcheck | ||
Kconfig.ubsan | ||
kfifo.c | ||
klist.c | ||
kobject_uevent.c | ||
kobject.c | ||
kstrtox.c | ||
kstrtox.h | ||
lcm.c | ||
libcrc32c.c | ||
list_debug.c | ||
list_sort.c | ||
llist.c | ||
locking-selftest-hardirq.h | ||
locking-selftest-mutex.h | ||
locking-selftest-rlock-hardirq.h | ||
locking-selftest-rlock-softirq.h | ||
locking-selftest-rlock.h | ||
locking-selftest-rsem.h | ||
locking-selftest-rtmutex.h | ||
locking-selftest-softirq.h | ||
locking-selftest-spin-hardirq.h | ||
locking-selftest-spin-softirq.h | ||
locking-selftest-spin.h | ||
locking-selftest-wlock-hardirq.h | ||
locking-selftest-wlock-softirq.h | ||
locking-selftest-wlock.h | ||
locking-selftest-wsem.h | ||
locking-selftest.c | ||
lockref.c | ||
lru_cache.c | ||
Makefile | ||
memory-notifier-error-inject.c | ||
memweight.c | ||
net_utils.c | ||
netdev-notifier-error-inject.c | ||
nlattr.c | ||
nmi_backtrace.c | ||
nodemask.c | ||
notifier-error-inject.c | ||
notifier-error-inject.h | ||
of-reconfig-notifier-error-inject.c | ||
oid_registry.c | ||
once.c | ||
parman.c | ||
parser.c | ||
pci_iomap.c | ||
percpu_counter.c | ||
percpu_ida.c | ||
percpu_test.c | ||
percpu-refcount.c | ||
plist.c | ||
pm-notifier-error-inject.c | ||
prime_numbers.c | ||
radix-tree.c | ||
random32.c | ||
ratelimit.c | ||
rational.c | ||
rbtree_test.c | ||
rbtree.c | ||
reciprocal_div.c | ||
refcount.c | ||
rhashtable.c | ||
sbitmap.c | ||
scatterlist.c | ||
seq_buf.c | ||
sg_pool.c | ||
sg_split.c | ||
sha1.c | ||
show_mem.c | ||
siphash.c | ||
smp_processor_id.c | ||
sort.c | ||
stackdepot.c | ||
stmp_device.c | ||
string_helpers.c | ||
string.c | ||
strncpy_from_user.c | ||
strnlen_user.c | ||
swiotlb.c | ||
syscall.c | ||
test_bitmap.c | ||
test_bpf.c | ||
test_firmware.c | ||
test_hash.c | ||
test_hexdump.c | ||
test_kasan.c | ||
test_list_sort.c | ||
test_module.c | ||
test_parman.c | ||
test_printf.c | ||
test_rhashtable.c | ||
test_siphash.c | ||
test_sort.c | ||
test_static_key_base.c | ||
test_static_keys.c | ||
test_user_copy.c | ||
test_uuid.c | ||
test-kstrtox.c | ||
test-string_helpers.c | ||
textsearch.c | ||
timerqueue.c | ||
ts_bm.c | ||
ts_fsm.c | ||
ts_kmp.c | ||
ubsan.c | ||
ubsan.h | ||
ucs2_string.c | ||
usercopy.c | ||
uuid.c | ||
vsprintf.c | ||
win_minmax.c |