Add patch to improve performance in some Raspberry Pi use cases

This commit is contained in:
Peter Robinson 2016-10-22 15:11:23 +01:00
parent 5950c82692
commit 6651ada7db
2 changed files with 90 additions and 1 deletions

View File

@@ -46,7 +46,7 @@
Name: mesa
Summary: Mesa graphics libraries
Version: 12.0.3
Release: 2%{?rctag:.%{rctag}}%{?dist}
Release: 3%{?rctag:.%{rctag}}%{?dist}
License: MIT
URL: http://www.mesa3d.org
@@ -67,6 +67,7 @@ Patch4: 0004-bigendian-assert.patch
Patch5: 0001-pipe_loader_sw-Fix-fd-leak-when-instantiated-via-pip.patch
Patch6: 0001-loader-dri3-add-get_dri_screen-to-the-vtable.patch
Patch7: 0002-loader-dri3-import-prime-buffers-in-the-currently-bo.patch
Patch8: vc4-avoid-texture-load.patch
BuildRequires: gcc
BuildRequires: gcc-c++
@@ -622,6 +623,9 @@ popd
%endif
%changelog
* Sat Oct 22 2016 Peter Robinson <pbrobinson@fedoraproject.org> - 12.0.3-3
- Add patch to improve performance in some Raspberry Pi use cases
* Tue Oct 11 2016 Hans de Goede <hdegoede@redhat.com> - 12.0.3-2
- Add 2 patches from upstream to fix DRI3 vaapi crashes (rhbz1309446, fdo71759)

View File

@@ -0,0 +1,85 @@
From 99d790538de2e7d7d489a8638b13c5aa069c27c3 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 13 Oct 2016 12:37:59 -0700
Subject: vc4: Avoid loading from the texture during non-utile-aligned
glTexImage().
Previously, the plan was "if the width/height we have to load/store isn't
the size the user is planning on writing, then we need to load the old
contents out beforehand to prevent writing back undefined".
However, when we're doing glTexImage() we often end up aligning the
width/height into the padding of the texture, and we don't actually
need to read out that padding.
Improves x11perf -aatrapezoid100 performance from ~460/sec to
~700/sec.
diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c
index 4168079..704cd71 100644
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -284,26 +284,48 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
return NULL;
/* We need to align the box to utile boundaries, since that's
- * what load/store operate on.
+ * what load/store operates on. This may cause us to need to
+ * read out the original contents in that border area. Right
+ * now we just read out the entire contents, including the
+ * middle area that will just get overwritten.
*/
- uint32_t orig_width = ptrans->box.width;
- uint32_t orig_height = ptrans->box.height;
uint32_t box_start_x = ptrans->box.x & (utile_w - 1);
uint32_t box_start_y = ptrans->box.y & (utile_h - 1);
- ptrans->box.width += box_start_x;
- ptrans->box.x -= box_start_x;
- ptrans->box.height += box_start_y;
- ptrans->box.y -= box_start_y;
- ptrans->box.width = align(ptrans->box.width, utile_w);
- ptrans->box.height = align(ptrans->box.height, utile_h);
+ bool needs_load = (usage & PIPE_TRANSFER_READ) != 0;
+
+ if (box_start_x) {
+ ptrans->box.width += box_start_x;
+ ptrans->box.x -= box_start_x;
+ needs_load = true;
+ }
+ if (box_start_y) {
+ ptrans->box.height += box_start_y;
+ ptrans->box.y -= box_start_y;
+ needs_load = true;
+ }
+ if (ptrans->box.width & (utile_w - 1)) {
+ /* We only need to force a load if our border region
+ * we're extending into is actually part of the
+ * texture.
+ */
+ uint32_t slice_width = u_minify(prsc->width0, level);
+ if (ptrans->box.x + ptrans->box.width != slice_width)
+ needs_load = true;
+ ptrans->box.width = align(ptrans->box.width, utile_w);
+ }
+ if (ptrans->box.height & (utile_h - 1)) {
+ uint32_t slice_height = u_minify(prsc->height0, level);
+ if (ptrans->box.y + ptrans->box.height != slice_height)
+ needs_load = true;
+ ptrans->box.height = align(ptrans->box.height, utile_h);
+ }
ptrans->stride = ptrans->box.width * rsc->cpp;
ptrans->layer_stride = ptrans->stride * ptrans->box.height;
trans->map = malloc(ptrans->layer_stride * ptrans->box.depth);
- if (usage & PIPE_TRANSFER_READ ||
- ptrans->box.width != orig_width ||
- ptrans->box.height != orig_height) {
+
+ if (needs_load) {
vc4_load_tiled_image(trans->map, ptrans->stride,
buf + slice->offset +
ptrans->box.z * rsc->cube_map_stride,
--
cgit v0.10.2