Add patch to improve performance in some Raspberry Pi use cases

2016-10-22 15:11:23 +01:00 · 2016-10-22 15:11:23 +01:00 · 6651ada7db
parent 5950c82692
commit 6651ada7db
2 changed files with 90 additions and 1 deletions
--- a/mesa.spec
+++ b/mesa.spec
@@ -46,7 +46,7 @@
 Name:           mesa
 Summary:        Mesa graphics libraries
 Version:        12.0.3
-Release:        2%{?rctag:.%{rctag}}%{?dist}
+Release:        3%{?rctag:.%{rctag}}%{?dist}

 License:        MIT
 URL:            http://www.mesa3d.org
@@ -67,6 +67,7 @@ Patch4:         0004-bigendian-assert.patch
 Patch5:         0001-pipe_loader_sw-Fix-fd-leak-when-instantiated-via-pip.patch
 Patch6:         0001-loader-dri3-add-get_dri_screen-to-the-vtable.patch
 Patch7:         0002-loader-dri3-import-prime-buffers-in-the-currently-bo.patch
+Patch8:         vc4-avoid-texture-load.patch

 BuildRequires:  gcc
 BuildRequires:  gcc-c++
@@ -622,6 +623,9 @@ popd
 %endif

 %changelog
+* Sat Oct 22 2016 Peter Robinson <pbrobinson@fedoraproject.org> - 12.0.3-3
+- Add patch to improve performance in some Raspberry Pi use cases
+
 * Tue Oct 11 2016 Hans de Goede <hdegoede@redhat.com> - 12.0.3-2
 - Add 2 patches from upstream to fix DRI3 vaapi crashes (rhbz1309446, fdo71759)

--- a/vc4-avoid-texture-load.patch
+++ b/vc4-avoid-texture-load.patch
@@ -0,0 +1,85 @@
+From 99d790538de2e7d7d489a8638b13c5aa069c27c3 Mon Sep 17 00:00:00 2001
+From: Eric Anholt <eric@anholt.net>
+Date: Thu, 13 Oct 2016 12:37:59 -0700
+Subject: vc4: Avoid loading from the texture during non-utile-aligned
+ glTexImage().
+
+Previously, the plan was "if the width/height we have to load/store isn't
+the size the user is planning on writing, then we need to load the old
+contents out beforehand to prevent writing back undefined".
+
+However, when we're doing glTexImage() we often end up aligning the
+width/height into the padding of the texture, and we don't actually
+need to read out that padding.
+
+Improves x11perf -aatrapezoid100 performance from ~460/sec to
+~700/sec.
+
+diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c
+index 4168079..704cd71 100644
+--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
+@@ -284,26 +284,48 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
+                         return NULL;
+ 
+                 /* We need to align the box to utile boundaries, since that's
+-                 * what load/store operate on.
++                 * what load/store operates on.  This may cause us to need to
++                 * read out the original contents in that border area.  Right
++                 * now we just read out the entire contents, including the
++                 * middle area that will just get overwritten.
+                  */
+-                uint32_t orig_width = ptrans->box.width;
+-                uint32_t orig_height = ptrans->box.height;
+                 uint32_t box_start_x = ptrans->box.x & (utile_w - 1);
+                 uint32_t box_start_y = ptrans->box.y & (utile_h - 1);
+-                ptrans->box.width += box_start_x;
+-                ptrans->box.x -= box_start_x;
+-                ptrans->box.height += box_start_y;
+-                ptrans->box.y -= box_start_y;
+-                ptrans->box.width = align(ptrans->box.width, utile_w);
+-                ptrans->box.height = align(ptrans->box.height, utile_h);
++                bool needs_load = (usage & PIPE_TRANSFER_READ) != 0;
++
++                if (box_start_x) {
++                        ptrans->box.width += box_start_x;
++                        ptrans->box.x -= box_start_x;
++                        needs_load = true;
++                }
++                if (box_start_y) {
++                        ptrans->box.height += box_start_y;
++                        ptrans->box.y -= box_start_y;
++                        needs_load = true;
++                }
++                if (ptrans->box.width & (utile_w - 1)) {
++                        /* We only need to force a load if our border region
++                         * we're extending into is actually part of the
++                         * texture.
++                         */
++                        uint32_t slice_width = u_minify(prsc->width0, level);
++                        if (ptrans->box.x + ptrans->box.width != slice_width)
++                                needs_load = true;
++                        ptrans->box.width = align(ptrans->box.width, utile_w);
++                }
++                if (ptrans->box.height & (utile_h - 1)) {
++                        uint32_t slice_height = u_minify(prsc->height0, level);
++                        if (ptrans->box.y + ptrans->box.height != slice_height)
++                                needs_load = true;
++                        ptrans->box.height = align(ptrans->box.height, utile_h);
++                }
+ 
+                 ptrans->stride = ptrans->box.width * rsc->cpp;
+                 ptrans->layer_stride = ptrans->stride * ptrans->box.height;
+ 
+                 trans->map = malloc(ptrans->layer_stride * ptrans->box.depth);
+-                if (usage & PIPE_TRANSFER_READ ||
+-                    ptrans->box.width != orig_width ||
+-                    ptrans->box.height != orig_height) {
++
++                if (needs_load) {
+                         vc4_load_tiled_image(trans->map, ptrans->stride,
+                                              buf + slice->offset +
+                                              ptrans->box.z * rsc->cube_map_stride,
+-- 
+cgit v0.10.2
+