From 6651ada7db8654cfd23403e2f3542ea168919f11 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Sat, 22 Oct 2016 15:11:23 +0100 Subject: [PATCH] Add patch to improve performance in some Raspberry Pi use cases --- mesa.spec | 6 ++- vc4-avoid-texture-load.patch | 85 ++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 vc4-avoid-texture-load.patch diff --git a/mesa.spec b/mesa.spec index 755197c..695322a 100644 --- a/mesa.spec +++ b/mesa.spec @@ -46,7 +46,7 @@ Name: mesa Summary: Mesa graphics libraries Version: 12.0.3 -Release: 2%{?rctag:.%{rctag}}%{?dist} +Release: 3%{?rctag:.%{rctag}}%{?dist} License: MIT URL: http://www.mesa3d.org @@ -67,6 +67,7 @@ Patch4: 0004-bigendian-assert.patch Patch5: 0001-pipe_loader_sw-Fix-fd-leak-when-instantiated-via-pip.patch Patch6: 0001-loader-dri3-add-get_dri_screen-to-the-vtable.patch Patch7: 0002-loader-dri3-import-prime-buffers-in-the-currently-bo.patch +Patch8: vc4-avoid-texture-load.patch BuildRequires: gcc BuildRequires: gcc-c++ @@ -622,6 +623,9 @@ popd %endif %changelog +* Sat Oct 22 2016 Peter Robinson 12.0.3-3 +- Add patch to improve performance in some Raspberry Pi use cases + * Tue Oct 11 2016 Hans de Goede - 12.0.3-2 - Add 2 patches from upstream to fix DRI3 vaapi crashes (rhbz1309446, fdo71759) diff --git a/vc4-avoid-texture-load.patch b/vc4-avoid-texture-load.patch new file mode 100644 index 0000000..a687283 --- /dev/null +++ b/vc4-avoid-texture-load.patch @@ -0,0 +1,85 @@ +From 99d790538de2e7d7d489a8638b13c5aa069c27c3 Mon Sep 17 00:00:00 2001 +From: Eric Anholt +Date: Thu, 13 Oct 2016 12:37:59 -0700 +Subject: vc4: Avoid loading from the texture during non-utile-aligned + glTexImage(). + +Previously, the plan was "if the width/height we have to load/store isn't +the size the user is planning on writing, then we need to load the old +contents out beforehand to prevent writing back undefined". + +However, when we're doing glTexImage() we often end up aligning the +width/height into the padding of the texture, and we don't actually +need to read out that padding. + +Improves x11perf -aatrapezoid100 performance from ~460/sec to +~700/sec. + +diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c +index 4168079..704cd71 100644 +--- a/src/gallium/drivers/vc4/vc4_resource.c ++++ b/src/gallium/drivers/vc4/vc4_resource.c +@@ -284,26 +284,48 @@ vc4_resource_transfer_map(struct pipe_context *pctx, + return NULL; + + /* We need to align the box to utile boundaries, since that's +- * what load/store operate on. ++ * what load/store operates on. This may cause us to need to ++ * read out the original contents in that border area. Right ++ * now we just read out the entire contents, including the ++ * middle area that will just get overwritten. + */ +- uint32_t orig_width = ptrans->box.width; +- uint32_t orig_height = ptrans->box.height; + uint32_t box_start_x = ptrans->box.x & (utile_w - 1); + uint32_t box_start_y = ptrans->box.y & (utile_h - 1); +- ptrans->box.width += box_start_x; +- ptrans->box.x -= box_start_x; +- ptrans->box.height += box_start_y; +- ptrans->box.y -= box_start_y; +- ptrans->box.width = align(ptrans->box.width, utile_w); +- ptrans->box.height = align(ptrans->box.height, utile_h); ++ bool needs_load = (usage & PIPE_TRANSFER_READ) != 0; ++ ++ if (box_start_x) { ++ ptrans->box.width += box_start_x; ++ ptrans->box.x -= box_start_x; ++ needs_load = true; ++ } ++ if (box_start_y) { ++ ptrans->box.height += box_start_y; ++ ptrans->box.y -= box_start_y; ++ needs_load = true; ++ } ++ if (ptrans->box.width & (utile_w - 1)) { ++ /* We only need to force a load if our border region ++ * we're extending into is actually part of the ++ * texture. ++ */ ++ uint32_t slice_width = u_minify(prsc->width0, level); ++ if (ptrans->box.x + ptrans->box.width != slice_width) ++ needs_load = true; ++ ptrans->box.width = align(ptrans->box.width, utile_w); ++ } ++ if (ptrans->box.height & (utile_h - 1)) { ++ uint32_t slice_height = u_minify(prsc->height0, level); ++ if (ptrans->box.y + ptrans->box.height != slice_height) ++ needs_load = true; ++ ptrans->box.height = align(ptrans->box.height, utile_h); ++ } + + ptrans->stride = ptrans->box.width * rsc->cpp; + ptrans->layer_stride = ptrans->stride * ptrans->box.height; + + trans->map = malloc(ptrans->layer_stride * ptrans->box.depth); +- if (usage & PIPE_TRANSFER_READ || +- ptrans->box.width != orig_width || +- ptrans->box.height != orig_height) { ++ ++ if (needs_load) { + vc4_load_tiled_image(trans->map, ptrans->stride, + buf + slice->offset + + ptrans->box.z * rsc->cube_map_stride, +-- +cgit v0.10.2 +