From aa9273eca064148d3097670bfb66f3ecfd52c6e0 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 19 Mar 2013 11:55:32 +1000 Subject: [PATCH 1/2] add SNB hang workaround from chromium --- i965-hack-hiz-snb-fix.patch | 63 +++++++++++++++++++++++++++++++++++++ mesa.spec | 8 ++++- 2 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 i965-hack-hiz-snb-fix.patch diff --git a/i965-hack-hiz-snb-fix.patch b/i965-hack-hiz-snb-fix.patch new file mode 100644 index 0000000..bec102b --- /dev/null +++ b/i965-hack-hiz-snb-fix.patch @@ -0,0 +1,63 @@ +diff -up Mesa-9.1/src/mesa/drivers/dri/i965/brw_context.c.marcheu Mesa-9.1/src/mesa/drivers/dri/i965/brw_context.c +--- Mesa-9.1/src/mesa/drivers/dri/i965/brw_context.c.marcheu 2013-02-20 10:26:22.000000000 +1000 ++++ Mesa-9.1/src/mesa/drivers/dri/i965/brw_context.c 2013-03-19 10:44:12.761921622 +1000 +@@ -329,6 +329,7 @@ brwCreateContext(int api, + brw->urb.max_gs_entries = 256; + } + brw->urb.gen6_gs_previously_active = false; ++ brw->urb.prims_since_last_flush = 0; + } else if (intel->gen == 5) { + brw->urb.size = 1024; + brw->max_vs_threads = 72; +diff -up Mesa-9.1/src/mesa/drivers/dri/i965/brw_context.h.marcheu Mesa-9.1/src/mesa/drivers/dri/i965/brw_context.h +--- Mesa-9.1/src/mesa/drivers/dri/i965/brw_context.h.marcheu 2013-02-23 11:45:52.000000000 +1000 ++++ Mesa-9.1/src/mesa/drivers/dri/i965/brw_context.h 2013-03-19 10:44:12.762921630 +1000 +@@ -864,6 +864,7 @@ struct brw_context + * URB space for the GS. 
+ */ + bool gen6_gs_previously_active; ++ int prims_since_last_flush; + } urb; + + +diff -up Mesa-9.1/src/mesa/drivers/dri/i965/brw_draw.c.marcheu Mesa-9.1/src/mesa/drivers/dri/i965/brw_draw.c +--- Mesa-9.1/src/mesa/drivers/dri/i965/brw_draw.c.marcheu 2013-02-20 10:26:22.000000000 +1000 ++++ Mesa-9.1/src/mesa/drivers/dri/i965/brw_draw.c 2013-03-19 10:44:12.763921639 +1000 +@@ -294,10 +294,14 @@ static void brw_merge_inputs( struct brw + * Resolve the depth buffer's HiZ buffer and resolve the depth buffer of each + * enabled depth texture. + * ++ * We don't resolve the depth buffer's HiZ if no primitives have been drawn ++ * since the last flush. This avoids a case where we lockup the GPU on boot ++ * when this is the first thing we do. ++ * + * (In the future, this will also perform MSAA resolves). + */ + static void +-brw_predraw_resolve_buffers(struct brw_context *brw) ++brw_predraw_resolve_buffers(struct brw_context *brw, int nr_prims) + { + struct gl_context *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; +@@ -306,9 +310,11 @@ brw_predraw_resolve_buffers(struct brw_c + + /* Resolve the depth buffer's HiZ buffer. */ + depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH); +- if (depth_irb) ++ if (depth_irb && brw->urb.prims_since_last_flush > 0 ) + intel_renderbuffer_resolve_hiz(intel, depth_irb); + ++ brw->urb.prims_since_last_flush = nr_prims; ++ + /* Resolve depth buffer of each enabled depth texture. */ + for (int i = 0; i < BRW_MAX_TEX_UNIT; i++) { + if (!ctx->Texture.Unit[i]._ReallyEnabled) +@@ -445,7 +451,7 @@ static bool brw_try_draw_prims( struct g + * and finalizing textures but before setting up any hardware state for + * this draw call. 
+ */ +- brw_predraw_resolve_buffers(brw); ++ brw_predraw_resolve_buffers(brw, nr_prims); + + /* Bind all inputs, derive varying and size information: + */ diff --git a/mesa.spec b/mesa.spec index cad6ce4..bcfa976 100644 --- a/mesa.spec +++ b/mesa.spec @@ -49,7 +49,7 @@ Summary: Mesa graphics libraries Name: mesa Version: 9.1 -Release: 1%{?dist} +Release: 2%{?dist} License: MIT Group: System Environment/Libraries URL: http://www.mesa3d.org @@ -72,6 +72,7 @@ Patch9: mesa-8.0-llvmpipe-shmget.patch #Patch11: mesa-8.0-nouveau-tfp-blacklist.patch Patch12: mesa-8.0.1-fix-16bpp.patch #Patch13: mesa-9.0.1-less-cxx-please.patch +Patch14: i965-hack-hiz-snb-fix.patch BuildRequires: pkgconfig autoconf automake libtool %if %{with_hardware} @@ -298,6 +299,8 @@ Mesa shared glapi #%patch13 -p1 -b .less-cpp +# hack from chromium - awaiting real upstream fix +%patch14 -p1 -b .snbfix # default to dri (not xlib) for libGL on all arches # XXX please fix upstream sed -i 's/^default_driver.*$/default_driver="dri"/' configure.ac @@ -589,6 +592,9 @@ rm -rf $RPM_BUILD_ROOT %endif %changelog +* Tue Mar 19 2013 Dave Airlie 9.1-2 +- add SNB hang workaround from chromium + * Fri Mar 08 2013 Adam Jackson 9.1-1 - Mesa 9.1 From 6655a94c5c7e4c845cb5b6c9c3aa1df04054a48b Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Tue, 19 Mar 2013 11:50:20 -0400 Subject: [PATCH 2/2] mesa-9.1-53-gd0ccb5b.patch: Sync with today's git --- mesa-9.1-53-gd0ccb5b.patch | 1974 ++++++++++++++++++++++++++++++++++++ mesa.spec | 25 +- 2 files changed, 1988 insertions(+), 11 deletions(-) create mode 100644 mesa-9.1-53-gd0ccb5b.patch diff --git a/mesa-9.1-53-gd0ccb5b.patch b/mesa-9.1-53-gd0ccb5b.patch new file mode 100644 index 0000000..66b13fc --- /dev/null +++ b/mesa-9.1-53-gd0ccb5b.patch @@ -0,0 +1,1974 @@ +diff --git a/bin/get-pick-list.sh b/bin/get-pick-list.sh +index a141afe..d3ac511 100755 +--- a/bin/get-pick-list.sh ++++ b/bin/get-pick-list.sh +@@ -8,7 +8,7 @@ git log --reverse --grep="cherry picked from commit" 
origin/master..HEAD |\ + sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked + + # Grep for commits that were marked as a candidate for the stable tree. +-git log --reverse --pretty=%H -i --grep='^[[:space:]]*NOTE: This is a candidate' HEAD..origin/master |\ ++git log --reverse --pretty=%H -i --grep='^[[:space:]]*NOTE: .*[Cc]andidate' HEAD..origin/master |\ + while read sha + do + # Check to see whether the patch is on the ignore list. +diff --git a/common.py b/common.py +index 6ff9608..1d618e6 100644 +--- a/common.py ++++ b/common.py +@@ -100,4 +100,4 @@ def AddOptions(opts): + opts.Add(BoolOption('quiet', 'DEPRECATED: profile build', 'yes')) + opts.Add(BoolOption('texture_float', 'enable floating-point textures and renderbuffers', 'no')) + if host_platform == 'windows': +- opts.Add(EnumOption('MSVS_VERSION', 'MS Visual C++ version', None, allowed_values=('7.1', '8.0', '9.0'))) ++ opts.Add(EnumOption('MSVC_VERSION', 'MS Visual C++ version', None, allowed_values=('7.1', '8.0', '9.0', '10.0', '11.0'))) +diff --git a/configure.ac b/configure.ac +index 5701f8a..d75cf65 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -1682,6 +1682,9 @@ if test "x$enable_gallium_llvm" = xyes; then + if $LLVM_CONFIG --components | grep -q '\'; then + LLVM_COMPONENTS="${LLVM_COMPONENTS} mcjit" + fi ++ if $LLVM_CONFIG --components | grep -q '\'; then ++ LLVM_COMPONENTS="${LLVM_COMPONENTS} oprofilejit" ++ fi + + if test "x$enable_opencl" = xyes; then + LLVM_COMPONENTS="${LLVM_COMPONENTS} ipo linker instrumentation" +diff --git a/docs/index.html b/docs/index.html +index 5c92204..5d7229d 100644 +--- a/docs/index.html ++++ b/docs/index.html +@@ -16,6 +16,23 @@ + +

News

+ ++

February 22, 2013

++ ++

++Mesa 9.1 is released. ++This is a new development release. ++See the release notes for more information about the release. ++

++ ++ ++

February 21, 2013

++ ++

++Mesa 9.0.3 is released. ++This is a bug fix release. ++

++ ++ +

January 22, 2013

+ +

+diff --git a/docs/relnotes-9.1.html b/docs/relnotes-9.1.html +index 24ba9f9..8232ab8 100644 +--- a/docs/relnotes-9.1.html ++++ b/docs/relnotes-9.1.html +@@ -14,7 +14,7 @@ + +

+ +-

Mesa 9.1 Release Notes / date February 22, 2013

++

Mesa 9.1 Release Notes / February 22, 2013

+ +

+ Mesa 9.1 is a new development release. +@@ -33,7 +33,9 @@ because GL_ARB_compatibility is not supported. + +

MD5 checksums

+
+-tbd
++86d40f3056f89949368764bf84aff55e  MesaLib-9.1.tar.gz
++d3891e02215422e120271d976ff1947e  MesaLib-9.1.tar.bz2
++01645f28f53351c23b0beb6c688911d8  MesaLib-9.1.zip
+ 
+ + +diff --git a/docs/relnotes.html b/docs/relnotes.html +index e373091..2e11bc4 100644 +--- a/docs/relnotes.html ++++ b/docs/relnotes.html +@@ -22,6 +22,7 @@ The release notes summarize what's new or changed in each Mesa release. + +
    +
  • 9.1 release notes ++
  • 9.0.3 release notes +
  • 9.0.2 release notes +
  • 9.0.1 release notes +
  • 9.0 release notes +diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h +index 09dca5b..1e388f8 100644 +--- a/include/pci_ids/i965_pci_ids.h ++++ b/include/pci_ids/i965_pci_ids.h +@@ -53,12 +53,12 @@ CHIPSET(0x0A26, HASWELL_ULT_M_GT2_PLUS, hsw_gt2) + CHIPSET(0x0A0A, HASWELL_ULT_S_GT1, hsw_gt1) + CHIPSET(0x0A1A, HASWELL_ULT_S_GT2, hsw_gt2) + CHIPSET(0x0A2A, HASWELL_ULT_S_GT2_PLUS, hsw_gt2) +-CHIPSET(0x0D12, HASWELL_CRW_GT1, hsw_gt1) +-CHIPSET(0x0D22, HASWELL_CRW_GT2, hsw_gt2) +-CHIPSET(0x0D32, HASWELL_CRW_GT2_PLUS, hsw_gt2) +-CHIPSET(0x0D16, HASWELL_CRW_M_GT1, hsw_gt1) +-CHIPSET(0x0D26, HASWELL_CRW_M_GT2, hsw_gt2) +-CHIPSET(0x0D36, HASWELL_CRW_M_GT2_PLUS, hsw_gt2) +-CHIPSET(0x0D1A, HASWELL_CRW_S_GT1, hsw_gt1) +-CHIPSET(0x0D2A, HASWELL_CRW_S_GT2, hsw_gt2) +-CHIPSET(0x0D3A, HASWELL_CRW_S_GT2_PLUS, hsw_gt2) ++CHIPSET(0x0D02, HASWELL_CRW_GT1, hsw_gt1) ++CHIPSET(0x0D12, HASWELL_CRW_GT2, hsw_gt2) ++CHIPSET(0x0D22, HASWELL_CRW_GT2_PLUS, hsw_gt2) ++CHIPSET(0x0D06, HASWELL_CRW_M_GT1, hsw_gt1) ++CHIPSET(0x0D16, HASWELL_CRW_M_GT2, hsw_gt2) ++CHIPSET(0x0D26, HASWELL_CRW_M_GT2_PLUS, hsw_gt2) ++CHIPSET(0x0D0A, HASWELL_CRW_S_GT1, hsw_gt1) ++CHIPSET(0x0D1A, HASWELL_CRW_S_GT2, hsw_gt2) ++CHIPSET(0x0D2A, HASWELL_CRW_S_GT2_PLUS, hsw_gt2) +diff --git a/include/pci_ids/r600_pci_ids.h b/include/pci_ids/r600_pci_ids.h +index 7ceb820..9c9bab2 100644 +--- a/include/pci_ids/r600_pci_ids.h ++++ b/include/pci_ids/r600_pci_ids.h +@@ -298,6 +298,10 @@ CHIPSET(0x9907, ARUBA_9907, ARUBA) + CHIPSET(0x9908, ARUBA_9908, ARUBA) + CHIPSET(0x9909, ARUBA_9909, ARUBA) + CHIPSET(0x990A, ARUBA_990A, ARUBA) ++CHIPSET(0x990B, ARUBA_990B, ARUBA) ++CHIPSET(0x990C, ARUBA_990C, ARUBA) ++CHIPSET(0x990D, ARUBA_990D, ARUBA) ++CHIPSET(0x990E, ARUBA_990E, ARUBA) + CHIPSET(0x990F, ARUBA_990F, ARUBA) + CHIPSET(0x9910, ARUBA_9910, ARUBA) + CHIPSET(0x9913, ARUBA_9913, ARUBA) +@@ -309,6 +313,13 @@ CHIPSET(0x9991, ARUBA_9991, ARUBA) + CHIPSET(0x9992, ARUBA_9992, ARUBA) + CHIPSET(0x9993, 
ARUBA_9993, ARUBA) + CHIPSET(0x9994, ARUBA_9994, ARUBA) ++CHIPSET(0x9995, ARUBA_9995, ARUBA) ++CHIPSET(0x9996, ARUBA_9996, ARUBA) ++CHIPSET(0x9997, ARUBA_9997, ARUBA) ++CHIPSET(0x9998, ARUBA_9998, ARUBA) ++CHIPSET(0x9999, ARUBA_9999, ARUBA) ++CHIPSET(0x999A, ARUBA_999A, ARUBA) ++CHIPSET(0x999B, ARUBA_999B, ARUBA) + CHIPSET(0x99A0, ARUBA_99A0, ARUBA) + CHIPSET(0x99A2, ARUBA_99A2, ARUBA) + CHIPSET(0x99A4, ARUBA_99A4, ARUBA) +diff --git a/scons/gallium.py b/scons/gallium.py +index 4b51b6e..b28be5d 100755 +--- a/scons/gallium.py ++++ b/scons/gallium.py +@@ -289,6 +289,7 @@ def generate(env): + '_CRT_SECURE_NO_DEPRECATE', + '_SCL_SECURE_NO_WARNINGS', + '_SCL_SECURE_NO_DEPRECATE', ++ '_ALLOW_KEYWORD_MACROS', + ] + if env['build'] in ('debug', 'checked'): + cppdefines += ['_DEBUG'] +@@ -401,6 +402,8 @@ def generate(env): + '/Oi', # enable intrinsic functions + ] + else: ++ if distutils.version.LooseVersion(env['MSVC_VERSION']) < distutils.version.LooseVersion('11.0'): ++ print 'scons: warning: Visual Studio versions prior to 2012 are known to produce incorrect code when optimizations are enabled ( https://bugs.freedesktop.org/show_bug.cgi?id=58718 )' + ccflags += [ + '/O2', # optimize for speed + ] +diff --git a/scons/llvm.py b/scons/llvm.py +index e1ed760..7f00c6c 100644 +--- a/scons/llvm.py ++++ b/scons/llvm.py +@@ -92,7 +92,19 @@ def generate(env): + 'HAVE_STDINT_H', + ]) + env.Prepend(LIBPATH = [os.path.join(llvm_dir, 'lib')]) +- if llvm_version >= distutils.version.LooseVersion('3.0'): ++ if llvm_version >= distutils.version.LooseVersion('3.2'): ++ # 3.2 ++ env.Prepend(LIBS = [ ++ 'LLVMBitWriter', 'LLVMX86Disassembler', 'LLVMX86AsmParser', ++ 'LLVMX86CodeGen', 'LLVMX86Desc', 'LLVMSelectionDAG', ++ 'LLVMAsmPrinter', 'LLVMMCParser', 'LLVMX86AsmPrinter', ++ 'LLVMX86Utils', 'LLVMX86Info', 'LLVMJIT', ++ 'LLVMExecutionEngine', 'LLVMCodeGen', 'LLVMScalarOpts', ++ 'LLVMInstCombine', 'LLVMTransformUtils', 'LLVMipa', ++ 'LLVMAnalysis', 'LLVMTarget', 'LLVMMC', 'LLVMCore', ++ 
'LLVMSupport', 'LLVMRuntimeDyld', 'LLVMObject' ++ ]) ++ elif llvm_version >= distutils.version.LooseVersion('3.0'): + # 3.0 + env.Prepend(LIBS = [ + 'LLVMBitWriter', 'LLVMX86Disassembler', 'LLVMX86AsmParser', +diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c +index 351fbf4..e17d5be 100644 +--- a/src/egl/drivers/dri2/egl_dri2.c ++++ b/src/egl/drivers/dri2/egl_dri2.c +@@ -195,7 +195,14 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id, + for (i = 0; attr_list[i] != EGL_NONE; i += 2) + _eglSetConfigKey(&base, attr_list[i], attr_list[i+1]); + +- if (depth > 0 && depth != base.BufferSize) ++ /* Allow a 24-bit RGB visual to match a 32-bit RGBA EGLConfig. Otherwise ++ * it will only match a 32-bit RGBA visual. On a composited window manager ++ * on X11, this will make all of the EGLConfigs with destination alpha get ++ * blended by the compositor. This is probably not what the application ++ * wants... especially on drivers that only have 32-bit RGBA EGLConfigs! 
++ */ ++ if (depth > 0 && depth != base.BufferSize ++ && !(depth == 24 && base.BufferSize == 32)) + return NULL; + + if (rgba_masks && memcmp(rgba_masks, dri_masks, sizeof(dri_masks))) +diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c +index 7b879c4..3110809 100644 +--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c ++++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c +@@ -167,12 +167,17 @@ static void interp( const struct clip_stage *clip, + { + int k; + t_nopersp = t; +- for (k = 0; k < 2; k++) ++ /* find either in.x != out.x or in.y != out.y */ ++ for (k = 0; k < 2; k++) { + if (in->clip[k] != out->clip[k]) { +- t_nopersp = (dst->clip[k] - out->clip[k]) / +- (in->clip[k] - out->clip[k]); ++ /* do divide by W, then compute linear interpolation factor */ ++ float in_coord = in->clip[k] / in->clip[3]; ++ float out_coord = out->clip[k] / out->clip[3]; ++ float dst_coord = dst->clip[k] / dst->clip[3]; ++ t_nopersp = (dst_coord - out_coord) / (in_coord - out_coord); + break; + } ++ } + } + + /* Other attributes +diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c +index 3da52b1..3578525 100644 +--- a/src/gallium/auxiliary/draw/draw_pipe_offset.c ++++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c +@@ -127,10 +127,44 @@ static void offset_first_tri( struct draw_stage *stage, + struct prim_header *header ) + { + struct offset_stage *offset = offset_stage(stage); ++ const struct pipe_rasterizer_state *rast = stage->draw->rasterizer; ++ unsigned fill_mode = rast->fill_front; ++ boolean do_offset; ++ ++ if (rast->fill_back != rast->fill_front) { ++ /* Need to check for back-facing triangle */ ++ boolean ccw = header->det < 0.0f; ++ if (ccw != rast->front_ccw) ++ fill_mode = rast->fill_back; ++ } ++ ++ /* Now determine if we need to do offsetting for the point/line/fill mode */ ++ switch (fill_mode) { ++ case PIPE_POLYGON_MODE_FILL: ++ do_offset = rast->offset_tri; ++ 
break; ++ case PIPE_POLYGON_MODE_LINE: ++ do_offset = rast->offset_line; ++ break; ++ case PIPE_POLYGON_MODE_POINT: ++ do_offset = rast->offset_point; ++ break; ++ default: ++ assert(!"invalid fill_mode in offset_first_tri()"); ++ do_offset = rast->offset_tri; ++ } ++ ++ if (do_offset) { ++ offset->scale = rast->offset_scale; ++ offset->clamp = rast->offset_clamp; ++ offset->units = (float) (rast->offset_units * stage->draw->mrd); ++ } ++ else { ++ offset->scale = 0.0f; ++ offset->clamp = 0.0f; ++ offset->units = 0.0f; ++ } + +- offset->units = (float) (stage->draw->rasterizer->offset_units * stage->draw->mrd); +- offset->scale = stage->draw->rasterizer->offset_scale; +- offset->clamp = stage->draw->rasterizer->offset_clamp; + + stage->tri = offset_tri; + stage->tri( stage, header ); +diff --git a/src/gallium/auxiliary/util/u_range.h b/src/gallium/auxiliary/util/u_range.h +new file mode 100644 +index 0000000..4b1d0d1 +--- /dev/null ++++ b/src/gallium/auxiliary/util/u_range.h +@@ -0,0 +1,89 @@ ++/* ++ * Copyright 2013 Marek Olšák ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * on the rights to use, copy, modify, merge, publish, distribute, sub ++ * license, and/or sell copies of the Software, and to permit persons to whom ++ * the Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL ++ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, ++ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR ++ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE ++ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ ++ ++/** ++ * @file ++ * 1D integer range, capable of the union and intersection operations. ++ * ++ * It only maintains a single interval which is extended when the union is ++ * done. This implementation is partially thread-safe (readers are not ++ * protected by a lock). ++ * ++ * @author Marek Olšák ++ */ ++ ++#ifndef U_RANGE_H ++#define U_RANGE_H ++ ++#include "os/os_thread.h" ++ ++struct util_range { ++ unsigned start; /* inclusive */ ++ unsigned end; /* exclusive */ ++ ++ /* for the range to be consistent with multiple contexts: */ ++ pipe_mutex write_mutex; ++}; ++ ++ ++static INLINE void ++util_range_set_empty(struct util_range *range) ++{ ++ range->start = ~0; ++ range->end = 0; ++} ++ ++/* This is like a union of two sets. 
*/ ++static INLINE void ++util_range_add(struct util_range *range, unsigned start, unsigned end) ++{ ++ if (start < range->start || end > range->end) { ++ pipe_mutex_lock(range->write_mutex); ++ range->start = MIN2(start, range->start); ++ range->end = MAX2(end, range->end); ++ pipe_mutex_unlock(range->write_mutex); ++ } ++} ++ ++static INLINE boolean ++util_ranges_intersect(struct util_range *range, unsigned start, unsigned end) ++{ ++ return MAX2(start, range->start) < MIN2(end, range->end); ++} ++ ++ ++/* Init/deinit */ ++ ++static INLINE void ++util_range_init(struct util_range *range) ++{ ++ pipe_mutex_init(range->write_mutex); ++ util_range_set_empty(range); ++} ++ ++static INLINE void ++util_range_destroy(struct util_range *range) ++{ ++ pipe_mutex_destroy(range->write_mutex); ++} ++ ++#endif +diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c +index 40ccaf6..ca8df71 100644 +--- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c ++++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c +@@ -46,6 +46,10 @@ clear_flags(struct pipe_rasterizer_state *rast) + { + rast->light_twoside = 0; + rast->offset_tri = 0; ++ rast->offset_line = 0; ++ rast->offset_point = 0; ++ rast->offset_units = 0.0f; ++ rast->offset_scale = 0.0f; + } + + +@@ -74,6 +78,8 @@ llvmpipe_create_rasterizer_state(struct pipe_context *pipe, + */ + need_pipeline = (rast->fill_front != PIPE_POLYGON_MODE_FILL || + rast->fill_back != PIPE_POLYGON_MODE_FILL || ++ rast->offset_point || ++ rast->offset_line || + rast->point_smooth || + rast->line_smooth || + rast->line_stipple_enable || +diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c +index 2e9c6bf..f17a04a 100644 +--- a/src/gallium/drivers/llvmpipe/lp_texture.c ++++ b/src/gallium/drivers/llvmpipe/lp_texture.c +@@ -295,7 +295,9 @@ llvmpipe_resource_create(struct pipe_screen *_screen, + /* assert(lpr->base.bind); */ + + if 
(resource_is_texture(&lpr->base)) { +- if (lpr->base.bind & PIPE_BIND_DISPLAY_TARGET) { ++ if (lpr->base.bind & (PIPE_BIND_DISPLAY_TARGET | ++ PIPE_BIND_SCANOUT | ++ PIPE_BIND_SHARED)) { + /* displayable surface */ + if (!llvmpipe_displaytarget_layout(screen, lpr)) + goto fail; +diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c +index bb47530..bb43353 100644 +--- a/src/gallium/drivers/r600/evergreen_hw_context.c ++++ b/src/gallium/drivers/r600/evergreen_hw_context.c +@@ -283,4 +283,7 @@ void evergreen_dma_copy(struct r600_context *rctx, + src_offset += csize << shift; + size -= csize; + } ++ ++ util_range_add(&rdst->valid_buffer_range, dst_offset, ++ dst_offset + size); + } +diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c +index 389ad3c..804c037 100644 +--- a/src/gallium/drivers/r600/evergreen_state.c ++++ b/src/gallium/drivers/r600/evergreen_state.c +@@ -808,6 +808,7 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx, + dsa->valuemask[1] = state->stencil[1].valuemask; + dsa->writemask[0] = state->stencil[0].writemask; + dsa->writemask[1] = state->stencil[1].writemask; ++ dsa->zwritemask = state->depth.writemask; + + db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | + S_028800_Z_WRITE_ENABLE(state->depth.writemask) | +@@ -1321,6 +1322,10 @@ void evergreen_init_color_surface_rat(struct r600_context *rctx, + * elements. 
*/ + surf->cb_color_dim = pipe_buffer->width0; + ++ /* Set the buffer range the GPU will have access to: */ ++ util_range_add(&r600_resource(pipe_buffer)->valid_buffer_range, ++ 0, pipe_buffer->width0); ++ + surf->cb_color_cmask = surf->cb_color_base; + surf->cb_color_cmask_slice = 0; + surf->cb_color_fmask = surf->cb_color_base; +@@ -1405,10 +1410,15 @@ void evergreen_init_color_surface(struct r600_context *rctx, + S_028C74_NON_DISP_TILING_ORDER(non_disp_tiling) | + S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); + +- if (rctx->chip_class == CAYMAN && rtex->resource.b.b.nr_samples > 1) { +- unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples); +- color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | +- S_028C74_NUM_FRAGMENTS(log_samples); ++ if (rctx->chip_class == CAYMAN) { ++ color_attrib |= S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == ++ UTIL_FORMAT_SWIZZLE_1); ++ ++ if (rtex->resource.b.b.nr_samples > 1) { ++ unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples); ++ color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | ++ S_028C74_NUM_FRAGMENTS(log_samples); ++ } + } + + ntype = V_028C70_NUMBER_UNORM; +@@ -1647,6 +1657,11 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, + } + if (rctx->framebuffer.state.zsbuf) { + rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV; ++ ++ rtex = (struct r600_texture*)rctx->framebuffer.state.zsbuf->texture; ++ if (rtex->htile) { ++ rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_DB_META; ++ } + } + + util_copy_framebuffer_state(&rctx->framebuffer.state, state); +@@ -2222,7 +2237,14 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_ + } + db_render_override |= S_02800C_NOOP_CULL_DISABLE(1); + } +- if (rctx->db_state.rsurf && rctx->db_state.rsurf->htile_enabled) { ++ /* FIXME we should be able to use hyperz even if we are not writing to ++ * zbuffer but somehow this trigger GPU lockup. 
See : ++ * ++ * https://bugs.freedesktop.org/show_bug.cgi?id=60848 ++ * ++ * Disable hyperz for now if not writing to zbuffer. ++ */ ++ if (rctx->db_state.rsurf && rctx->db_state.rsurf->htile_enabled && rctx->zwritemask) { + /* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL */ + db_render_override |= S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_OFF); + /* This is to fix a lockup when hyperz and alpha test are enabled at +@@ -3654,6 +3676,17 @@ boolean evergreen_dma_blit(struct pipe_context *ctx, + return FALSE; + } + ++ /* 128 bpp surfaces require non_disp_tiling for both ++ * tiled and linear buffers on cayman. However, async ++ * DMA only supports it on the tiled side. As such ++ * the tile order is backwards after a L2T/T2L packet. ++ */ ++ if ((rctx->chip_class == CAYMAN) && ++ (src_mode != dst_mode) && ++ (util_format_get_blocksize(src->format) >= 16)) { ++ return FALSE; ++ } ++ + if (src_mode == dst_mode) { + uint64_t dst_offset, src_offset; + /* simple dma blit would do NOTE code here assume : +diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h +index 11dbb3b..0115293 100644 +--- a/src/gallium/drivers/r600/r600.h ++++ b/src/gallium/drivers/r600/r600.h +@@ -28,6 +28,7 @@ + + #include "../../winsys/radeon/drm/radeon_winsys.h" + #include "util/u_double_list.h" ++#include "util/u_range.h" + #include "util/u_transfer.h" + + #define R600_ERR(fmt, args...) \ +@@ -50,6 +51,16 @@ struct r600_resource { + + /* Resource state. */ + unsigned domains; ++ ++ /* The buffer range which is initialized (with a write transfer, ++ * streamout, DMA, or as a random access target). The rest of ++ * the buffer is considered invalid and can be mapped unsynchronized. ++ * ++ * This allows unsychronized mapping of a buffer range which hasn't ++ * been used yet. It's for applications which forget to use ++ * the unsynchronized map flag and expect the driver to figure it out. 
++ */ ++ struct util_range valid_buffer_range; + }; + + #define R600_BLOCK_MAX_BO 32 +@@ -152,6 +163,7 @@ struct r600_so_target { + #define R600_CONTEXT_FLUSH_AND_INV (1 << 4) + #define R600_CONTEXT_FLUSH_AND_INV_CB_META (1 << 5) + #define R600_CONTEXT_PS_PARTIAL_FLUSH (1 << 6) ++#define R600_CONTEXT_FLUSH_AND_INV_DB_META (1 << 7) + + struct r600_context; + struct r600_screen; +diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c +index f25c6aa..bda425c 100644 +--- a/src/gallium/drivers/r600/r600_asm.c ++++ b/src/gallium/drivers/r600/r600_asm.c +@@ -322,6 +322,7 @@ int r600_bytecode_add_output(struct r600_bytecode *bc, const struct r600_bytecod + output->swizzle_y == bc->cf_last->output.swizzle_y && + output->swizzle_z == bc->cf_last->output.swizzle_z && + output->swizzle_w == bc->cf_last->output.swizzle_w && ++ output->comp_mask == bc->cf_last->output.comp_mask && + (output->burst_count + bc->cf_last->output.burst_count) <= 16) { + + if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr && +@@ -873,12 +874,6 @@ static int check_and_set_bank_swizzle(struct r600_bytecode *bc, + bank_swizzle[4] = SQ_ALU_SCL_210; + while(bank_swizzle[4] <= SQ_ALU_SCL_221) { + +- if (max_slots == 4) { +- for (i = 0; i < max_slots; i++) { +- if (bank_swizzle[i] == SQ_ALU_VEC_210) +- return -1; +- } +- } + init_bank_swizzle(&bs); + if (scalar_only == false) { + for (i = 0; i < 4; i++) { +@@ -910,8 +905,10 @@ static int check_and_set_bank_swizzle(struct r600_bytecode *bc, + bank_swizzle[i]++; + if (bank_swizzle[i] <= SQ_ALU_VEC_210) + break; +- else ++ else if (i < max_slots - 1) + bank_swizzle[i] = SQ_ALU_VEC_012; ++ else ++ return -1; + } + } + } +diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c +index 6df0d91..bb85fc1 100644 +--- a/src/gallium/drivers/r600/r600_buffer.c ++++ b/src/gallium/drivers/r600/r600_buffer.c +@@ -34,6 +34,7 @@ static void r600_buffer_destroy(struct pipe_screen *screen, + { 
+ struct r600_resource *rbuffer = r600_resource(buf); + ++ util_range_destroy(&rbuffer->valid_buffer_range); + pb_reference(&rbuffer->buf, NULL); + FREE(rbuffer); + } +@@ -98,6 +99,14 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx, + + assert(box->x + box->width <= resource->width0); + ++ /* See if the buffer range being mapped has never been initialized, ++ * in which case it can be mapped unsynchronized. */ ++ if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) && ++ usage & PIPE_TRANSFER_WRITE && ++ !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) { ++ usage |= PIPE_TRANSFER_UNSYNCHRONIZED; ++ } ++ + if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE && + !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { + assert(usage & PIPE_TRANSFER_WRITE); +@@ -178,6 +187,7 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe, + { + struct r600_context *rctx = (struct r600_context*)pipe; + struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; ++ struct r600_resource *rbuffer = r600_resource(transfer->resource); + + if (rtransfer->staging) { + struct pipe_resource *dst, *src; +@@ -189,7 +199,7 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe, + doffset = transfer->box.x; + soffset = rtransfer->offset + transfer->box.x % R600_MAP_BUFFER_ALIGNMENT; + /* Copy the staging buffer into the original one. 
*/ +- if (rctx->rings.dma.cs && !(size % 4) && !(doffset % 4) && !(soffset)) { ++ if (rctx->rings.dma.cs && !(size % 4) && !(doffset % 4) && !(soffset % 4)) { + if (rctx->screen->chip_class >= EVERGREEN) { + evergreen_dma_copy(rctx, dst, src, doffset, soffset, size); + } else { +@@ -203,6 +213,11 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe, + } + pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL); + } ++ ++ if (transfer->usage & PIPE_TRANSFER_WRITE) { ++ util_range_add(&rbuffer->valid_buffer_range, transfer->box.x, ++ transfer->box.x + transfer->box.width); ++ } + util_slab_free(&rctx->pool_transfers, transfer); + } + +@@ -259,6 +274,7 @@ bool r600_init_resource(struct r600_screen *rscreen, + + res->cs_buf = rscreen->ws->buffer_get_cs_handle(res->buf); + res->domains = domains; ++ util_range_set_empty(&res->valid_buffer_range); + return true; + } + +@@ -275,6 +291,7 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, + pipe_reference_init(&rbuffer->b.b.reference, 1); + rbuffer->b.b.screen = screen; + rbuffer->b.vtbl = &r600_buffer_vtbl; ++ util_range_init(&rbuffer->valid_buffer_range); + + if (!r600_init_resource(rscreen, rbuffer, templ->width0, alignment, TRUE, templ->usage)) { + FREE(rbuffer); +diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c +index 9091ec0..322381a 100644 +--- a/src/gallium/drivers/r600/r600_hw_context.c ++++ b/src/gallium/drivers/r600/r600_hw_context.c +@@ -648,6 +648,12 @@ void r600_flush_emit(struct r600_context *rctx) + cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0); + } + ++ if (rctx->chip_class >= R700 && ++ (rctx->flags & R600_CONTEXT_FLUSH_AND_INV_DB_META)) { ++ cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); ++ cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0); ++ } ++ + if (rctx->flags & R600_CONTEXT_FLUSH_AND_INV) { + cs->buf[cs->cdw++] = 
PKT3(PKT3_EVENT_WRITE, 0, 0); + cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0); +@@ -742,6 +748,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags) + */ + ctx->flags |= R600_CONTEXT_FLUSH_AND_INV | + R600_CONTEXT_FLUSH_AND_INV_CB_META | ++ R600_CONTEXT_FLUSH_AND_INV_DB_META | + R600_CONTEXT_WAIT_3D_IDLE | + R600_CONTEXT_WAIT_CP_DMA_IDLE; + +@@ -1119,6 +1126,7 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx, + rctx->flags |= R600_CONTEXT_INVAL_READ_CACHES | + R600_CONTEXT_FLUSH_AND_INV | + R600_CONTEXT_FLUSH_AND_INV_CB_META | ++ R600_CONTEXT_FLUSH_AND_INV_DB_META | + R600_CONTEXT_STREAMOUT_FLUSH | + R600_CONTEXT_WAIT_3D_IDLE; + +@@ -1164,6 +1172,9 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx, + + /* Invalidate the read caches. */ + rctx->flags |= R600_CONTEXT_INVAL_READ_CACHES; ++ ++ util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset, ++ dst_offset + size); + } + + void r600_need_dma_space(struct r600_context *ctx, unsigned num_dw) +@@ -1210,4 +1221,7 @@ void r600_dma_copy(struct r600_context *rctx, + src_offset += csize << shift; + size -= csize; + } ++ ++ util_range_add(&rdst->valid_buffer_range, dst_offset, ++ dst_offset + size); + } +diff --git a/src/gallium/drivers/r600/r600_hw_context_priv.h b/src/gallium/drivers/r600/r600_hw_context_priv.h +index 692e6ec..3b50f68 100644 +--- a/src/gallium/drivers/r600/r600_hw_context_priv.h ++++ b/src/gallium/drivers/r600/r600_hw_context_priv.h +@@ -29,7 +29,7 @@ + #include "r600_pipe.h" + + /* the number of CS dwords for flushing and drawing */ +-#define R600_MAX_FLUSH_CS_DWORDS 12 ++#define R600_MAX_FLUSH_CS_DWORDS 16 + #define R600_MAX_DRAW_CS_DWORDS 34 + #define R600_TRACE_CS_DWORDS 7 + +diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c +index fa66fcc..7a41688 100644 +--- a/src/gallium/drivers/r600/r600_llvm.c ++++ b/src/gallium/drivers/r600/r600_llvm.c +@@ -38,8 +38,12 @@ static 
LLVMValueRef llvm_fetch_const( + LLVMValueRef index = LLVMBuildLoad(bld_base->base.gallivm->builder, bld->addr[reg->Indirect.Index][reg->Indirect.SwizzleX], ""); + offset[1] = LLVMBuildAdd(bld_base->base.gallivm->builder, offset[1], index, ""); + } ++ unsigned ConstantAddressSpace = CONSTANT_BUFFER_0_ADDR_SPACE ; ++ if (reg->Register.Dimension) { ++ ConstantAddressSpace += reg->Dimension.Index; ++ } + LLVMTypeRef const_ptr_type = LLVMPointerType(LLVMArrayType(LLVMVectorType(bld_base->base.elem_type, 4), 1024), +- CONSTANT_BUFFER_0_ADDR_SPACE); ++ ConstantAddressSpace); + LLVMValueRef const_ptr = LLVMBuildIntToPtr(bld_base->base.gallivm->builder, lp_build_const_int32(bld_base->base.gallivm, 0), const_ptr_type, ""); + LLVMValueRef ptr = LLVMBuildGEP(bld_base->base.gallivm->builder, const_ptr, offset, 2, ""); + LLVMValueRef cvecval = LLVMBuildLoad(bld_base->base.gallivm->builder, ptr, ""); +diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c +index a59578d..a7973a5 100644 +--- a/src/gallium/drivers/r600/r600_pipe.c ++++ b/src/gallium/drivers/r600/r600_pipe.c +@@ -22,6 +22,7 @@ + */ + #include "r600_pipe.h" + #include "r600_public.h" ++#include "r600d.h" + + #include + #include "pipe/p_shader_tokens.h" +@@ -165,12 +166,23 @@ static void r600_flush_gfx_ring(void *ctx, unsigned flags) + static void r600_flush_dma_ring(void *ctx, unsigned flags) + { + struct r600_context *rctx = (struct r600_context *)ctx; ++ struct radeon_winsys_cs *cs = rctx->rings.dma.cs; ++ unsigned padding_dw, i; + +- if (!rctx->rings.dma.cs->cdw) { ++ if (!cs->cdw) { + return; + } ++ ++ /* Pad the DMA CS to a multiple of 8 dwords. 
*/ ++ padding_dw = 8 - cs->cdw % 8; ++ if (padding_dw < 8) { ++ for (i = 0; i < padding_dw; i++) { ++ cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0); ++ } ++ } ++ + rctx->rings.dma.flushing = true; +- rctx->ws->cs_flush(rctx->rings.dma.cs, flags); ++ rctx->ws->cs_flush(cs, flags); + rctx->rings.dma.flushing = false; + } + +diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h +index ec59c92..1be4321 100644 +--- a/src/gallium/drivers/r600/r600_pipe.h ++++ b/src/gallium/drivers/r600/r600_pipe.h +@@ -298,7 +298,8 @@ struct r600_dsa_state { + unsigned alpha_ref; + ubyte valuemask[2]; + ubyte writemask[2]; +- unsigned sx_alpha_test_control; ++ unsigned zwritemask; ++ unsigned sx_alpha_test_control; + }; + + struct r600_pipe_shader; +@@ -513,6 +514,7 @@ struct r600_context { + bool alpha_to_one; + bool force_blend_disable; + boolean dual_src_blend; ++ unsigned zwritemask; + + /* Index buffer. */ + struct pipe_index_buffer index_buffer; +diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c +index 3f165f7..70232fd 100644 +--- a/src/gallium/drivers/r600/r600_state.c ++++ b/src/gallium/drivers/r600/r600_state.c +@@ -802,6 +802,7 @@ static void *r600_create_dsa_state(struct pipe_context *ctx, + dsa->valuemask[1] = state->stencil[1].valuemask; + dsa->writemask[0] = state->stencil[0].writemask; + dsa->writemask[1] = state->stencil[1].writemask; ++ dsa->zwritemask = state->depth.writemask; + + db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | + S_028800_Z_WRITE_ENABLE(state->depth.writemask) | +@@ -1515,6 +1516,11 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, + } + if (rctx->framebuffer.state.zsbuf) { + rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV; ++ ++ rtex = (struct r600_texture*)rctx->framebuffer.state.zsbuf->texture; ++ if (rctx->chip_class >= R700 && rtex->htile) { ++ rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_DB_META; ++ } + } + + /* 
Set the new state. */ +diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c +index 88bb62b..f0e9de3 100644 +--- a/src/gallium/drivers/r600/r600_state_common.c ++++ b/src/gallium/drivers/r600/r600_state_common.c +@@ -284,6 +284,16 @@ static void r600_bind_dsa_state(struct pipe_context *ctx, void *state) + ref.valuemask[1] = dsa->valuemask[1]; + ref.writemask[0] = dsa->writemask[0]; + ref.writemask[1] = dsa->writemask[1]; ++ if (rctx->zwritemask != dsa->zwritemask) { ++ rctx->zwritemask = dsa->zwritemask; ++ if (rctx->chip_class >= EVERGREEN) { ++ /* work around some issue when not writing to zbuffer ++ * we are having lockup on evergreen so do not enable ++ * hyperz when not writing zbuffer ++ */ ++ rctx->db_misc_state.atom.dirty = true; ++ } ++ } + + r600_set_stencil_ref(ctx, &ref); + +@@ -972,6 +982,7 @@ r600_create_so_target(struct pipe_context *ctx, + { + struct r600_context *rctx = (struct r600_context *)ctx; + struct r600_so_target *t; ++ struct r600_resource *rbuffer = (struct r600_resource*)buffer; + + t = CALLOC_STRUCT(r600_so_target); + if (!t) { +@@ -991,6 +1002,9 @@ r600_create_so_target(struct pipe_context *ctx, + pipe_resource_reference(&t->b.buffer, buffer); + t->b.buffer_offset = buffer_offset; + t->b.buffer_size = buffer_size; ++ ++ util_range_add(&rbuffer->valid_buffer_range, buffer_offset, ++ buffer_offset + buffer_size); + return &t->b; + } + +diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h +index 621e7a1..81e5a6c 100644 +--- a/src/gallium/drivers/r600/r600d.h ++++ b/src/gallium/drivers/r600/r600d.h +@@ -119,6 +119,7 @@ + #define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16 + #define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH 0x1f + #define EVENT_TYPE_SAMPLE_STREAMOUTSTATS 0x20 ++#define EVENT_TYPE_FLUSH_AND_INV_DB_META 0x2c /* supported on r700+ */ + #define EVENT_TYPE_FLUSH_AND_INV_CB_META 46 /* supported on r700+ */ + #define EVENT_TYPE(x) ((x) << 0) + #define
EVENT_INDEX(x) ((x) << 8) +diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +index 0f90991..8902ae4 100644 +--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c ++++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +@@ -766,6 +766,22 @@ static void emit_icmp( + emit_data->output[emit_data->chan] = v; + } + ++static void emit_ucmp( ++ const struct lp_build_tgsi_action * action, ++ struct lp_build_tgsi_context * bld_base, ++ struct lp_build_emit_data * emit_data) ++{ ++ unsigned pred; ++ LLVMBuilderRef builder = bld_base->base.gallivm->builder; ++ LLVMContextRef context = bld_base->base.gallivm->context; ++ ++ ++ LLVMValueRef v = LLVMBuildFCmp(builder, LLVMRealUGE, ++ emit_data->args[0], lp_build_const_float(bld_base->base.gallivm, 0.), ""); ++ ++ emit_data->output[emit_data->chan] = LLVMBuildSelect(builder, v, emit_data->args[2], emit_data->args[1], ""); ++} ++ + static void emit_cmp( + const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context * bld_base, +@@ -1241,6 +1257,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) + bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp; + bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f; + bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor; ++ bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp; + + bld_base->rsq_action.emit = build_tgsi_intrinsic_nomem; + bld_base->rsq_action.intr_name = "llvm.AMDGPU.rsq"; +diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c +index 2545634..7922928 100644 +--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c ++++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c +@@ -309,14 +309,8 @@ static void declare_input_fs( + /* XXX: Handle all possible interpolation modes */ + switch (decl->Interp.Interpolate) { + case TGSI_INTERPOLATE_COLOR: +- /* XXX: Flat shading hangs the GPU */ +- if 
(si_shader_ctx->rctx->queued.named.rasterizer && +- si_shader_ctx->rctx->queued.named.rasterizer->flatshade) { +-#if 0 ++ if (si_shader_ctx->key.flatshade) { + intr_name = "llvm.SI.fs.interp.constant"; +-#else +- intr_name = "llvm.SI.fs.interp.linear.center"; +-#endif + } else { + if (decl->Interp.Centroid) + intr_name = "llvm.SI.fs.interp.persp.centroid"; +@@ -325,11 +319,8 @@ static void declare_input_fs( + } + break; + case TGSI_INTERPOLATE_CONSTANT: +- /* XXX: Flat shading hangs the GPU */ +-#if 0 + intr_name = "llvm.SI.fs.interp.constant"; + break; +-#endif + case TGSI_INTERPOLATE_LINEAR: + if (decl->Interp.Centroid) + intr_name = "llvm.SI.fs.interp.linear.centroid"; +diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.h b/src/gallium/drivers/radeonsi/radeonsi_shader.h +index 07b2f9f..f54f67c 100644 +--- a/src/gallium/drivers/radeonsi/radeonsi_shader.h ++++ b/src/gallium/drivers/radeonsi/radeonsi_shader.h +@@ -82,6 +82,7 @@ struct si_shader_key { + unsigned nr_cbufs:4; + unsigned color_two_side:1; + unsigned alpha_func:3; ++ unsigned flatshade:1; + float alpha_ref; + }; + +diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c +index a6b1983..39817fb 100644 +--- a/src/gallium/drivers/radeonsi/si_state.c ++++ b/src/gallium/drivers/radeonsi/si_state.c +@@ -421,8 +421,7 @@ static void *si_create_rs_state(struct pipe_context *ctx, + rs->offset_units = state->offset_units; + rs->offset_scale = state->offset_scale * 12.0f; + +- /* XXX: Flat shading hangs the GPU */ +- tmp = S_0286D4_FLAT_SHADE_ENA(0); ++ tmp = S_0286D4_FLAT_SHADE_ENA(1); + if (state->sprite_coord_enable) { + tmp |= S_0286D4_PNT_SPRITE_ENA(1) | + S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | +@@ -1859,7 +1858,7 @@ static INLINE struct si_shader_key si_shader_selector_key(struct pipe_context *c + key.export_16bpc = rctx->export_16bpc; + if (rctx->queued.named.rasterizer) { + key.color_two_side = rctx->queued.named.rasterizer->two_side; +- 
/*key.flatshade = rctx->queued.named.rasterizer->flatshade;*/ ++ key.flatshade = rctx->queued.named.rasterizer->flatshade; + } + if (rctx->queued.named.dsa) { + key.alpha_func = rctx->queued.named.dsa->alpha_func; +diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c +index 3704410..8c35625 100644 +--- a/src/gallium/drivers/radeonsi/si_state_draw.c ++++ b/src/gallium/drivers/radeonsi/si_state_draw.c +@@ -128,11 +128,6 @@ static void si_pipe_shader_ps(struct pipe_context *ctx, struct si_pipe_shader *s + continue; + } + +- /* XXX: Flat shading hangs the GPU */ +- if (shader->shader.input[i].interpolate == TGSI_INTERPOLATE_CONSTANT || +- (shader->shader.input[i].interpolate == TGSI_INTERPOLATE_COLOR && +- rctx->queued.named.rasterizer->flatshade)) +- have_linear = TRUE; + if (shader->shader.input[i].interpolate == TGSI_INTERPOLATE_LINEAR) + have_linear = TRUE; + if (shader->shader.input[i].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) +@@ -327,15 +322,12 @@ static void si_update_spi_map(struct r600_context *rctx) + bcolor: + tmp = 0; + +-#if 0 +- /* XXX: Flat shading hangs the GPU */ + if (name == TGSI_SEMANTIC_POSITION || + ps->input[i].interpolate == TGSI_INTERPOLATE_CONSTANT || + (ps->input[i].interpolate == TGSI_INTERPOLATE_COLOR && +- rctx->rasterizer && rctx->rasterizer->flatshade)) { ++ rctx->ps_shader->current->key.flatshade)) { + tmp |= S_028644_FLAT_SHADE(1); + } +-#endif + + if (name == TGSI_SEMANTIC_GENERIC && + rctx->sprite_coord_enable & (1 << ps->input[i].sid)) { +@@ -453,8 +445,14 @@ static void si_vertex_buffer_update(struct r600_context *rctx) + si_pm4_sh_data_add(pm4, va & 0xFFFFFFFF); + si_pm4_sh_data_add(pm4, (S_008F04_BASE_ADDRESS_HI(va >> 32) | + S_008F04_STRIDE(vb->stride))); +- si_pm4_sh_data_add(pm4, (vb->buffer->width0 - vb->buffer_offset) / +- MAX2(vb->stride, 1)); ++ if (vb->stride) ++ /* Round up by rounding down and adding 1 */ ++ si_pm4_sh_data_add(pm4, ++ (vb->buffer->width0 - offset - 
++ util_format_get_blocksize(ve->src_format)) / ++ vb->stride + 1); ++ else ++ si_pm4_sh_data_add(pm4, vb->buffer->width0 - offset); + si_pm4_sh_data_add(pm4, rctx->vertex_elements->rsrc_word3[i]); + + if (!bound[ve->vertex_buffer_index]) { +diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.c b/src/gallium/state_trackers/glx/xlib/xm_api.c +index 607584f..021175c 100644 +--- a/src/gallium/state_trackers/glx/xlib/xm_api.c ++++ b/src/gallium/state_trackers/glx/xlib/xm_api.c +@@ -438,7 +438,6 @@ create_xmesa_buffer(Drawable d, BufferType type, + { + XMesaDisplay xmdpy = xmesa_init_display(vis->display); + XMesaBuffer b; +- uint width, height; + + ASSERT(type == WINDOW || type == PIXMAP || type == PBUFFER); + +@@ -457,7 +456,7 @@ create_xmesa_buffer(Drawable d, BufferType type, + b->type = type; + b->cmap = cmap; + +- get_drawable_size(vis->display, d, &width, &height); ++ get_drawable_size(vis->display, d, &b->width, &b->height); + + /* + * Create framebuffer, but we'll plug in our own renderbuffers below. 
+diff --git a/src/gallium/targets/dri-vmwgfx/Makefile.am b/src/gallium/targets/dri-vmwgfx/Makefile.am +index 06ebf88..ca7df65 100644 +--- a/src/gallium/targets/dri-vmwgfx/Makefile.am ++++ b/src/gallium/targets/dri-vmwgfx/Makefile.am +@@ -58,17 +58,13 @@ vmwgfx_dri_la_LIBADD = \ + $(top_builddir)/src/gallium/drivers/svga/libsvga.la \ + $(GALLIUM_DRI_LIB_DEPS) + +-if HAVE_MESA_LLVM + vmwgfx_dri_la_LINK = $(CXXLINK) $(vmwgfx_dri_la_LDFLAGS) + # Mention a dummy pure C++ file to trigger generation of the $(LINK) variable + nodist_EXTRA_vmwgfx_dri_la_SOURCES = dummy-cpp.cpp + ++if HAVE_MESA_LLVM + vmwgfx_dri_la_LDFLAGS += $(LLVM_LDFLAGS) + vmwgfx_dri_la_LIBADD += $(LLVM_LIBS) +-else +-vmwgfx_dri_la_LINK = $(LINK) $(vmwgfx_dri_la_LDFLAGS) +-# Mention a dummy pure C file to trigger generation of the $(LINK) variable +-nodist_EXTRA_vmwgfx_dri_la_SOURCES = dummy-c.c + endif + + # Provide compatibility with scripts for the old Mesa build system for +diff --git a/src/gallium/targets/vdpau-softpipe/Makefile.am b/src/gallium/targets/vdpau-softpipe/Makefile.am +index 3372b5c..7bde2f8 100644 +--- a/src/gallium/targets/vdpau-softpipe/Makefile.am ++++ b/src/gallium/targets/vdpau-softpipe/Makefile.am +@@ -35,7 +35,7 @@ vdpaudir = $(VDPAU_LIB_INSTALL_DIR) + vdpau_LTLIBRARIES = libvdpau_softpipe.la + + libvdpau_softpipe_la_SOURCES = \ +- $(top_srcdir)/src/gallium/auxiliary/vl/vl_winsys_dri.c ++ $(top_srcdir)/src/gallium/auxiliary/vl/vl_winsys_xsp.c + + libvdpau_softpipe_la_LDFLAGS = \ + -module \ +diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +index 2d41c26..f4ac526 100644 +--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c ++++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +@@ -957,16 +957,16 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer, + + bo->flinked = TRUE; + bo->flink = flink.name; ++ ++ pipe_mutex_lock(bo->mgr->bo_handles_mutex); ++ util_hash_table_set(bo->mgr->bo_handles, 
(void*)(uintptr_t)bo->flink, bo); ++ pipe_mutex_unlock(bo->mgr->bo_handles_mutex); + } + whandle->handle = bo->flink; + } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) { + whandle->handle = bo->handle; + } + +- pipe_mutex_lock(bo->mgr->bo_handles_mutex); +- util_hash_table_set(bo->mgr->bo_handles, (void*)(uintptr_t)whandle->handle, bo); +- pipe_mutex_unlock(bo->mgr->bo_handles_mutex); +- + whandle->stride = stride; + return TRUE; + } +diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c +index 519929e..a3a0530 100644 +--- a/src/gbm/backends/dri/gbm_dri.c ++++ b/src/gbm/backends/dri/gbm_dri.c +@@ -481,6 +481,7 @@ create_dumb(struct gbm_device *gbm, + bo->base.base.width = width; + bo->base.base.height = height; + bo->base.base.stride = create_arg.pitch; ++ bo->base.base.format = format; + bo->base.base.handle.u32 = create_arg.handle; + bo->handle = create_arg.handle; + bo->size = create_arg.size; +@@ -529,6 +530,7 @@ gbm_dri_bo_create(struct gbm_device *gbm, + bo->base.base.gbm = gbm; + bo->base.base.width = width; + bo->base.base.height = height; ++ bo->base.base.format = format; + + switch (format) { + case GBM_FORMAT_RGB565: +diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c +index 4e32b50..29a209e 100644 +--- a/src/mesa/drivers/common/meta.c ++++ b/src/mesa/drivers/common/meta.c +@@ -1910,6 +1910,14 @@ _mesa_meta_BlitFramebuffer(struct gl_context *ctx, + GLuint *tmp = malloc(srcW * srcH * sizeof(GLuint)); + + if (tmp) { ++ ++ newTex = alloc_texture(depthTex, srcW, srcH, GL_DEPTH_COMPONENT); ++ _mesa_ReadPixels(srcX, srcY, srcW, srcH, GL_DEPTH_COMPONENT, ++ GL_UNSIGNED_INT, tmp); ++ setup_drawpix_texture(ctx, depthTex, newTex, GL_DEPTH_COMPONENT, ++ srcW, srcH, GL_DEPTH_COMPONENT, ++ GL_UNSIGNED_INT, tmp); ++ + /* texcoords (after texture allocation!) 
*/ + { + verts[0].s = 0.0F; +@@ -1928,15 +1936,6 @@ _mesa_meta_BlitFramebuffer(struct gl_context *ctx, + if (!blit->DepthFP) + init_blit_depth_pixels(ctx); + +- /* maybe change tex format here */ +- newTex = alloc_texture(depthTex, srcW, srcH, GL_DEPTH_COMPONENT); +- +- _mesa_ReadPixels(srcX, srcY, srcW, srcH, +- GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, tmp); +- +- setup_drawpix_texture(ctx, depthTex, newTex, GL_DEPTH_COMPONENT, srcW, srcH, +- GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, tmp); +- + _mesa_BindProgramARB(GL_FRAGMENT_PROGRAM_ARB, blit->DepthFP); + _mesa_set_enable(ctx, GL_FRAGMENT_PROGRAM_ARB, GL_TRUE); + _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); +diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am +index dc140df..77670ef 100644 +--- a/src/mesa/drivers/dri/i965/Makefile.am ++++ b/src/mesa/drivers/dri/i965/Makefile.am +@@ -62,6 +62,7 @@ TEST_LIBS = \ + ../common/libdri_test_stubs.la + + i965_dri_la_SOURCES = ++nodist_EXTRA_i965_dri_la_SOURCES = dummy2.cpp + i965_dri_la_LIBADD = $(COMMON_LIBS) + i965_dri_la_LDFLAGS = -module -avoid-version -shared + +diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp +index 8dab431..f80219e 100644 +--- a/src/mesa/drivers/dri/i965/brw_fs.cpp ++++ b/src/mesa/drivers/dri/i965/brw_fs.cpp +@@ -258,6 +258,26 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index, + return instructions; + } + ++/** ++ * A helper for MOV generation for fixing up broken hardware SEND dependency ++ * handling. ++ */ ++fs_inst * ++fs_visitor::DEP_RESOLVE_MOV(int grf) ++{ ++ fs_inst *inst = MOV(brw_null_reg(), fs_reg(GRF, grf, BRW_REGISTER_TYPE_F)); ++ ++ inst->ir = NULL; ++ inst->annotation = "send dependency resolve"; ++ ++ /* The caller always wants uncompressed to emit the minimal extra ++ * dependencies, and to avoid having to deal with aligning its regs to 2. 
++ */ ++ inst->force_uncompressed = true; ++ ++ return inst; ++} ++ + bool + fs_inst::equals(fs_inst *inst) + { +@@ -1690,8 +1710,6 @@ fs_visitor::setup_pull_constants() + dst, index, offset); + pull->ir = inst->ir; + pull->annotation = inst->annotation; +- pull->base_mrf = 14; +- pull->mlen = 1; + + inst->insert_before(pull); + +@@ -1911,6 +1929,7 @@ fs_visitor::register_coalesce() + + bool has_source_modifiers = (inst->src[0].abs || + inst->src[0].negate || ++ inst->src[0].smear != -1 || + inst->src[0].file == UNIFORM); + + /* Found a move of a GRF to a GRF. Let's see if we can coalesce +@@ -2228,6 +2247,265 @@ fs_visitor::remove_duplicate_mrf_writes() + return progress; + } + ++static void ++clear_deps_for_inst_src(fs_inst *inst, int dispatch_width, bool *deps, ++ int first_grf, int grf_len) ++{ ++ bool inst_16wide = (dispatch_width > 8 && ++ !inst->force_uncompressed && ++ !inst->force_sechalf); ++ ++ /* Clear the flag for registers that actually got read (as expected). */ ++ for (int i = 0; i < 3; i++) { ++ int grf; ++ if (inst->src[i].file == GRF) { ++ grf = inst->src[i].reg; ++ } else if (inst->src[i].file == FIXED_HW_REG && ++ inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) { ++ grf = inst->src[i].fixed_hw_reg.nr; ++ } else { ++ continue; ++ } ++ ++ if (grf >= first_grf && ++ grf < first_grf + grf_len) { ++ deps[grf - first_grf] = false; ++ if (inst_16wide) ++ deps[grf - first_grf + 1] = false; ++ } ++ } ++} ++ ++/** ++ * Implements this workaround for the original 965: ++ * ++ * "[DevBW, DevCL] Implementation Restrictions: As the hardware does not ++ * check for post destination dependencies on this instruction, software ++ * must ensure that there is no destination hazard for the case of ‘write ++ * followed by a posted write’ shown in the following example. ++ * ++ * 1. mov r3 0 ++ * 2. send r3.xy ++ * 3. 
mov r2 r3 ++ * ++ * Due to no post-destination dependency check on the ‘send’, the above ++ * code sequence could have two instructions (1 and 2) in flight at the ++ * same time that both consider ‘r3’ as the target of their final writes. ++ */ ++void ++fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst) ++{ ++ int write_len = inst->regs_written() * dispatch_width / 8; ++ int first_write_grf = inst->dst.reg; ++ bool needs_dep[BRW_MAX_MRF]; ++ assert(write_len < (int)sizeof(needs_dep) - 1); ++ ++ memset(needs_dep, false, sizeof(needs_dep)); ++ memset(needs_dep, true, write_len); ++ ++ clear_deps_for_inst_src(inst, dispatch_width, ++ needs_dep, first_write_grf, write_len); ++ ++ /* Walk backwards looking for writes to registers we're writing which ++ * aren't read since being written. If we hit the start of the program, ++ * we assume that there are no outstanding dependencies on entry to the ++ * program. ++ */ ++ for (fs_inst *scan_inst = (fs_inst *)inst->prev; ++ scan_inst != NULL; ++ scan_inst = (fs_inst *)scan_inst->prev) { ++ ++ /* If we hit control flow, assume that there *are* outstanding ++ * dependencies, and force their cleanup before our instruction. ++ */ ++ if (scan_inst->is_control_flow()) { ++ for (int i = 0; i < write_len; i++) { ++ if (needs_dep[i]) { ++ inst->insert_before(DEP_RESOLVE_MOV(first_write_grf + i)); ++ } ++ } ++ } ++ ++ bool scan_inst_16wide = (dispatch_width > 8 && ++ !scan_inst->force_uncompressed && ++ !scan_inst->force_sechalf); ++ ++ /* We insert our reads as late as possible on the assumption that any ++ * instruction but a MOV that might have left us an outstanding ++ * dependency has more latency than a MOV. 
++ */ ++ if (scan_inst->dst.file == GRF && ++ scan_inst->dst.reg >= first_write_grf && ++ scan_inst->dst.reg < first_write_grf + write_len && ++ needs_dep[scan_inst->dst.reg - first_write_grf]) { ++ inst->insert_before(DEP_RESOLVE_MOV(scan_inst->dst.reg)); ++ needs_dep[scan_inst->dst.reg - first_write_grf] = false; ++ if (scan_inst_16wide) ++ needs_dep[scan_inst->dst.reg - first_write_grf + 1] = false; ++ } ++ ++ /* Clear the flag for registers that actually got read (as expected). */ ++ clear_deps_for_inst_src(scan_inst, dispatch_width, ++ needs_dep, first_write_grf, write_len); ++ ++ /* Continue the loop only if we haven't resolved all the dependencies */ ++ int i; ++ for (i = 0; i < write_len; i++) { ++ if (needs_dep[i]) ++ break; ++ } ++ if (i == write_len) ++ return; ++ } ++} ++ ++/** ++ * Implements this workaround for the original 965: ++ * ++ * "[DevBW, DevCL] Errata: A destination register from a send can not be ++ * used as a destination register until after it has been sourced by an ++ * instruction with a different destination register. ++ */ ++void ++fs_visitor::insert_gen4_post_send_dependency_workarounds(fs_inst *inst) ++{ ++ int write_len = inst->regs_written() * dispatch_width / 8; ++ int first_write_grf = inst->dst.reg; ++ bool needs_dep[BRW_MAX_MRF]; ++ assert(write_len < (int)sizeof(needs_dep) - 1); ++ ++ memset(needs_dep, false, sizeof(needs_dep)); ++ memset(needs_dep, true, write_len); ++ /* Walk forwards looking for writes to registers we're writing which aren't ++ * read before being written. ++ */ ++ for (fs_inst *scan_inst = (fs_inst *)inst->next; ++ !scan_inst->is_tail_sentinel(); ++ scan_inst = (fs_inst *)scan_inst->next) { ++ /* If we hit control flow, force resolve all remaining dependencies. 
*/ ++ if (scan_inst->is_control_flow()) { ++ for (int i = 0; i < write_len; i++) { ++ if (needs_dep[i]) ++ scan_inst->insert_before(DEP_RESOLVE_MOV(first_write_grf + i)); ++ } ++ } ++ ++ /* Clear the flag for registers that actually got read (as expected). */ ++ clear_deps_for_inst_src(scan_inst, dispatch_width, ++ needs_dep, first_write_grf, write_len); ++ ++ /* We insert our reads as late as possible since they're reading the ++ * result of a SEND, which has massive latency. ++ */ ++ if (scan_inst->dst.file == GRF && ++ scan_inst->dst.reg >= first_write_grf && ++ scan_inst->dst.reg < first_write_grf + write_len && ++ needs_dep[scan_inst->dst.reg - first_write_grf]) { ++ scan_inst->insert_before(DEP_RESOLVE_MOV(scan_inst->dst.reg)); ++ needs_dep[scan_inst->dst.reg - first_write_grf] = false; ++ } ++ ++ /* Continue the loop only if we haven't resolved all the dependencies */ ++ int i; ++ for (i = 0; i < write_len; i++) { ++ if (needs_dep[i]) ++ break; ++ } ++ if (i == write_len) ++ return; ++ } ++ ++ /* If we hit the end of the program, resolve all remaining dependencies out ++ * of paranoia. ++ */ ++ fs_inst *last_inst = (fs_inst *)this->instructions.get_tail(); ++ assert(last_inst->eot); ++ for (int i = 0; i < write_len; i++) { ++ if (needs_dep[i]) ++ last_inst->insert_before(DEP_RESOLVE_MOV(first_write_grf + i)); ++ } ++} ++ ++void ++fs_visitor::insert_gen4_send_dependency_workarounds() ++{ ++ if (intel->gen != 4 || intel->is_g4x) ++ return; ++ ++ /* Note that we're done with register allocation, so GRF fs_regs always ++ * have a .reg_offset of 0. 
++ */ ++ ++ foreach_list_safe(node, &this->instructions) { ++ fs_inst *inst = (fs_inst *)node; ++ ++ if (inst->mlen != 0 && inst->dst.file == GRF) { ++ insert_gen4_pre_send_dependency_workarounds(inst); ++ insert_gen4_post_send_dependency_workarounds(inst); ++ } ++ } ++} ++ ++/** ++ * Turns the generic expression-style uniform pull constant load instruction ++ * into a hardware-specific series of instructions for loading a pull ++ * constant. ++ * ++ * The expression style allows the CSE pass before this to optimize out ++ * repeated loads from the same offset, and gives the pre-register-allocation ++ * scheduling full flexibility, while the conversion to native instructions ++ * allows the post-register-allocation scheduler the best information ++ * possible. ++ */ ++void ++fs_visitor::lower_uniform_pull_constant_loads() ++{ ++ foreach_list(node, &this->instructions) { ++ fs_inst *inst = (fs_inst *)node; ++ ++ if (inst->opcode != FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD) ++ continue; ++ ++ if (intel->gen >= 7) { ++ fs_reg const_offset_reg = inst->src[1]; ++ assert(const_offset_reg.file == IMM && ++ const_offset_reg.type == BRW_REGISTER_TYPE_UD); ++ const_offset_reg.imm.u /= 16; ++ fs_reg payload = fs_reg(this, glsl_type::uint_type); ++ struct brw_reg g0 = retype(brw_vec8_grf(0, 0), ++ BRW_REGISTER_TYPE_UD); ++ ++ fs_inst *setup1 = MOV(payload, fs_reg(g0)); ++ setup1->force_writemask_all = true; ++ /* We don't need the second half of this vgrf to be filled with g1 ++ * in the 16-wide case, but if we use force_uncompressed then live ++ * variable analysis won't consider this a def! 
++ */ ++ ++ fs_inst *setup2 = new(mem_ctx) fs_inst(FS_OPCODE_SET_GLOBAL_OFFSET, ++ payload, payload, ++ const_offset_reg); ++ ++ setup1->ir = inst->ir; ++ setup1->annotation = inst->annotation; ++ inst->insert_before(setup1); ++ setup2->ir = inst->ir; ++ setup2->annotation = inst->annotation; ++ inst->insert_before(setup2); ++ inst->opcode = FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7; ++ inst->src[1] = payload; ++ } else { ++ /* Before register allocation, we didn't tell the scheduler about the ++ * MRF we use. We know it's safe to use this MRF because nothing ++ * else does except for register spill/unspill, which generates and ++ * uses its MRF within a single IR instruction. ++ */ ++ inst->base_mrf = 14; ++ inst->mlen = 1; ++ } ++ } ++} ++ + void + fs_visitor::dump_instruction(fs_inst *inst) + { +@@ -2500,6 +2778,8 @@ fs_visitor::run() + + schedule_instructions(false); + ++ lower_uniform_pull_constant_loads(); ++ + assign_curb_setup(); + assign_urb_setup(); + +@@ -2522,6 +2802,12 @@ fs_visitor::run() + assert(force_uncompressed_stack == 0); + assert(force_sechalf_stack == 0); + ++ /* This must come after all optimization and register allocation, since ++ * it inserts dead code that happens to have side effects, and it does ++ * so based on the actual physical registers in use. 
++ */ ++ insert_gen4_send_dependency_workarounds(); ++ + if (failed) + return false; + +diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h +index 88fecb9..d1bb111 100644 +--- a/src/mesa/drivers/dri/i965/brw_fs.h ++++ b/src/mesa/drivers/dri/i965/brw_fs.h +@@ -285,6 +285,7 @@ public: + fs_inst *IF(fs_reg src0, fs_reg src1, uint32_t condition); + fs_inst *CMP(fs_reg dst, fs_reg src0, fs_reg src1, + uint32_t condition); ++ fs_inst *DEP_RESOLVE_MOV(int grf); + + int type_size(const struct glsl_type *type); + fs_inst *get_instruction_generating_reg(fs_inst *start, +@@ -329,7 +330,11 @@ public: + bool remove_duplicate_mrf_writes(); + bool virtual_grf_interferes(int a, int b); + void schedule_instructions(bool post_reg_alloc); ++ void insert_gen4_send_dependency_workarounds(); ++ void insert_gen4_pre_send_dependency_workarounds(fs_inst *inst); ++ void insert_gen4_post_send_dependency_workarounds(fs_inst *inst); + void fail(const char *msg, ...); ++ void lower_uniform_pull_constant_loads(); + + void push_force_uncompressed(); + void pop_force_uncompressed(); +diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +index c4ec1d9..194ed07 100644 +--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp ++++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +@@ -223,7 +223,8 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) + inst->src[arg].file = entry->src.file; + inst->src[arg].reg = entry->src.reg; + inst->src[arg].reg_offset = entry->src.reg_offset; +- inst->src[arg].smear = entry->src.smear; ++ if (entry->src.smear != -1) ++ inst->src[arg].smear = entry->src.smear; + + if (!inst->src[arg].abs) { + inst->src[arg].abs = entry->src.abs; +diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +index 70c143a..a13ca36 100644 +--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp ++++ 
b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +@@ -105,7 +105,8 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb) + /* Match current instruction's expression against those in AEB. */ + if (inst->opcode == entry->generator->opcode && + inst->saturate == entry->generator->saturate && +- operands_match(entry->generator->src, inst->src)) { ++ inst->dst.type == entry->generator->dst.type && ++ operands_match(entry->generator->src, inst->src)) { + + found = true; + progress = true; +diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +index 45072da..365a2ec 100644 +--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp ++++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +@@ -604,29 +604,8 @@ fs_generator::generate_unspill(fs_inst *inst, struct brw_reg dst) + { + assert(inst->mlen != 0); + +- /* Clear any post destination dependencies that would be ignored by +- * the block read. See the B-Spec for pre-gen5 send instruction. +- * +- * This could use a better solution, since texture sampling and +- * math reads could potentially run into it as well -- anywhere +- * that we have a SEND with a destination that is a register that +- * was written but not read within the last N instructions (what's +- * N? unsure). This is rare because of dead code elimination, but +- * not impossible. +- */ +- if (intel->gen == 4 && !intel->is_g4x) +- brw_MOV(p, brw_null_reg(), dst); +- + brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1, + inst->offset); +- +- if (intel->gen == 4 && !intel->is_g4x) { +- /* gen4 errata: destination from a send can't be used as a +- * destination until it's been read. Just read it so we don't +- * have to worry. +- */ +- brw_MOV(p, brw_null_reg(), dst); +- } + } + + void +@@ -637,19 +616,6 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst *inst, + { + assert(inst->mlen != 0); + +- /* Clear any post destination dependencies that would be ignored by +- * the block read. 
See the B-Spec for pre-gen5 send instruction. +- * +- * This could use a better solution, since texture sampling and +- * math reads could potentially run into it as well -- anywhere +- * that we have a SEND with a destination that is a register that +- * was written but not read within the last N instructions (what's +- * N? unsure). This is rare because of dead code elimination, but +- * not impossible. +- */ +- if (intel->gen == 4 && !intel->is_g4x) +- brw_MOV(p, brw_null_reg(), dst); +- + assert(index.file == BRW_IMMEDIATE_VALUE && + index.type == BRW_REGISTER_TYPE_UD); + uint32_t surf_index = index.dw1.ud; +@@ -660,14 +626,6 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst *inst, + + brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf), + read_offset, surf_index); +- +- if (intel->gen == 4 && !intel->is_g4x) { +- /* gen4 errata: destination from a send can't be used as a +- * destination until it's been read. Just read it so we don't +- * have to worry. +- */ +- brw_MOV(p, brw_null_reg(), dst); +- } + } + + void +diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +index d4f6fc9..573921c 100644 +--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp ++++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +@@ -597,31 +597,9 @@ fs_visitor::visit(ir_expression *ir) + fs_reg packed_consts = fs_reg(this, glsl_type::float_type); + packed_consts.type = result.type; + +- if (intel->gen >= 7) { +- fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] / 16); +- fs_reg payload = fs_reg(this, glsl_type::uint_type); +- struct brw_reg g0 = retype(brw_vec8_grf(0, 0), +- BRW_REGISTER_TYPE_UD); +- fs_inst *setup = emit(MOV(payload, fs_reg(g0))); +- setup->force_writemask_all = true; +- /* We don't need the second half of this vgrf to be filled with g1 +- * in the 16-wide case, but if we use force_uncompressed then live +- * variable analysis won't consider this a def! 
+- */ +- +- emit(FS_OPCODE_SET_GLOBAL_OFFSET, payload, +- payload, const_offset_reg); +- emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7, packed_consts, +- surf_index, payload); +- } else { +- fs_reg const_offset_reg = fs_reg(const_offset->value.u[0]); +- fs_inst *pull = emit(fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, +- packed_consts, +- surf_index, +- const_offset_reg)); +- pull->base_mrf = 14; +- pull->mlen = 1; +- } ++ fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] & ~15); ++ emit(fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, ++ packed_consts, surf_index, const_offset_reg)); + + packed_consts.smear = const_offset->value.u[0] % 16 / 4; + for (int i = 0; i < ir->type->vector_elements; i++) { +diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c +index 3d53843..48635c5 100644 +--- a/src/mesa/drivers/dri/i965/brw_vs_constval.c ++++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c +@@ -238,6 +238,23 @@ static void calc_wm_input_sizes( struct brw_context *brw ) + + calc_sizes(&t); + ++ /* _NEW_POINT ++ * ++ * If the SF will be replacing the vertex output with a reference to ++ * gl_PointCoord, then tell the fragment shader that the value actually ++ * does vary. 
++ */ ++ if (ctx->Point.PointSprite) { ++ for (int i = 0; i < 8; i++) { ++ if (ctx->Point.CoordReplace[i]) { ++ t.size_masks[4-1] |= FRAG_BIT_TEX(i); ++ t.size_masks[3-1] |= FRAG_BIT_TEX(i); ++ t.size_masks[2-1] |= FRAG_BIT_TEX(i); ++ t.size_masks[1-1] |= FRAG_BIT_TEX(i); ++ } ++ } ++ } ++ + if (memcmp(brw->wm.input_size_masks, t.size_masks, sizeof(t.size_masks)) != 0) { + memcpy(brw->wm.input_size_masks, t.size_masks, sizeof(t.size_masks)); + brw->state.dirty.brw |= BRW_NEW_WM_INPUT_DIMENSIONS; +@@ -246,7 +263,7 @@ static void calc_wm_input_sizes( struct brw_context *brw ) + + const struct brw_tracked_state brw_wm_input_sizes = { + .dirty = { +- .mesa = _NEW_LIGHT | _NEW_PROGRAM, ++ .mesa = _NEW_LIGHT | _NEW_PROGRAM | _NEW_POINT, + .brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_INPUT_DIMENSIONS, + .cache = 0 + }, +diff --git a/src/mesa/drivers/dri/intel/intel_chipset.h b/src/mesa/drivers/dri/intel/intel_chipset.h +index 9c00ba8..885f6c2 100644 +--- a/src/mesa/drivers/dri/intel/intel_chipset.h ++++ b/src/mesa/drivers/dri/intel/intel_chipset.h +@@ -114,15 +114,15 @@ + #define PCI_CHIP_HASWELL_ULT_S_GT1 0x0A0A /* Server */ + #define PCI_CHIP_HASWELL_ULT_S_GT2 0x0A1A + #define PCI_CHIP_HASWELL_ULT_S_GT2_PLUS 0x0A2A +-#define PCI_CHIP_HASWELL_CRW_GT1 0x0D12 /* Desktop */ +-#define PCI_CHIP_HASWELL_CRW_GT2 0x0D22 +-#define PCI_CHIP_HASWELL_CRW_GT2_PLUS 0x0D32 +-#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D16 /* Mobile */ +-#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D26 +-#define PCI_CHIP_HASWELL_CRW_M_GT2_PLUS 0x0D36 +-#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D1A /* Server */ +-#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D2A +-#define PCI_CHIP_HASWELL_CRW_S_GT2_PLUS 0x0D3A ++#define PCI_CHIP_HASWELL_CRW_GT1 0x0D02 /* Desktop */ ++#define PCI_CHIP_HASWELL_CRW_GT2 0x0D12 ++#define PCI_CHIP_HASWELL_CRW_GT2_PLUS 0x0D22 ++#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D06 /* Mobile */ ++#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D16 ++#define PCI_CHIP_HASWELL_CRW_M_GT2_PLUS 0x0D26 ++#define 
PCI_CHIP_HASWELL_CRW_S_GT1 0x0D0A /* Server */ ++#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D1A ++#define PCI_CHIP_HASWELL_CRW_S_GT2_PLUS 0x0D2A + + #define IS_MOBILE(devid) (devid == PCI_CHIP_I855_GM || \ + devid == PCI_CHIP_I915_GM || \ +diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c +index a951283..6d91534 100644 +--- a/src/mesa/main/attrib.c ++++ b/src/mesa/main/attrib.c +@@ -130,6 +130,9 @@ struct gl_enable_attrib + GLboolean VertexProgramPointSize; + GLboolean VertexProgramTwoSide; + ++ /* GL_ARB_fragment_program */ ++ GLboolean FragmentProgram; ++ + /* GL_ARB_point_sprite / GL_NV_point_sprite */ + GLboolean PointSprite; + GLboolean FragmentShaderATI; +@@ -316,6 +319,10 @@ _mesa_PushAttrib(GLbitfield mask) + attr->VertexProgram = ctx->VertexProgram.Enabled; + attr->VertexProgramPointSize = ctx->VertexProgram.PointSizeEnabled; + attr->VertexProgramTwoSide = ctx->VertexProgram.TwoSideEnabled; ++ ++ /* GL_ARB_fragment_program */ ++ attr->FragmentProgram = ctx->FragmentProgram.Enabled; ++ + save_attrib_data(&head, GL_ENABLE_BIT, attr); + + /* GL_ARB_framebuffer_sRGB / GL_EXT_framebuffer_sRGB */ +@@ -607,6 +614,11 @@ pop_enable_group(struct gl_context *ctx, const struct gl_enable_attrib *enable) + enable->VertexProgramTwoSide, + GL_VERTEX_PROGRAM_TWO_SIDE_ARB); + ++ /* GL_ARB_fragment_program */ ++ TEST_AND_UPDATE(ctx->FragmentProgram.Enabled, ++ enable->FragmentProgram, ++ GL_FRAGMENT_PROGRAM_ARB); ++ + /* GL_ARB_framebuffer_sRGB / GL_EXT_framebuffer_sRGB */ + TEST_AND_UPDATE(ctx->Color.sRGBEnabled, enable->sRGBEnabled, + GL_FRAMEBUFFER_SRGB); +diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c +index 5e9e539..df57b76 100644 +--- a/src/mesa/main/context.c ++++ b/src/mesa/main/context.c +@@ -1072,7 +1072,6 @@ _mesa_initialize_context(struct gl_context *ctx, + case API_OPENGLES2: + ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; + ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; +- ctx->Point.PointSprite = GL_TRUE; /* always on for ES 
2.x */ + break; + } + +diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c +index 8728540..c1e1658 100644 +--- a/src/mesa/main/glformats.c ++++ b/src/mesa/main/glformats.c +@@ -917,7 +917,7 @@ _mesa_is_compressed_format(struct gl_context *ctx, GLenum format) + case GL_COMPRESSED_SIGNED_RG11_EAC: + case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: + case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: +- return _mesa_is_gles3(ctx); ++ return _mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility; + case GL_PALETTE4_RGB8_OES: + case GL_PALETTE4_RGBA8_OES: + case GL_PALETTE4_R5_G6_B5_OES: +diff --git a/src/mesa/main/points.c b/src/mesa/main/points.c +index 1778640..c925d4c 100644 +--- a/src/mesa/main/points.c ++++ b/src/mesa/main/points.c +@@ -253,7 +253,8 @@ _mesa_init_point(struct gl_context *ctx) + * In a core context, the state will default to true, and the setters and + * getters are disabled. + */ +- ctx->Point.PointSprite = (ctx->API == API_OPENGL_CORE); ++ ctx->Point.PointSprite = (ctx->API == API_OPENGL_CORE || ++ ctx->API == API_OPENGLES2); + + ctx->Point.SpriteRMode = GL_ZERO; /* GL_NV_point_sprite (only!) 
*/ + ctx->Point.SpriteOrigin = GL_UPPER_LEFT; /* GL_ARB_point_sprite */ +diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c +index d1723b8..1b9525b 100644 +--- a/src/mesa/main/teximage.c ++++ b/src/mesa/main/teximage.c +@@ -520,7 +520,7 @@ _mesa_base_tex_format( struct gl_context *ctx, GLint internalFormat ) + } + } + +- if (_mesa_is_gles3(ctx)) { ++ if (_mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility) { + switch (internalFormat) { + case GL_COMPRESSED_RGB8_ETC2: + case GL_COMPRESSED_SRGB8_ETC2: +@@ -3187,6 +3187,12 @@ _mesa_EGLImageTargetTexture2DOES (GLenum target, GLeglImageOES image) + return; + } + ++ if (!image) { ++ _mesa_error(ctx, GL_INVALID_OPERATION, ++ "glEGLImageTargetTexture2D(image=%p)", image); ++ return; ++ } ++ + if (ctx->NewState & _NEW_PIXEL) + _mesa_update_state(ctx); + +diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c +index 52ede13..6f18ec6 100644 +--- a/src/mesa/main/texparam.c ++++ b/src/mesa/main/texparam.c +@@ -1432,6 +1432,12 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) + *params = (GLfloat) obj->Immutable; + break; + ++ case GL_REQUIRED_TEXTURE_IMAGE_UNITS_OES: ++ if (!_mesa_is_gles(ctx) || !ctx->Extensions.OES_EGL_image_external) ++ goto invalid_pname; ++ *params = obj->RequiredTextureImageUnits; ++ break; ++ + case GL_TEXTURE_SRGB_DECODE_EXT: + if (!ctx->Extensions.EXT_texture_sRGB_decode) + goto invalid_pname; +diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c +index f20df9e..7fdfa72 100644 +--- a/src/mesa/state_tracker/st_atom_rasterizer.c ++++ b/src/mesa/state_tracker/st_atom_rasterizer.c +@@ -135,16 +135,12 @@ static void update_raster_state( struct st_context *st ) + + /* _NEW_POLYGON + */ +- if (ctx->Polygon.OffsetUnits != 0.0 || +- ctx->Polygon.OffsetFactor != 0.0) { +- raster->offset_point = ctx->Polygon.OffsetPoint; +- raster->offset_line = ctx->Polygon.OffsetLine; +- raster->offset_tri = 
ctx->Polygon.OffsetFill; +- } +- + if (ctx->Polygon.OffsetPoint || + ctx->Polygon.OffsetLine || + ctx->Polygon.OffsetFill) { ++ raster->offset_point = ctx->Polygon.OffsetPoint; ++ raster->offset_line = ctx->Polygon.OffsetLine; ++ raster->offset_tri = ctx->Polygon.OffsetFill; + raster->offset_units = ctx->Polygon.OffsetUnits; + raster->offset_scale = ctx->Polygon.OffsetFactor; + } +diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c +index 63dbdb2..36fffe9 100644 +--- a/src/mesa/state_tracker/st_cb_bitmap.c ++++ b/src/mesa/state_tracker/st_cb_bitmap.c +@@ -675,11 +675,12 @@ st_flush_bitmap_cache(struct st_context *st) + * \return GL_TRUE for success, GL_FALSE if bitmap is too large, etc. + */ + static GLboolean +-accum_bitmap(struct st_context *st, ++accum_bitmap(struct gl_context *ctx, + GLint x, GLint y, GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte *bitmap ) + { ++ struct st_context *st = ctx->st; + struct bitmap_cache *cache = st->bitmap.cache; + int px = -999, py = -999; + const GLfloat z = st->ctx->Current.RasterPos[2]; +@@ -729,9 +730,17 @@ accum_bitmap(struct st_context *st, + /* create the transfer if needed */ + create_cache_trans(st); + ++ /* PBO source... 
*/ ++ bitmap = _mesa_map_pbo_source(ctx, unpack, bitmap); ++ if (!bitmap) { ++ return FALSE; ++ } ++ + unpack_bitmap(st, px, py, width, height, unpack, bitmap, + cache->buffer, BITMAP_CACHE_WIDTH); + ++ _mesa_unmap_pbo_source(ctx, unpack); ++ + return GL_TRUE; /* accumulated */ + } + +@@ -764,7 +773,7 @@ st_Bitmap(struct gl_context *ctx, GLint x, GLint y, + semantic_indexes); + } + +- if (UseBitmapCache && accum_bitmap(st, x, y, width, height, unpack, bitmap)) ++ if (UseBitmapCache && accum_bitmap(ctx, x, y, width, height, unpack, bitmap)) + return; + + pt = make_bitmap_texture(ctx, width, height, unpack, bitmap); +diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c +index de62264..bff8d9b 100644 +--- a/src/mesa/state_tracker/st_draw.c ++++ b/src/mesa/state_tracker/st_draw.c +@@ -283,7 +283,7 @@ st_draw_vbo(struct gl_context *ctx, + /* don't trim, restarts might be inside index list */ + cso_draw_vbo(st->cso_context, &info); + } +- else if (u_trim_pipe_prim(info.mode, &info.count)) ++ else if (u_trim_pipe_prim(prims[i].mode, &info.count)) + cso_draw_vbo(st->cso_context, &info); + } + +diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c +index a9111b5..f56f7cb 100644 +--- a/src/mesa/state_tracker/st_program.c ++++ b/src/mesa/state_tracker/st_program.c +@@ -1142,7 +1142,7 @@ st_print_shaders(struct gl_context *ctx) + static void + destroy_program_variants(struct st_context *st, struct gl_program *program) + { +- if (!program) ++ if (!program || program == &_mesa_DummyProgram) + return; + + switch (program->Target) { diff --git a/mesa.spec b/mesa.spec index bcfa976..f360bdf 100644 --- a/mesa.spec +++ b/mesa.spec @@ -1,5 +1,4 @@ %if 0%{?rhel} -%define rhel_no_hw_arches ppc ppc64 ppc64p7 %define with_private_llvm 1 %else %define with_private_llvm 0 @@ -15,7 +14,7 @@ %endif # S390 doesn't have video cards, but we need swrast for xserver's GLX -%ifarch s390 s390x %{?rhel_no_hw_arches} +%ifarch s390 s390x 
%define with_hardware 0 %define dri_drivers --with-dri-drivers=swrast %else @@ -49,7 +48,7 @@ Summary: Mesa graphics libraries Name: mesa Version: 9.1 -Release: 2%{?dist} +Release: 3%{?dist} License: MIT Group: System Environment/Libraries URL: http://www.mesa3d.org @@ -64,16 +63,18 @@ Source3: make-git-snapshot.sh # Fedora opts to ignore the optional part of clause 2 and treat that code as 2 clause BSD. Source4: Mesa-MLAA-License-Clarification-Email.txt -# -fno-rtti makes nv50 assert angry -Patch0: nv50-fix-build.patch -Patch1: intel-revert-gl3.patch +# git diff-tree -p mesa-9.1..origin/9.1 > `git describe origin/9.1`.patch +Patch0: mesa-9.1-53-gd0ccb5b.patch + +Patch1: nv50-fix-build.patch +Patch2: intel-revert-gl3.patch #Patch7: mesa-7.1-link-shared.patch Patch9: mesa-8.0-llvmpipe-shmget.patch #Patch11: mesa-8.0-nouveau-tfp-blacklist.patch Patch12: mesa-8.0.1-fix-16bpp.patch -#Patch13: mesa-9.0.1-less-cxx-please.patch Patch14: i965-hack-hiz-snb-fix.patch + BuildRequires: pkgconfig autoconf automake libtool %if %{with_hardware} BuildRequires: kernel-headers @@ -282,8 +283,9 @@ Mesa shared glapi %prep %setup -q -n Mesa-%{version}%{?snapshot} #setup -q -n mesa-%{gitdate} -%patch0 -p1 -b .nv50rtti -%patch1 -p1 -b .nogl3 +%patch0 -p1 -b .git +%patch1 -p1 -b .nv50rtti +%patch2 -p1 -b .nogl3 #%patch11 -p1 -b .nouveau # this fastpath is: @@ -297,8 +299,6 @@ Mesa shared glapi #patch9 -p1 -b .shmget #patch12 -p1 -b .16bpp -#%patch13 -p1 -b .less-cpp - # hack from chromium - awaiting real upstream fix %patch14 -p1 -b .snbfix # default to dri (not xlib) for libGL on all arches @@ -592,6 +592,9 @@ rm -rf $RPM_BUILD_ROOT %endif %changelog +* Tue Mar 19 2013 Adam Jackson 9.1-3 +- mesa-9.1-53-gd0ccb5b.patch: Sync with today's git + * Tue Mar 19 2013 Dave Airlie 9.1-2 - add SNB hang workaround from chromium