mesa/mesa-9.1-53-gd0ccb5b.patch

1975 lines
76 KiB
Diff
Raw Normal View History

diff --git a/bin/get-pick-list.sh b/bin/get-pick-list.sh
index a141afe..d3ac511 100755
--- a/bin/get-pick-list.sh
+++ b/bin/get-pick-list.sh
@@ -8,7 +8,7 @@ git log --reverse --grep="cherry picked from commit" origin/master..HEAD |\
sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked
# Grep for commits that were marked as a candidate for the stable tree.
-git log --reverse --pretty=%H -i --grep='^[[:space:]]*NOTE: This is a candidate' HEAD..origin/master |\
+git log --reverse --pretty=%H -i --grep='^[[:space:]]*NOTE: .*[Cc]andidate' HEAD..origin/master |\
while read sha
do
# Check to see whether the patch is on the ignore list.
diff --git a/common.py b/common.py
index 6ff9608..1d618e6 100644
--- a/common.py
+++ b/common.py
@@ -100,4 +100,4 @@ def AddOptions(opts):
opts.Add(BoolOption('quiet', 'DEPRECATED: profile build', 'yes'))
opts.Add(BoolOption('texture_float', 'enable floating-point textures and renderbuffers', 'no'))
if host_platform == 'windows':
- opts.Add(EnumOption('MSVS_VERSION', 'MS Visual C++ version', None, allowed_values=('7.1', '8.0', '9.0')))
+ opts.Add(EnumOption('MSVC_VERSION', 'MS Visual C++ version', None, allowed_values=('7.1', '8.0', '9.0', '10.0', '11.0')))
diff --git a/configure.ac b/configure.ac
index 5701f8a..d75cf65 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1682,6 +1682,9 @@ if test "x$enable_gallium_llvm" = xyes; then
if $LLVM_CONFIG --components | grep -q '\<mcjit\>'; then
LLVM_COMPONENTS="${LLVM_COMPONENTS} mcjit"
fi
+ if $LLVM_CONFIG --components | grep -q '\<oprofilejit\>'; then
+ LLVM_COMPONENTS="${LLVM_COMPONENTS} oprofilejit"
+ fi
if test "x$enable_opencl" = xyes; then
LLVM_COMPONENTS="${LLVM_COMPONENTS} ipo linker instrumentation"
diff --git a/docs/index.html b/docs/index.html
index 5c92204..5d7229d 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -16,6 +16,23 @@
<h1>News</h1>
+<h2>February 22, 2013</h2>
+
+<p>
+<a href="relnotes-9.1.html">Mesa 9.1</a> is released.
+This is a new development release.
+See the release notes for more information about the release.
+</p>
+
+
+<h2>February 21, 2013</h2>
+
+<p>
+<a href="relnotes-9.0.3.html">Mesa 9.0.3</a> is released.
+This is a bug fix release.
+</p>
+
+
<h2>January 22, 2013</h2>
<p>
diff --git a/docs/relnotes-9.1.html b/docs/relnotes-9.1.html
index 24ba9f9..8232ab8 100644
--- a/docs/relnotes-9.1.html
+++ b/docs/relnotes-9.1.html
@@ -14,7 +14,7 @@
<iframe src="contents.html"></iframe>
<div class="content">
-<h1>Mesa 9.1 Release Notes / date February 22, 2013</h1>
+<h1>Mesa 9.1 Release Notes / February 22, 2013</h1>
<p>
Mesa 9.1 is a new development release.
@@ -33,7 +33,9 @@ because GL_ARB_compatibility is not supported.
<h2>MD5 checksums</h2>
<pre>
-tbd
+86d40f3056f89949368764bf84aff55e MesaLib-9.1.tar.gz
+d3891e02215422e120271d976ff1947e MesaLib-9.1.tar.bz2
+01645f28f53351c23b0beb6c688911d8 MesaLib-9.1.zip
</pre>
diff --git a/docs/relnotes.html b/docs/relnotes.html
index e373091..2e11bc4 100644
--- a/docs/relnotes.html
+++ b/docs/relnotes.html
@@ -22,6 +22,7 @@ The release notes summarize what's new or changed in each Mesa release.
<ul>
<li><a href="relnotes-9.1.html">9.1 release notes</a>
+<li><a href="relnotes-9.0.3.html">9.0.3 release notes</a>
<li><a href="relnotes-9.0.2.html">9.0.2 release notes</a>
<li><a href="relnotes-9.0.1.html">9.0.1 release notes</a>
<li><a href="relnotes-9.0.html">9.0 release notes</a>
diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h
index 09dca5b..1e388f8 100644
--- a/include/pci_ids/i965_pci_ids.h
+++ b/include/pci_ids/i965_pci_ids.h
@@ -53,12 +53,12 @@ CHIPSET(0x0A26, HASWELL_ULT_M_GT2_PLUS, hsw_gt2)
CHIPSET(0x0A0A, HASWELL_ULT_S_GT1, hsw_gt1)
CHIPSET(0x0A1A, HASWELL_ULT_S_GT2, hsw_gt2)
CHIPSET(0x0A2A, HASWELL_ULT_S_GT2_PLUS, hsw_gt2)
-CHIPSET(0x0D12, HASWELL_CRW_GT1, hsw_gt1)
-CHIPSET(0x0D22, HASWELL_CRW_GT2, hsw_gt2)
-CHIPSET(0x0D32, HASWELL_CRW_GT2_PLUS, hsw_gt2)
-CHIPSET(0x0D16, HASWELL_CRW_M_GT1, hsw_gt1)
-CHIPSET(0x0D26, HASWELL_CRW_M_GT2, hsw_gt2)
-CHIPSET(0x0D36, HASWELL_CRW_M_GT2_PLUS, hsw_gt2)
-CHIPSET(0x0D1A, HASWELL_CRW_S_GT1, hsw_gt1)
-CHIPSET(0x0D2A, HASWELL_CRW_S_GT2, hsw_gt2)
-CHIPSET(0x0D3A, HASWELL_CRW_S_GT2_PLUS, hsw_gt2)
+CHIPSET(0x0D02, HASWELL_CRW_GT1, hsw_gt1)
+CHIPSET(0x0D12, HASWELL_CRW_GT2, hsw_gt2)
+CHIPSET(0x0D22, HASWELL_CRW_GT2_PLUS, hsw_gt2)
+CHIPSET(0x0D06, HASWELL_CRW_M_GT1, hsw_gt1)
+CHIPSET(0x0D16, HASWELL_CRW_M_GT2, hsw_gt2)
+CHIPSET(0x0D26, HASWELL_CRW_M_GT2_PLUS, hsw_gt2)
+CHIPSET(0x0D0A, HASWELL_CRW_S_GT1, hsw_gt1)
+CHIPSET(0x0D1A, HASWELL_CRW_S_GT2, hsw_gt2)
+CHIPSET(0x0D2A, HASWELL_CRW_S_GT2_PLUS, hsw_gt2)
diff --git a/include/pci_ids/r600_pci_ids.h b/include/pci_ids/r600_pci_ids.h
index 7ceb820..9c9bab2 100644
--- a/include/pci_ids/r600_pci_ids.h
+++ b/include/pci_ids/r600_pci_ids.h
@@ -298,6 +298,10 @@ CHIPSET(0x9907, ARUBA_9907, ARUBA)
CHIPSET(0x9908, ARUBA_9908, ARUBA)
CHIPSET(0x9909, ARUBA_9909, ARUBA)
CHIPSET(0x990A, ARUBA_990A, ARUBA)
+CHIPSET(0x990B, ARUBA_990B, ARUBA)
+CHIPSET(0x990C, ARUBA_990C, ARUBA)
+CHIPSET(0x990D, ARUBA_990D, ARUBA)
+CHIPSET(0x990E, ARUBA_990E, ARUBA)
CHIPSET(0x990F, ARUBA_990F, ARUBA)
CHIPSET(0x9910, ARUBA_9910, ARUBA)
CHIPSET(0x9913, ARUBA_9913, ARUBA)
@@ -309,6 +313,13 @@ CHIPSET(0x9991, ARUBA_9991, ARUBA)
CHIPSET(0x9992, ARUBA_9992, ARUBA)
CHIPSET(0x9993, ARUBA_9993, ARUBA)
CHIPSET(0x9994, ARUBA_9994, ARUBA)
+CHIPSET(0x9995, ARUBA_9995, ARUBA)
+CHIPSET(0x9996, ARUBA_9996, ARUBA)
+CHIPSET(0x9997, ARUBA_9997, ARUBA)
+CHIPSET(0x9998, ARUBA_9998, ARUBA)
+CHIPSET(0x9999, ARUBA_9999, ARUBA)
+CHIPSET(0x999A, ARUBA_999A, ARUBA)
+CHIPSET(0x999B, ARUBA_999B, ARUBA)
CHIPSET(0x99A0, ARUBA_99A0, ARUBA)
CHIPSET(0x99A2, ARUBA_99A2, ARUBA)
CHIPSET(0x99A4, ARUBA_99A4, ARUBA)
diff --git a/scons/gallium.py b/scons/gallium.py
index 4b51b6e..b28be5d 100755
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -289,6 +289,7 @@ def generate(env):
'_CRT_SECURE_NO_DEPRECATE',
'_SCL_SECURE_NO_WARNINGS',
'_SCL_SECURE_NO_DEPRECATE',
+ '_ALLOW_KEYWORD_MACROS',
]
if env['build'] in ('debug', 'checked'):
cppdefines += ['_DEBUG']
@@ -401,6 +402,8 @@ def generate(env):
'/Oi', # enable intrinsic functions
]
else:
+ if distutils.version.LooseVersion(env['MSVC_VERSION']) < distutils.version.LooseVersion('11.0'):
+ print 'scons: warning: Visual Studio versions prior to 2012 are known to produce incorrect code when optimizations are enabled ( https://bugs.freedesktop.org/show_bug.cgi?id=58718 )'
ccflags += [
'/O2', # optimize for speed
]
diff --git a/scons/llvm.py b/scons/llvm.py
index e1ed760..7f00c6c 100644
--- a/scons/llvm.py
+++ b/scons/llvm.py
@@ -92,7 +92,19 @@ def generate(env):
'HAVE_STDINT_H',
])
env.Prepend(LIBPATH = [os.path.join(llvm_dir, 'lib')])
- if llvm_version >= distutils.version.LooseVersion('3.0'):
+ if llvm_version >= distutils.version.LooseVersion('3.2'):
+ # 3.2
+ env.Prepend(LIBS = [
+ 'LLVMBitWriter', 'LLVMX86Disassembler', 'LLVMX86AsmParser',
+ 'LLVMX86CodeGen', 'LLVMX86Desc', 'LLVMSelectionDAG',
+ 'LLVMAsmPrinter', 'LLVMMCParser', 'LLVMX86AsmPrinter',
+ 'LLVMX86Utils', 'LLVMX86Info', 'LLVMJIT',
+ 'LLVMExecutionEngine', 'LLVMCodeGen', 'LLVMScalarOpts',
+ 'LLVMInstCombine', 'LLVMTransformUtils', 'LLVMipa',
+ 'LLVMAnalysis', 'LLVMTarget', 'LLVMMC', 'LLVMCore',
+ 'LLVMSupport', 'LLVMRuntimeDyld', 'LLVMObject'
+ ])
+ elif llvm_version >= distutils.version.LooseVersion('3.0'):
# 3.0
env.Prepend(LIBS = [
'LLVMBitWriter', 'LLVMX86Disassembler', 'LLVMX86AsmParser',
diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index 351fbf4..e17d5be 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -195,7 +195,14 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
for (i = 0; attr_list[i] != EGL_NONE; i += 2)
_eglSetConfigKey(&base, attr_list[i], attr_list[i+1]);
- if (depth > 0 && depth != base.BufferSize)
+ /* Allow a 24-bit RGB visual to match a 32-bit RGBA EGLConfig. Otherwise
+ * it will only match a 32-bit RGBA visual. On a composited window manager
+ * on X11, this will make all of the EGLConfigs with destination alpha get
+ * blended by the compositor. This is probably not what the application
+ * wants... especially on drivers that only have 32-bit RGBA EGLConfigs!
+ */
+ if (depth > 0 && depth != base.BufferSize
+ && !(depth == 24 && base.BufferSize == 32))
return NULL;
if (rgba_masks && memcmp(rgba_masks, dri_masks, sizeof(dri_masks)))
diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c
index 7b879c4..3110809 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -167,12 +167,17 @@ static void interp( const struct clip_stage *clip,
{
int k;
t_nopersp = t;
- for (k = 0; k < 2; k++)
+ /* find either in.x != out.x or in.y != out.y */
+ for (k = 0; k < 2; k++) {
if (in->clip[k] != out->clip[k]) {
- t_nopersp = (dst->clip[k] - out->clip[k]) /
- (in->clip[k] - out->clip[k]);
+ /* do divide by W, then compute linear interpolation factor */
+ float in_coord = in->clip[k] / in->clip[3];
+ float out_coord = out->clip[k] / out->clip[3];
+ float dst_coord = dst->clip[k] / dst->clip[3];
+ t_nopersp = (dst_coord - out_coord) / (in_coord - out_coord);
break;
}
+ }
}
/* Other attributes
diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c
index 3da52b1..3578525 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_offset.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c
@@ -127,10 +127,44 @@ static void offset_first_tri( struct draw_stage *stage,
struct prim_header *header )
{
struct offset_stage *offset = offset_stage(stage);
+ const struct pipe_rasterizer_state *rast = stage->draw->rasterizer;
+ unsigned fill_mode = rast->fill_front;
+ boolean do_offset;
+
+ if (rast->fill_back != rast->fill_front) {
+ /* Need to check for back-facing triangle */
+ boolean ccw = header->det < 0.0f;
+ if (ccw != rast->front_ccw)
+ fill_mode = rast->fill_back;
+ }
+
+ /* Now determine if we need to do offsetting for the point/line/fill mode */
+ switch (fill_mode) {
+ case PIPE_POLYGON_MODE_FILL:
+ do_offset = rast->offset_tri;
+ break;
+ case PIPE_POLYGON_MODE_LINE:
+ do_offset = rast->offset_line;
+ break;
+ case PIPE_POLYGON_MODE_POINT:
+ do_offset = rast->offset_point;
+ break;
+ default:
+ assert(!"invalid fill_mode in offset_first_tri()");
+ do_offset = rast->offset_tri;
+ }
+
+ if (do_offset) {
+ offset->scale = rast->offset_scale;
+ offset->clamp = rast->offset_clamp;
+ offset->units = (float) (rast->offset_units * stage->draw->mrd);
+ }
+ else {
+ offset->scale = 0.0f;
+ offset->clamp = 0.0f;
+ offset->units = 0.0f;
+ }
- offset->units = (float) (stage->draw->rasterizer->offset_units * stage->draw->mrd);
- offset->scale = stage->draw->rasterizer->offset_scale;
- offset->clamp = stage->draw->rasterizer->offset_clamp;
stage->tri = offset_tri;
stage->tri( stage, header );
diff --git a/src/gallium/auxiliary/util/u_range.h b/src/gallium/auxiliary/util/u_range.h
new file mode 100644
index 0000000..4b1d0d1
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_range.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2013 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+/**
+ * @file
+ * 1D integer range, capable of the union and intersection operations.
+ *
+ * It only maintains a single interval which is extended when the union is
+ * done. This implementation is partially thread-safe (readers are not
+ * protected by a lock).
+ *
+ * @author Marek Olšák
+ */
+
+#ifndef U_RANGE_H
+#define U_RANGE_H
+
+#include "os/os_thread.h"
+
+struct util_range {
+ unsigned start; /* inclusive */
+ unsigned end; /* exclusive */
+
+ /* for the range to be consistent with multiple contexts: */
+ pipe_mutex write_mutex;
+};
+
+
+static INLINE void
+util_range_set_empty(struct util_range *range)
+{
+ range->start = ~0;
+ range->end = 0;
+}
+
+/* This is like a union of two sets. */
+static INLINE void
+util_range_add(struct util_range *range, unsigned start, unsigned end)
+{
+ if (start < range->start || end > range->end) {
+ pipe_mutex_lock(range->write_mutex);
+ range->start = MIN2(start, range->start);
+ range->end = MAX2(end, range->end);
+ pipe_mutex_unlock(range->write_mutex);
+ }
+}
+
+static INLINE boolean
+util_ranges_intersect(struct util_range *range, unsigned start, unsigned end)
+{
+ return MAX2(start, range->start) < MIN2(end, range->end);
+}
+
+
+/* Init/deinit */
+
+static INLINE void
+util_range_init(struct util_range *range)
+{
+ pipe_mutex_init(range->write_mutex);
+ util_range_set_empty(range);
+}
+
+static INLINE void
+util_range_destroy(struct util_range *range)
+{
+ pipe_mutex_destroy(range->write_mutex);
+}
+
+#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
index 40ccaf6..ca8df71 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
@@ -46,6 +46,10 @@ clear_flags(struct pipe_rasterizer_state *rast)
{
rast->light_twoside = 0;
rast->offset_tri = 0;
+ rast->offset_line = 0;
+ rast->offset_point = 0;
+ rast->offset_units = 0.0f;
+ rast->offset_scale = 0.0f;
}
@@ -74,6 +78,8 @@ llvmpipe_create_rasterizer_state(struct pipe_context *pipe,
*/
need_pipeline = (rast->fill_front != PIPE_POLYGON_MODE_FILL ||
rast->fill_back != PIPE_POLYGON_MODE_FILL ||
+ rast->offset_point ||
+ rast->offset_line ||
rast->point_smooth ||
rast->line_smooth ||
rast->line_stipple_enable ||
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index 2e9c6bf..f17a04a 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -295,7 +295,9 @@ llvmpipe_resource_create(struct pipe_screen *_screen,
/* assert(lpr->base.bind); */
if (resource_is_texture(&lpr->base)) {
- if (lpr->base.bind & PIPE_BIND_DISPLAY_TARGET) {
+ if (lpr->base.bind & (PIPE_BIND_DISPLAY_TARGET |
+ PIPE_BIND_SCANOUT |
+ PIPE_BIND_SHARED)) {
/* displayable surface */
if (!llvmpipe_displaytarget_layout(screen, lpr))
goto fail;
diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c
index bb47530..bb43353 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -283,4 +283,7 @@ void evergreen_dma_copy(struct r600_context *rctx,
src_offset += csize << shift;
size -= csize;
}
+
+ util_range_add(&rdst->valid_buffer_range, dst_offset,
+ dst_offset + size);
}
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 389ad3c..804c037 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -808,6 +808,7 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx,
dsa->valuemask[1] = state->stencil[1].valuemask;
dsa->writemask[0] = state->stencil[0].writemask;
dsa->writemask[1] = state->stencil[1].writemask;
+ dsa->zwritemask = state->depth.writemask;
db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
@@ -1321,6 +1322,10 @@ void evergreen_init_color_surface_rat(struct r600_context *rctx,
* elements. */
surf->cb_color_dim = pipe_buffer->width0;
+ /* Set the buffer range the GPU will have access to: */
+ util_range_add(&r600_resource(pipe_buffer)->valid_buffer_range,
+ 0, pipe_buffer->width0);
+
surf->cb_color_cmask = surf->cb_color_base;
surf->cb_color_cmask_slice = 0;
surf->cb_color_fmask = surf->cb_color_base;
@@ -1405,10 +1410,15 @@ void evergreen_init_color_surface(struct r600_context *rctx,
S_028C74_NON_DISP_TILING_ORDER(non_disp_tiling) |
S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
- if (rctx->chip_class == CAYMAN && rtex->resource.b.b.nr_samples > 1) {
- unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);
- color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
- S_028C74_NUM_FRAGMENTS(log_samples);
+ if (rctx->chip_class == CAYMAN) {
+ color_attrib |= S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] ==
+ UTIL_FORMAT_SWIZZLE_1);
+
+ if (rtex->resource.b.b.nr_samples > 1) {
+ unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);
+ color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
+ S_028C74_NUM_FRAGMENTS(log_samples);
+ }
}
ntype = V_028C70_NUMBER_UNORM;
@@ -1647,6 +1657,11 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
}
if (rctx->framebuffer.state.zsbuf) {
rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
+
+ rtex = (struct r600_texture*)rctx->framebuffer.state.zsbuf->texture;
+ if (rtex->htile) {
+ rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_DB_META;
+ }
}
util_copy_framebuffer_state(&rctx->framebuffer.state, state);
@@ -2222,7 +2237,14 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
}
db_render_override |= S_02800C_NOOP_CULL_DISABLE(1);
}
- if (rctx->db_state.rsurf && rctx->db_state.rsurf->htile_enabled) {
+ /* FIXME we should be able to use hyperz even if we are not writing to
+ * zbuffer but somehow this trigger GPU lockup. See :
+ *
+ * https://bugs.freedesktop.org/show_bug.cgi?id=60848
+ *
+ * Disable hyperz for now if not writing to zbuffer.
+ */
+ if (rctx->db_state.rsurf && rctx->db_state.rsurf->htile_enabled && rctx->zwritemask) {
/* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL */
db_render_override |= S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_OFF);
/* This is to fix a lockup when hyperz and alpha test are enabled at
@@ -3654,6 +3676,17 @@ boolean evergreen_dma_blit(struct pipe_context *ctx,
return FALSE;
}
+ /* 128 bpp surfaces require non_disp_tiling for both
+ * tiled and linear buffers on cayman. However, async
+ * DMA only supports it on the tiled side. As such
+ * the tile order is backwards after a L2T/T2L packet.
+ */
+ if ((rctx->chip_class == CAYMAN) &&
+ (src_mode != dst_mode) &&
+ (util_format_get_blocksize(src->format) >= 16)) {
+ return FALSE;
+ }
+
if (src_mode == dst_mode) {
uint64_t dst_offset, src_offset;
/* simple dma blit would do NOTE code here assume :
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 11dbb3b..0115293 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -28,6 +28,7 @@
#include "../../winsys/radeon/drm/radeon_winsys.h"
#include "util/u_double_list.h"
+#include "util/u_range.h"
#include "util/u_transfer.h"
#define R600_ERR(fmt, args...) \
@@ -50,6 +51,16 @@ struct r600_resource {
/* Resource state. */
unsigned domains;
+
+ /* The buffer range which is initialized (with a write transfer,
+ * streamout, DMA, or as a random access target). The rest of
+ * the buffer is considered invalid and can be mapped unsynchronized.
+ *
+ * This allows unsychronized mapping of a buffer range which hasn't
+ * been used yet. It's for applications which forget to use
+ * the unsynchronized map flag and expect the driver to figure it out.
+ */
+ struct util_range valid_buffer_range;
};
#define R600_BLOCK_MAX_BO 32
@@ -152,6 +163,7 @@ struct r600_so_target {
#define R600_CONTEXT_FLUSH_AND_INV (1 << 4)
#define R600_CONTEXT_FLUSH_AND_INV_CB_META (1 << 5)
#define R600_CONTEXT_PS_PARTIAL_FLUSH (1 << 6)
+#define R600_CONTEXT_FLUSH_AND_INV_DB_META (1 << 7)
struct r600_context;
struct r600_screen;
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index f25c6aa..bda425c 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -322,6 +322,7 @@ int r600_bytecode_add_output(struct r600_bytecode *bc, const struct r600_bytecod
output->swizzle_y == bc->cf_last->output.swizzle_y &&
output->swizzle_z == bc->cf_last->output.swizzle_z &&
output->swizzle_w == bc->cf_last->output.swizzle_w &&
+ output->comp_mask == bc->cf_last->output.comp_mask &&
(output->burst_count + bc->cf_last->output.burst_count) <= 16) {
if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr &&
@@ -873,12 +874,6 @@ static int check_and_set_bank_swizzle(struct r600_bytecode *bc,
bank_swizzle[4] = SQ_ALU_SCL_210;
while(bank_swizzle[4] <= SQ_ALU_SCL_221) {
- if (max_slots == 4) {
- for (i = 0; i < max_slots; i++) {
- if (bank_swizzle[i] == SQ_ALU_VEC_210)
- return -1;
- }
- }
init_bank_swizzle(&bs);
if (scalar_only == false) {
for (i = 0; i < 4; i++) {
@@ -910,8 +905,10 @@ static int check_and_set_bank_swizzle(struct r600_bytecode *bc,
bank_swizzle[i]++;
if (bank_swizzle[i] <= SQ_ALU_VEC_210)
break;
- else
+ else if (i < max_slots - 1)
bank_swizzle[i] = SQ_ALU_VEC_012;
+ else
+ return -1;
}
}
}
diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
index 6df0d91..bb85fc1 100644
--- a/src/gallium/drivers/r600/r600_buffer.c
+++ b/src/gallium/drivers/r600/r600_buffer.c
@@ -34,6 +34,7 @@ static void r600_buffer_destroy(struct pipe_screen *screen,
{
struct r600_resource *rbuffer = r600_resource(buf);
+ util_range_destroy(&rbuffer->valid_buffer_range);
pb_reference(&rbuffer->buf, NULL);
FREE(rbuffer);
}
@@ -98,6 +99,14 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
assert(box->x + box->width <= resource->width0);
+ /* See if the buffer range being mapped has never been initialized,
+ * in which case it can be mapped unsynchronized. */
+ if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
+ usage & PIPE_TRANSFER_WRITE &&
+ !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
+ usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
+ }
+
if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
assert(usage & PIPE_TRANSFER_WRITE);
@@ -178,6 +187,7 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe,
{
struct r600_context *rctx = (struct r600_context*)pipe;
struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
+ struct r600_resource *rbuffer = r600_resource(transfer->resource);
if (rtransfer->staging) {
struct pipe_resource *dst, *src;
@@ -189,7 +199,7 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe,
doffset = transfer->box.x;
soffset = rtransfer->offset + transfer->box.x % R600_MAP_BUFFER_ALIGNMENT;
/* Copy the staging buffer into the original one. */
- if (rctx->rings.dma.cs && !(size % 4) && !(doffset % 4) && !(soffset)) {
+ if (rctx->rings.dma.cs && !(size % 4) && !(doffset % 4) && !(soffset % 4)) {
if (rctx->screen->chip_class >= EVERGREEN) {
evergreen_dma_copy(rctx, dst, src, doffset, soffset, size);
} else {
@@ -203,6 +213,11 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe,
}
pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL);
}
+
+ if (transfer->usage & PIPE_TRANSFER_WRITE) {
+ util_range_add(&rbuffer->valid_buffer_range, transfer->box.x,
+ transfer->box.x + transfer->box.width);
+ }
util_slab_free(&rctx->pool_transfers, transfer);
}
@@ -259,6 +274,7 @@ bool r600_init_resource(struct r600_screen *rscreen,
res->cs_buf = rscreen->ws->buffer_get_cs_handle(res->buf);
res->domains = domains;
+ util_range_set_empty(&res->valid_buffer_range);
return true;
}
@@ -275,6 +291,7 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
pipe_reference_init(&rbuffer->b.b.reference, 1);
rbuffer->b.b.screen = screen;
rbuffer->b.vtbl = &r600_buffer_vtbl;
+ util_range_init(&rbuffer->valid_buffer_range);
if (!r600_init_resource(rscreen, rbuffer, templ->width0, alignment, TRUE, templ->usage)) {
FREE(rbuffer);
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 9091ec0..322381a 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -648,6 +648,12 @@ void r600_flush_emit(struct r600_context *rctx)
cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0);
}
+ if (rctx->chip_class >= R700 &&
+ (rctx->flags & R600_CONTEXT_FLUSH_AND_INV_DB_META)) {
+ cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
+ cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0);
+ }
+
if (rctx->flags & R600_CONTEXT_FLUSH_AND_INV) {
cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0);
@@ -742,6 +748,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
*/
ctx->flags |= R600_CONTEXT_FLUSH_AND_INV |
R600_CONTEXT_FLUSH_AND_INV_CB_META |
+ R600_CONTEXT_FLUSH_AND_INV_DB_META |
R600_CONTEXT_WAIT_3D_IDLE |
R600_CONTEXT_WAIT_CP_DMA_IDLE;
@@ -1119,6 +1126,7 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
rctx->flags |= R600_CONTEXT_INVAL_READ_CACHES |
R600_CONTEXT_FLUSH_AND_INV |
R600_CONTEXT_FLUSH_AND_INV_CB_META |
+ R600_CONTEXT_FLUSH_AND_INV_DB_META |
R600_CONTEXT_STREAMOUT_FLUSH |
R600_CONTEXT_WAIT_3D_IDLE;
@@ -1164,6 +1172,9 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
/* Invalidate the read caches. */
rctx->flags |= R600_CONTEXT_INVAL_READ_CACHES;
+
+ util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset,
+ dst_offset + size);
}
void r600_need_dma_space(struct r600_context *ctx, unsigned num_dw)
@@ -1210,4 +1221,7 @@ void r600_dma_copy(struct r600_context *rctx,
src_offset += csize << shift;
size -= csize;
}
+
+ util_range_add(&rdst->valid_buffer_range, dst_offset,
+ dst_offset + size);
}
diff --git a/src/gallium/drivers/r600/r600_hw_context_priv.h b/src/gallium/drivers/r600/r600_hw_context_priv.h
index 692e6ec..3b50f68 100644
--- a/src/gallium/drivers/r600/r600_hw_context_priv.h
+++ b/src/gallium/drivers/r600/r600_hw_context_priv.h
@@ -29,7 +29,7 @@
#include "r600_pipe.h"
/* the number of CS dwords for flushing and drawing */
-#define R600_MAX_FLUSH_CS_DWORDS 12
+#define R600_MAX_FLUSH_CS_DWORDS 16
#define R600_MAX_DRAW_CS_DWORDS 34
#define R600_TRACE_CS_DWORDS 7
diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
index fa66fcc..7a41688 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -38,8 +38,12 @@ static LLVMValueRef llvm_fetch_const(
LLVMValueRef index = LLVMBuildLoad(bld_base->base.gallivm->builder, bld->addr[reg->Indirect.Index][reg->Indirect.SwizzleX], "");
offset[1] = LLVMBuildAdd(bld_base->base.gallivm->builder, offset[1], index, "");
}
+ unsigned ConstantAddressSpace = CONSTANT_BUFFER_0_ADDR_SPACE ;
+ if (reg->Register.Dimension) {
+ ConstantAddressSpace += reg->Dimension.Index;
+ }
LLVMTypeRef const_ptr_type = LLVMPointerType(LLVMArrayType(LLVMVectorType(bld_base->base.elem_type, 4), 1024),
- CONSTANT_BUFFER_0_ADDR_SPACE);
+ ConstantAddressSpace);
LLVMValueRef const_ptr = LLVMBuildIntToPtr(bld_base->base.gallivm->builder, lp_build_const_int32(bld_base->base.gallivm, 0), const_ptr_type, "");
LLVMValueRef ptr = LLVMBuildGEP(bld_base->base.gallivm->builder, const_ptr, offset, 2, "");
LLVMValueRef cvecval = LLVMBuildLoad(bld_base->base.gallivm->builder, ptr, "");
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index a59578d..a7973a5 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -22,6 +22,7 @@
*/
#include "r600_pipe.h"
#include "r600_public.h"
+#include "r600d.h"
#include <errno.h>
#include "pipe/p_shader_tokens.h"
@@ -165,12 +166,23 @@ static void r600_flush_gfx_ring(void *ctx, unsigned flags)
static void r600_flush_dma_ring(void *ctx, unsigned flags)
{
struct r600_context *rctx = (struct r600_context *)ctx;
+ struct radeon_winsys_cs *cs = rctx->rings.dma.cs;
+ unsigned padding_dw, i;
- if (!rctx->rings.dma.cs->cdw) {
+ if (!cs->cdw) {
return;
}
+
+ /* Pad the DMA CS to a multiple of 8 dwords. */
+ padding_dw = 8 - cs->cdw % 8;
+ if (padding_dw < 8) {
+ for (i = 0; i < padding_dw; i++) {
+ cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
+ }
+ }
+
rctx->rings.dma.flushing = true;
- rctx->ws->cs_flush(rctx->rings.dma.cs, flags);
+ rctx->ws->cs_flush(cs, flags);
rctx->rings.dma.flushing = false;
}
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index ec59c92..1be4321 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -298,7 +298,8 @@ struct r600_dsa_state {
unsigned alpha_ref;
ubyte valuemask[2];
ubyte writemask[2];
- unsigned sx_alpha_test_control;
+ unsigned zwritemask;
+ unsigned sx_alpha_test_control;
};
struct r600_pipe_shader;
@@ -513,6 +514,7 @@ struct r600_context {
bool alpha_to_one;
bool force_blend_disable;
boolean dual_src_blend;
+ unsigned zwritemask;
/* Index buffer. */
struct pipe_index_buffer index_buffer;
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 3f165f7..70232fd 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -802,6 +802,7 @@ static void *r600_create_dsa_state(struct pipe_context *ctx,
dsa->valuemask[1] = state->stencil[1].valuemask;
dsa->writemask[0] = state->stencil[0].writemask;
dsa->writemask[1] = state->stencil[1].writemask;
+ dsa->zwritemask = state->depth.writemask;
db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
@@ -1515,6 +1516,11 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
}
if (rctx->framebuffer.state.zsbuf) {
rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
+
+ rtex = (struct r600_texture*)rctx->framebuffer.state.zsbuf->texture;
+ if (rctx->chip_class >= R700 && rtex->htile) {
+ rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_DB_META;
+ }
}
/* Set the new state. */
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 88bb62b..f0e9de3 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -284,6 +284,16 @@ static void r600_bind_dsa_state(struct pipe_context *ctx, void *state)
ref.valuemask[1] = dsa->valuemask[1];
ref.writemask[0] = dsa->writemask[0];
ref.writemask[1] = dsa->writemask[1];
+ if (rctx->zwritemask != dsa->zwritemask) {
+ rctx->zwritemask = dsa->zwritemask;
+ if (rctx->chip_class >= EVERGREEN) {
+ /* work around some issue when not writting to zbuffer
+ * we are having lockup on evergreen so do not enable
+ * hyperz when not writting zbuffer
+ */
+ rctx->db_misc_state.atom.dirty = true;
+ }
+ }
r600_set_stencil_ref(ctx, &ref);
@@ -972,6 +982,7 @@ r600_create_so_target(struct pipe_context *ctx,
{
struct r600_context *rctx = (struct r600_context *)ctx;
struct r600_so_target *t;
+ struct r600_resource *rbuffer = (struct r600_resource*)buffer;
t = CALLOC_STRUCT(r600_so_target);
if (!t) {
@@ -991,6 +1002,9 @@ r600_create_so_target(struct pipe_context *ctx,
pipe_resource_reference(&t->b.buffer, buffer);
t->b.buffer_offset = buffer_offset;
t->b.buffer_size = buffer_size;
+
+ util_range_add(&rbuffer->valid_buffer_range, buffer_offset,
+ buffer_offset + buffer_size);
return &t->b;
}
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 621e7a1..81e5a6c 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -119,6 +119,7 @@
#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16
#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH 0x1f
#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS 0x20
+#define EVENT_TYPE_FLUSH_AND_INV_DB_META 0x2c /* supported on r700+ */
#define EVENT_TYPE_FLUSH_AND_INV_CB_META 46 /* supported on r700+ */
#define EVENT_TYPE(x) ((x) << 0)
#define EVENT_INDEX(x) ((x) << 8)
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 0f90991..8902ae4 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -766,6 +766,22 @@ static void emit_icmp(
emit_data->output[emit_data->chan] = v;
}
+static void emit_ucmp(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ unsigned pred;
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ LLVMContextRef context = bld_base->base.gallivm->context;
+
+
+ LLVMValueRef v = LLVMBuildFCmp(builder, LLVMRealUGE,
+ emit_data->args[0], lp_build_const_float(bld_base->base.gallivm, 0.), "");
+
+ emit_data->output[emit_data->chan] = LLVMBuildSelect(builder, v, emit_data->args[2], emit_data->args[1], "");
+}
+
static void emit_cmp(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context * bld_base,
@@ -1241,6 +1257,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp;
bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
+ bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
bld_base->rsq_action.emit = build_tgsi_intrinsic_nomem;
bld_base->rsq_action.intr_name = "llvm.AMDGPU.rsq";
diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c
index 2545634..7922928 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
@@ -309,14 +309,8 @@ static void declare_input_fs(
/* XXX: Handle all possible interpolation modes */
switch (decl->Interp.Interpolate) {
case TGSI_INTERPOLATE_COLOR:
- /* XXX: Flat shading hangs the GPU */
- if (si_shader_ctx->rctx->queued.named.rasterizer &&
- si_shader_ctx->rctx->queued.named.rasterizer->flatshade) {
-#if 0
+ if (si_shader_ctx->key.flatshade) {
intr_name = "llvm.SI.fs.interp.constant";
-#else
- intr_name = "llvm.SI.fs.interp.linear.center";
-#endif
} else {
if (decl->Interp.Centroid)
intr_name = "llvm.SI.fs.interp.persp.centroid";
@@ -325,11 +319,8 @@ static void declare_input_fs(
}
break;
case TGSI_INTERPOLATE_CONSTANT:
- /* XXX: Flat shading hangs the GPU */
-#if 0
intr_name = "llvm.SI.fs.interp.constant";
break;
-#endif
case TGSI_INTERPOLATE_LINEAR:
if (decl->Interp.Centroid)
intr_name = "llvm.SI.fs.interp.linear.centroid";
diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.h b/src/gallium/drivers/radeonsi/radeonsi_shader.h
index 07b2f9f..f54f67c 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.h
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.h
@@ -82,6 +82,7 @@ struct si_shader_key {
unsigned nr_cbufs:4;
unsigned color_two_side:1;
unsigned alpha_func:3;
+ unsigned flatshade:1;
float alpha_ref;
};
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index a6b1983..39817fb 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -421,8 +421,7 @@ static void *si_create_rs_state(struct pipe_context *ctx,
rs->offset_units = state->offset_units;
rs->offset_scale = state->offset_scale * 12.0f;
- /* XXX: Flat shading hangs the GPU */
- tmp = S_0286D4_FLAT_SHADE_ENA(0);
+ tmp = S_0286D4_FLAT_SHADE_ENA(1);
if (state->sprite_coord_enable) {
tmp |= S_0286D4_PNT_SPRITE_ENA(1) |
S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
@@ -1859,7 +1858,7 @@ static INLINE struct si_shader_key si_shader_selector_key(struct pipe_context *c
key.export_16bpc = rctx->export_16bpc;
if (rctx->queued.named.rasterizer) {
key.color_two_side = rctx->queued.named.rasterizer->two_side;
- /*key.flatshade = rctx->queued.named.rasterizer->flatshade;*/
+ key.flatshade = rctx->queued.named.rasterizer->flatshade;
}
if (rctx->queued.named.dsa) {
key.alpha_func = rctx->queued.named.dsa->alpha_func;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 3704410..8c35625 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -128,11 +128,6 @@ static void si_pipe_shader_ps(struct pipe_context *ctx, struct si_pipe_shader *s
continue;
}
- /* XXX: Flat shading hangs the GPU */
- if (shader->shader.input[i].interpolate == TGSI_INTERPOLATE_CONSTANT ||
- (shader->shader.input[i].interpolate == TGSI_INTERPOLATE_COLOR &&
- rctx->queued.named.rasterizer->flatshade))
- have_linear = TRUE;
if (shader->shader.input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
have_linear = TRUE;
if (shader->shader.input[i].interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
@@ -327,15 +322,12 @@ static void si_update_spi_map(struct r600_context *rctx)
bcolor:
tmp = 0;
-#if 0
- /* XXX: Flat shading hangs the GPU */
if (name == TGSI_SEMANTIC_POSITION ||
ps->input[i].interpolate == TGSI_INTERPOLATE_CONSTANT ||
(ps->input[i].interpolate == TGSI_INTERPOLATE_COLOR &&
- rctx->rasterizer && rctx->rasterizer->flatshade)) {
+ rctx->ps_shader->current->key.flatshade)) {
tmp |= S_028644_FLAT_SHADE(1);
}
-#endif
if (name == TGSI_SEMANTIC_GENERIC &&
rctx->sprite_coord_enable & (1 << ps->input[i].sid)) {
@@ -453,8 +445,14 @@ static void si_vertex_buffer_update(struct r600_context *rctx)
si_pm4_sh_data_add(pm4, va & 0xFFFFFFFF);
si_pm4_sh_data_add(pm4, (S_008F04_BASE_ADDRESS_HI(va >> 32) |
S_008F04_STRIDE(vb->stride)));
- si_pm4_sh_data_add(pm4, (vb->buffer->width0 - vb->buffer_offset) /
- MAX2(vb->stride, 1));
+ if (vb->stride)
+ /* Round up by rounding down and adding 1 */
+ si_pm4_sh_data_add(pm4,
+ (vb->buffer->width0 - offset -
+ util_format_get_blocksize(ve->src_format)) /
+ vb->stride + 1);
+ else
+ si_pm4_sh_data_add(pm4, vb->buffer->width0 - offset);
si_pm4_sh_data_add(pm4, rctx->vertex_elements->rsrc_word3[i]);
if (!bound[ve->vertex_buffer_index]) {
diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.c b/src/gallium/state_trackers/glx/xlib/xm_api.c
index 607584f..021175c 100644
--- a/src/gallium/state_trackers/glx/xlib/xm_api.c
+++ b/src/gallium/state_trackers/glx/xlib/xm_api.c
@@ -438,7 +438,6 @@ create_xmesa_buffer(Drawable d, BufferType type,
{
XMesaDisplay xmdpy = xmesa_init_display(vis->display);
XMesaBuffer b;
- uint width, height;
ASSERT(type == WINDOW || type == PIXMAP || type == PBUFFER);
@@ -457,7 +456,7 @@ create_xmesa_buffer(Drawable d, BufferType type,
b->type = type;
b->cmap = cmap;
- get_drawable_size(vis->display, d, &width, &height);
+ get_drawable_size(vis->display, d, &b->width, &b->height);
/*
* Create framebuffer, but we'll plug in our own renderbuffers below.
diff --git a/src/gallium/targets/dri-vmwgfx/Makefile.am b/src/gallium/targets/dri-vmwgfx/Makefile.am
index 06ebf88..ca7df65 100644
--- a/src/gallium/targets/dri-vmwgfx/Makefile.am
+++ b/src/gallium/targets/dri-vmwgfx/Makefile.am
@@ -58,17 +58,13 @@ vmwgfx_dri_la_LIBADD = \
$(top_builddir)/src/gallium/drivers/svga/libsvga.la \
$(GALLIUM_DRI_LIB_DEPS)
-if HAVE_MESA_LLVM
vmwgfx_dri_la_LINK = $(CXXLINK) $(vmwgfx_dri_la_LDFLAGS)
# Mention a dummy pure C++ file to trigger generation of the $(LINK) variable
nodist_EXTRA_vmwgfx_dri_la_SOURCES = dummy-cpp.cpp
+if HAVE_MESA_LLVM
vmwgfx_dri_la_LDFLAGS += $(LLVM_LDFLAGS)
vmwgfx_dri_la_LIBADD += $(LLVM_LIBS)
-else
-vmwgfx_dri_la_LINK = $(LINK) $(vmwgfx_dri_la_LDFLAGS)
-# Mention a dummy pure C file to trigger generation of the $(LINK) variable
-nodist_EXTRA_vmwgfx_dri_la_SOURCES = dummy-c.c
endif
# Provide compatibility with scripts for the old Mesa build system for
diff --git a/src/gallium/targets/vdpau-softpipe/Makefile.am b/src/gallium/targets/vdpau-softpipe/Makefile.am
index 3372b5c..7bde2f8 100644
--- a/src/gallium/targets/vdpau-softpipe/Makefile.am
+++ b/src/gallium/targets/vdpau-softpipe/Makefile.am
@@ -35,7 +35,7 @@ vdpaudir = $(VDPAU_LIB_INSTALL_DIR)
vdpau_LTLIBRARIES = libvdpau_softpipe.la
libvdpau_softpipe_la_SOURCES = \
- $(top_srcdir)/src/gallium/auxiliary/vl/vl_winsys_dri.c
+ $(top_srcdir)/src/gallium/auxiliary/vl/vl_winsys_xsp.c
libvdpau_softpipe_la_LDFLAGS = \
-module \
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 2d41c26..f4ac526 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -957,16 +957,16 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer,
bo->flinked = TRUE;
bo->flink = flink.name;
+
+ pipe_mutex_lock(bo->mgr->bo_handles_mutex);
+ util_hash_table_set(bo->mgr->bo_handles, (void*)(uintptr_t)bo->flink, bo);
+ pipe_mutex_unlock(bo->mgr->bo_handles_mutex);
}
whandle->handle = bo->flink;
} else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
whandle->handle = bo->handle;
}
- pipe_mutex_lock(bo->mgr->bo_handles_mutex);
- util_hash_table_set(bo->mgr->bo_handles, (void*)(uintptr_t)whandle->handle, bo);
- pipe_mutex_unlock(bo->mgr->bo_handles_mutex);
-
whandle->stride = stride;
return TRUE;
}
diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c
index 519929e..a3a0530 100644
--- a/src/gbm/backends/dri/gbm_dri.c
+++ b/src/gbm/backends/dri/gbm_dri.c
@@ -481,6 +481,7 @@ create_dumb(struct gbm_device *gbm,
bo->base.base.width = width;
bo->base.base.height = height;
bo->base.base.stride = create_arg.pitch;
+ bo->base.base.format = format;
bo->base.base.handle.u32 = create_arg.handle;
bo->handle = create_arg.handle;
bo->size = create_arg.size;
@@ -529,6 +530,7 @@ gbm_dri_bo_create(struct gbm_device *gbm,
bo->base.base.gbm = gbm;
bo->base.base.width = width;
bo->base.base.height = height;
+ bo->base.base.format = format;
switch (format) {
case GBM_FORMAT_RGB565:
diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index 4e32b50..29a209e 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -1910,6 +1910,14 @@ _mesa_meta_BlitFramebuffer(struct gl_context *ctx,
GLuint *tmp = malloc(srcW * srcH * sizeof(GLuint));
if (tmp) {
+
+ newTex = alloc_texture(depthTex, srcW, srcH, GL_DEPTH_COMPONENT);
+ _mesa_ReadPixels(srcX, srcY, srcW, srcH, GL_DEPTH_COMPONENT,
+ GL_UNSIGNED_INT, tmp);
+ setup_drawpix_texture(ctx, depthTex, newTex, GL_DEPTH_COMPONENT,
+ srcW, srcH, GL_DEPTH_COMPONENT,
+ GL_UNSIGNED_INT, tmp);
+
/* texcoords (after texture allocation!) */
{
verts[0].s = 0.0F;
@@ -1928,15 +1936,6 @@ _mesa_meta_BlitFramebuffer(struct gl_context *ctx,
if (!blit->DepthFP)
init_blit_depth_pixels(ctx);
- /* maybe change tex format here */
- newTex = alloc_texture(depthTex, srcW, srcH, GL_DEPTH_COMPONENT);
-
- _mesa_ReadPixels(srcX, srcY, srcW, srcH,
- GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, tmp);
-
- setup_drawpix_texture(ctx, depthTex, newTex, GL_DEPTH_COMPONENT, srcW, srcH,
- GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, tmp);
-
_mesa_BindProgramARB(GL_FRAGMENT_PROGRAM_ARB, blit->DepthFP);
_mesa_set_enable(ctx, GL_FRAGMENT_PROGRAM_ARB, GL_TRUE);
_mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am
index dc140df..77670ef 100644
--- a/src/mesa/drivers/dri/i965/Makefile.am
+++ b/src/mesa/drivers/dri/i965/Makefile.am
@@ -62,6 +62,7 @@ TEST_LIBS = \
../common/libdri_test_stubs.la
i965_dri_la_SOURCES =
+nodist_EXTRA_i965_dri_la_SOURCES = dummy2.cpp
i965_dri_la_LIBADD = $(COMMON_LIBS)
i965_dri_la_LDFLAGS = -module -avoid-version -shared
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 8dab431..f80219e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -258,6 +258,26 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index,
return instructions;
}
+/**
+ * A helper for MOV generation for fixing up broken hardware SEND dependency
+ * handling.
+ */
+fs_inst *
+fs_visitor::DEP_RESOLVE_MOV(int grf)
+{
+ fs_inst *inst = MOV(brw_null_reg(), fs_reg(GRF, grf, BRW_REGISTER_TYPE_F));
+
+ inst->ir = NULL;
+ inst->annotation = "send dependency resolve";
+
+ /* The caller always wants uncompressed to emit the minimal extra
+ * dependencies, and to avoid having to deal with aligning its regs to 2.
+ */
+ inst->force_uncompressed = true;
+
+ return inst;
+}
+
bool
fs_inst::equals(fs_inst *inst)
{
@@ -1690,8 +1710,6 @@ fs_visitor::setup_pull_constants()
dst, index, offset);
pull->ir = inst->ir;
pull->annotation = inst->annotation;
- pull->base_mrf = 14;
- pull->mlen = 1;
inst->insert_before(pull);
@@ -1911,6 +1929,7 @@ fs_visitor::register_coalesce()
bool has_source_modifiers = (inst->src[0].abs ||
inst->src[0].negate ||
+ inst->src[0].smear != -1 ||
inst->src[0].file == UNIFORM);
/* Found a move of a GRF to a GRF. Let's see if we can coalesce
@@ -2228,6 +2247,265 @@ fs_visitor::remove_duplicate_mrf_writes()
return progress;
}
+static void
+clear_deps_for_inst_src(fs_inst *inst, int dispatch_width, bool *deps,
+ int first_grf, int grf_len)
+{
+ bool inst_16wide = (dispatch_width > 8 &&
+ !inst->force_uncompressed &&
+ !inst->force_sechalf);
+
+ /* Clear the flag for registers that actually got read (as expected). */
+ for (int i = 0; i < 3; i++) {
+ int grf;
+ if (inst->src[i].file == GRF) {
+ grf = inst->src[i].reg;
+ } else if (inst->src[i].file == FIXED_HW_REG &&
+ inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
+ grf = inst->src[i].fixed_hw_reg.nr;
+ } else {
+ continue;
+ }
+
+ if (grf >= first_grf &&
+ grf < first_grf + grf_len) {
+ deps[grf - first_grf] = false;
+ if (inst_16wide)
+ deps[grf - first_grf + 1] = false;
+ }
+ }
+}
+
+/**
+ * Implements this workaround for the original 965:
+ *
+ * "[DevBW, DevCL] Implementation Restrictions: As the hardware does not
+ * check for post destination dependencies on this instruction, software
+ * must ensure that there is no destination hazard for the case of write
+ * followed by a posted write shown in the following example.
+ *
+ * 1. mov r3 0
+ * 2. send r3.xy <rest of send instruction>
+ * 3. mov r2 r3
+ *
+ * Due to no post-destination dependency check on the send, the above
+ * code sequence could have two instructions (1 and 2) in flight at the
+ * same time that both consider r3 as the target of their final writes.
+ */
+void
+fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
+{
+ int write_len = inst->regs_written() * dispatch_width / 8;
+ int first_write_grf = inst->dst.reg;
+ bool needs_dep[BRW_MAX_MRF];
+ assert(write_len < (int)sizeof(needs_dep) - 1);
+
+ memset(needs_dep, false, sizeof(needs_dep));
+ memset(needs_dep, true, write_len);
+
+ clear_deps_for_inst_src(inst, dispatch_width,
+ needs_dep, first_write_grf, write_len);
+
+ /* Walk backwards looking for writes to registers we're writing which
+ * aren't read since being written. If we hit the start of the program,
+ * we assume that there are no outstanding dependencies on entry to the
+ * program.
+ */
+ for (fs_inst *scan_inst = (fs_inst *)inst->prev;
+ scan_inst != NULL;
+ scan_inst = (fs_inst *)scan_inst->prev) {
+
+ /* If we hit control flow, assume that there *are* outstanding
+ * dependencies, and force their cleanup before our instruction.
+ */
+ if (scan_inst->is_control_flow()) {
+ for (int i = 0; i < write_len; i++) {
+ if (needs_dep[i]) {
+ inst->insert_before(DEP_RESOLVE_MOV(first_write_grf + i));
+ }
+ }
+ }
+
+ bool scan_inst_16wide = (dispatch_width > 8 &&
+ !scan_inst->force_uncompressed &&
+ !scan_inst->force_sechalf);
+
+ /* We insert our reads as late as possible on the assumption that any
+ * instruction but a MOV that might have left us an outstanding
+ * dependency has more latency than a MOV.
+ */
+ if (scan_inst->dst.file == GRF &&
+ scan_inst->dst.reg >= first_write_grf &&
+ scan_inst->dst.reg < first_write_grf + write_len &&
+ needs_dep[scan_inst->dst.reg - first_write_grf]) {
+ inst->insert_before(DEP_RESOLVE_MOV(scan_inst->dst.reg));
+ needs_dep[scan_inst->dst.reg - first_write_grf] = false;
+ if (scan_inst_16wide)
+ needs_dep[scan_inst->dst.reg - first_write_grf + 1] = false;
+ }
+
+ /* Clear the flag for registers that actually got read (as expected). */
+ clear_deps_for_inst_src(scan_inst, dispatch_width,
+ needs_dep, first_write_grf, write_len);
+
+ /* Continue the loop only if we haven't resolved all the dependencies */
+ int i;
+ for (i = 0; i < write_len; i++) {
+ if (needs_dep[i])
+ break;
+ }
+ if (i == write_len)
+ return;
+ }
+}
+
+/**
+ * Implements this workaround for the original 965:
+ *
+ * "[DevBW, DevCL] Errata: A destination register from a send can not be
+ * used as a destination register until after it has been sourced by an
+ * instruction with a different destination register.
+ */
+void
+fs_visitor::insert_gen4_post_send_dependency_workarounds(fs_inst *inst)
+{
+ int write_len = inst->regs_written() * dispatch_width / 8;
+ int first_write_grf = inst->dst.reg;
+ bool needs_dep[BRW_MAX_MRF];
+ assert(write_len < (int)sizeof(needs_dep) - 1);
+
+ memset(needs_dep, false, sizeof(needs_dep));
+ memset(needs_dep, true, write_len);
+ /* Walk forwards looking for writes to registers we're writing which aren't
+ * read before being written.
+ */
+ for (fs_inst *scan_inst = (fs_inst *)inst->next;
+ !scan_inst->is_tail_sentinel();
+ scan_inst = (fs_inst *)scan_inst->next) {
+ /* If we hit control flow, force resolve all remaining dependencies. */
+ if (scan_inst->is_control_flow()) {
+ for (int i = 0; i < write_len; i++) {
+ if (needs_dep[i])
+ scan_inst->insert_before(DEP_RESOLVE_MOV(first_write_grf + i));
+ }
+ }
+
+ /* Clear the flag for registers that actually got read (as expected). */
+ clear_deps_for_inst_src(scan_inst, dispatch_width,
+ needs_dep, first_write_grf, write_len);
+
+ /* We insert our reads as late as possible since they're reading the
+ * result of a SEND, which has massive latency.
+ */
+ if (scan_inst->dst.file == GRF &&
+ scan_inst->dst.reg >= first_write_grf &&
+ scan_inst->dst.reg < first_write_grf + write_len &&
+ needs_dep[scan_inst->dst.reg - first_write_grf]) {
+ scan_inst->insert_before(DEP_RESOLVE_MOV(scan_inst->dst.reg));
+ needs_dep[scan_inst->dst.reg - first_write_grf] = false;
+ }
+
+ /* Continue the loop only if we haven't resolved all the dependencies */
+ int i;
+ for (i = 0; i < write_len; i++) {
+ if (needs_dep[i])
+ break;
+ }
+ if (i == write_len)
+ return;
+ }
+
+ /* If we hit the end of the program, resolve all remaining dependencies out
+ * of paranoia.
+ */
+ fs_inst *last_inst = (fs_inst *)this->instructions.get_tail();
+ assert(last_inst->eot);
+ for (int i = 0; i < write_len; i++) {
+ if (needs_dep[i])
+ last_inst->insert_before(DEP_RESOLVE_MOV(first_write_grf + i));
+ }
+}
+
+void
+fs_visitor::insert_gen4_send_dependency_workarounds()
+{
+ if (intel->gen != 4 || intel->is_g4x)
+ return;
+
+ /* Note that we're done with register allocation, so GRF fs_regs always
+ * have a .reg_offset of 0.
+ */
+
+ foreach_list_safe(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
+
+ if (inst->mlen != 0 && inst->dst.file == GRF) {
+ insert_gen4_pre_send_dependency_workarounds(inst);
+ insert_gen4_post_send_dependency_workarounds(inst);
+ }
+ }
+}
+
+/**
+ * Turns the generic expression-style uniform pull constant load instruction
+ * into a hardware-specific series of instructions for loading a pull
+ * constant.
+ *
+ * The expression style allows the CSE pass before this to optimize out
+ * repeated loads from the same offset, and gives the pre-register-allocation
+ * scheduling full flexibility, while the conversion to native instructions
+ * allows the post-register-allocation scheduler the best information
+ * possible.
+ */
+void
+fs_visitor::lower_uniform_pull_constant_loads()
+{
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
+
+ if (inst->opcode != FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD)
+ continue;
+
+ if (intel->gen >= 7) {
+ fs_reg const_offset_reg = inst->src[1];
+ assert(const_offset_reg.file == IMM &&
+ const_offset_reg.type == BRW_REGISTER_TYPE_UD);
+ const_offset_reg.imm.u /= 16;
+ fs_reg payload = fs_reg(this, glsl_type::uint_type);
+ struct brw_reg g0 = retype(brw_vec8_grf(0, 0),
+ BRW_REGISTER_TYPE_UD);
+
+ fs_inst *setup1 = MOV(payload, fs_reg(g0));
+ setup1->force_writemask_all = true;
+ /* We don't need the second half of this vgrf to be filled with g1
+ * in the 16-wide case, but if we use force_uncompressed then live
+ * variable analysis won't consider this a def!
+ */
+
+ fs_inst *setup2 = new(mem_ctx) fs_inst(FS_OPCODE_SET_GLOBAL_OFFSET,
+ payload, payload,
+ const_offset_reg);
+
+ setup1->ir = inst->ir;
+ setup1->annotation = inst->annotation;
+ inst->insert_before(setup1);
+ setup2->ir = inst->ir;
+ setup2->annotation = inst->annotation;
+ inst->insert_before(setup2);
+ inst->opcode = FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7;
+ inst->src[1] = payload;
+ } else {
+ /* Before register allocation, we didn't tell the scheduler about the
+ * MRF we use. We know it's safe to use this MRF because nothing
+ * else does except for register spill/unspill, which generates and
+ * uses its MRF within a single IR instruction.
+ */
+ inst->base_mrf = 14;
+ inst->mlen = 1;
+ }
+ }
+}
+
void
fs_visitor::dump_instruction(fs_inst *inst)
{
@@ -2500,6 +2778,8 @@ fs_visitor::run()
schedule_instructions(false);
+ lower_uniform_pull_constant_loads();
+
assign_curb_setup();
assign_urb_setup();
@@ -2522,6 +2802,12 @@ fs_visitor::run()
assert(force_uncompressed_stack == 0);
assert(force_sechalf_stack == 0);
+ /* This must come after all optimization and register allocation, since
+ * it inserts dead code that happens to have side effects, and it does
+ * so based on the actual physical registers in use.
+ */
+ insert_gen4_send_dependency_workarounds();
+
if (failed)
return false;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 88fecb9..d1bb111 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -285,6 +285,7 @@ public:
fs_inst *IF(fs_reg src0, fs_reg src1, uint32_t condition);
fs_inst *CMP(fs_reg dst, fs_reg src0, fs_reg src1,
uint32_t condition);
+ fs_inst *DEP_RESOLVE_MOV(int grf);
int type_size(const struct glsl_type *type);
fs_inst *get_instruction_generating_reg(fs_inst *start,
@@ -329,7 +330,11 @@ public:
bool remove_duplicate_mrf_writes();
bool virtual_grf_interferes(int a, int b);
void schedule_instructions(bool post_reg_alloc);
+ void insert_gen4_send_dependency_workarounds();
+ void insert_gen4_pre_send_dependency_workarounds(fs_inst *inst);
+ void insert_gen4_post_send_dependency_workarounds(fs_inst *inst);
void fail(const char *msg, ...);
+ void lower_uniform_pull_constant_loads();
void push_force_uncompressed();
void pop_force_uncompressed();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index c4ec1d9..194ed07 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -223,7 +223,8 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
inst->src[arg].file = entry->src.file;
inst->src[arg].reg = entry->src.reg;
inst->src[arg].reg_offset = entry->src.reg_offset;
- inst->src[arg].smear = entry->src.smear;
+ if (entry->src.smear != -1)
+ inst->src[arg].smear = entry->src.smear;
if (!inst->src[arg].abs) {
inst->src[arg].abs = entry->src.abs;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index 70c143a..a13ca36 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -105,7 +105,8 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
/* Match current instruction's expression against those in AEB. */
if (inst->opcode == entry->generator->opcode &&
inst->saturate == entry->generator->saturate &&
- operands_match(entry->generator->src, inst->src)) {
+ inst->dst.type == entry->generator->dst.type &&
+ operands_match(entry->generator->src, inst->src)) {
found = true;
progress = true;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 45072da..365a2ec 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -604,29 +604,8 @@ fs_generator::generate_unspill(fs_inst *inst, struct brw_reg dst)
{
assert(inst->mlen != 0);
- /* Clear any post destination dependencies that would be ignored by
- * the block read. See the B-Spec for pre-gen5 send instruction.
- *
- * This could use a better solution, since texture sampling and
- * math reads could potentially run into it as well -- anywhere
- * that we have a SEND with a destination that is a register that
- * was written but not read within the last N instructions (what's
- * N? unsure). This is rare because of dead code elimination, but
- * not impossible.
- */
- if (intel->gen == 4 && !intel->is_g4x)
- brw_MOV(p, brw_null_reg(), dst);
-
brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1,
inst->offset);
-
- if (intel->gen == 4 && !intel->is_g4x) {
- /* gen4 errata: destination from a send can't be used as a
- * destination until it's been read. Just read it so we don't
- * have to worry.
- */
- brw_MOV(p, brw_null_reg(), dst);
- }
}
void
@@ -637,19 +616,6 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst *inst,
{
assert(inst->mlen != 0);
- /* Clear any post destination dependencies that would be ignored by
- * the block read. See the B-Spec for pre-gen5 send instruction.
- *
- * This could use a better solution, since texture sampling and
- * math reads could potentially run into it as well -- anywhere
- * that we have a SEND with a destination that is a register that
- * was written but not read within the last N instructions (what's
- * N? unsure). This is rare because of dead code elimination, but
- * not impossible.
- */
- if (intel->gen == 4 && !intel->is_g4x)
- brw_MOV(p, brw_null_reg(), dst);
-
assert(index.file == BRW_IMMEDIATE_VALUE &&
index.type == BRW_REGISTER_TYPE_UD);
uint32_t surf_index = index.dw1.ud;
@@ -660,14 +626,6 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst *inst,
brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
read_offset, surf_index);
-
- if (intel->gen == 4 && !intel->is_g4x) {
- /* gen4 errata: destination from a send can't be used as a
- * destination until it's been read. Just read it so we don't
- * have to worry.
- */
- brw_MOV(p, brw_null_reg(), dst);
- }
}
void
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index d4f6fc9..573921c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -597,31 +597,9 @@ fs_visitor::visit(ir_expression *ir)
fs_reg packed_consts = fs_reg(this, glsl_type::float_type);
packed_consts.type = result.type;
- if (intel->gen >= 7) {
- fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] / 16);
- fs_reg payload = fs_reg(this, glsl_type::uint_type);
- struct brw_reg g0 = retype(brw_vec8_grf(0, 0),
- BRW_REGISTER_TYPE_UD);
- fs_inst *setup = emit(MOV(payload, fs_reg(g0)));
- setup->force_writemask_all = true;
- /* We don't need the second half of this vgrf to be filled with g1
- * in the 16-wide case, but if we use force_uncompressed then live
- * variable analysis won't consider this a def!
- */
-
- emit(FS_OPCODE_SET_GLOBAL_OFFSET, payload,
- payload, const_offset_reg);
- emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7, packed_consts,
- surf_index, payload);
- } else {
- fs_reg const_offset_reg = fs_reg(const_offset->value.u[0]);
- fs_inst *pull = emit(fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
- packed_consts,
- surf_index,
- const_offset_reg));
- pull->base_mrf = 14;
- pull->mlen = 1;
- }
+ fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] & ~15);
+ emit(fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
+ packed_consts, surf_index, const_offset_reg));
packed_consts.smear = const_offset->value.u[0] % 16 / 4;
for (int i = 0; i < ir->type->vector_elements; i++) {
diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c
index 3d53843..48635c5 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_constval.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c
@@ -238,6 +238,23 @@ static void calc_wm_input_sizes( struct brw_context *brw )
calc_sizes(&t);
+ /* _NEW_POINT
+ *
+ * If the SF will be replacing the vertex output with a reference to
+ * gl_PointCoord, then tell the fragment shader that the value actually
+ * does vary.
+ */
+ if (ctx->Point.PointSprite) {
+ for (int i = 0; i < 8; i++) {
+ if (ctx->Point.CoordReplace[i]) {
+ t.size_masks[4-1] |= FRAG_BIT_TEX(i);
+ t.size_masks[3-1] |= FRAG_BIT_TEX(i);
+ t.size_masks[2-1] |= FRAG_BIT_TEX(i);
+ t.size_masks[1-1] |= FRAG_BIT_TEX(i);
+ }
+ }
+ }
+
if (memcmp(brw->wm.input_size_masks, t.size_masks, sizeof(t.size_masks)) != 0) {
memcpy(brw->wm.input_size_masks, t.size_masks, sizeof(t.size_masks));
brw->state.dirty.brw |= BRW_NEW_WM_INPUT_DIMENSIONS;
@@ -246,7 +263,7 @@ static void calc_wm_input_sizes( struct brw_context *brw )
const struct brw_tracked_state brw_wm_input_sizes = {
.dirty = {
- .mesa = _NEW_LIGHT | _NEW_PROGRAM,
+ .mesa = _NEW_LIGHT | _NEW_PROGRAM | _NEW_POINT,
.brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_INPUT_DIMENSIONS,
.cache = 0
},
diff --git a/src/mesa/drivers/dri/intel/intel_chipset.h b/src/mesa/drivers/dri/intel/intel_chipset.h
index 9c00ba8..885f6c2 100644
--- a/src/mesa/drivers/dri/intel/intel_chipset.h
+++ b/src/mesa/drivers/dri/intel/intel_chipset.h
@@ -114,15 +114,15 @@
#define PCI_CHIP_HASWELL_ULT_S_GT1 0x0A0A /* Server */
#define PCI_CHIP_HASWELL_ULT_S_GT2 0x0A1A
#define PCI_CHIP_HASWELL_ULT_S_GT2_PLUS 0x0A2A
-#define PCI_CHIP_HASWELL_CRW_GT1 0x0D12 /* Desktop */
-#define PCI_CHIP_HASWELL_CRW_GT2 0x0D22
-#define PCI_CHIP_HASWELL_CRW_GT2_PLUS 0x0D32
-#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D16 /* Mobile */
-#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D26
-#define PCI_CHIP_HASWELL_CRW_M_GT2_PLUS 0x0D36
-#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D1A /* Server */
-#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D2A
-#define PCI_CHIP_HASWELL_CRW_S_GT2_PLUS 0x0D3A
+#define PCI_CHIP_HASWELL_CRW_GT1 0x0D02 /* Desktop */
+#define PCI_CHIP_HASWELL_CRW_GT2 0x0D12
+#define PCI_CHIP_HASWELL_CRW_GT2_PLUS 0x0D22
+#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D06 /* Mobile */
+#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D16
+#define PCI_CHIP_HASWELL_CRW_M_GT2_PLUS 0x0D26
+#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D0A /* Server */
+#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D1A
+#define PCI_CHIP_HASWELL_CRW_S_GT2_PLUS 0x0D2A
#define IS_MOBILE(devid) (devid == PCI_CHIP_I855_GM || \
devid == PCI_CHIP_I915_GM || \
diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c
index a951283..6d91534 100644
--- a/src/mesa/main/attrib.c
+++ b/src/mesa/main/attrib.c
@@ -130,6 +130,9 @@ struct gl_enable_attrib
GLboolean VertexProgramPointSize;
GLboolean VertexProgramTwoSide;
+ /* GL_ARB_fragment_program */
+ GLboolean FragmentProgram;
+
/* GL_ARB_point_sprite / GL_NV_point_sprite */
GLboolean PointSprite;
GLboolean FragmentShaderATI;
@@ -316,6 +319,10 @@ _mesa_PushAttrib(GLbitfield mask)
attr->VertexProgram = ctx->VertexProgram.Enabled;
attr->VertexProgramPointSize = ctx->VertexProgram.PointSizeEnabled;
attr->VertexProgramTwoSide = ctx->VertexProgram.TwoSideEnabled;
+
+ /* GL_ARB_fragment_program */
+ attr->FragmentProgram = ctx->FragmentProgram.Enabled;
+
save_attrib_data(&head, GL_ENABLE_BIT, attr);
/* GL_ARB_framebuffer_sRGB / GL_EXT_framebuffer_sRGB */
@@ -607,6 +614,11 @@ pop_enable_group(struct gl_context *ctx, const struct gl_enable_attrib *enable)
enable->VertexProgramTwoSide,
GL_VERTEX_PROGRAM_TWO_SIDE_ARB);
+ /* GL_ARB_fragment_program */
+ TEST_AND_UPDATE(ctx->FragmentProgram.Enabled,
+ enable->FragmentProgram,
+ GL_FRAGMENT_PROGRAM_ARB);
+
/* GL_ARB_framebuffer_sRGB / GL_EXT_framebuffer_sRGB */
TEST_AND_UPDATE(ctx->Color.sRGBEnabled, enable->sRGBEnabled,
GL_FRAMEBUFFER_SRGB);
diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
index 5e9e539..df57b76 100644
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -1072,7 +1072,6 @@ _mesa_initialize_context(struct gl_context *ctx,
case API_OPENGLES2:
ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
- ctx->Point.PointSprite = GL_TRUE; /* always on for ES 2.x */
break;
}
diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c
index 8728540..c1e1658 100644
--- a/src/mesa/main/glformats.c
+++ b/src/mesa/main/glformats.c
@@ -917,7 +917,7 @@ _mesa_is_compressed_format(struct gl_context *ctx, GLenum format)
case GL_COMPRESSED_SIGNED_RG11_EAC:
case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
- return _mesa_is_gles3(ctx);
+ return _mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility;
case GL_PALETTE4_RGB8_OES:
case GL_PALETTE4_RGBA8_OES:
case GL_PALETTE4_R5_G6_B5_OES:
diff --git a/src/mesa/main/points.c b/src/mesa/main/points.c
index 1778640..c925d4c 100644
--- a/src/mesa/main/points.c
+++ b/src/mesa/main/points.c
@@ -253,7 +253,8 @@ _mesa_init_point(struct gl_context *ctx)
* In a core context, the state will default to true, and the setters and
* getters are disabled.
*/
- ctx->Point.PointSprite = (ctx->API == API_OPENGL_CORE);
+ ctx->Point.PointSprite = (ctx->API == API_OPENGL_CORE ||
+ ctx->API == API_OPENGLES2);
ctx->Point.SpriteRMode = GL_ZERO; /* GL_NV_point_sprite (only!) */
ctx->Point.SpriteOrigin = GL_UPPER_LEFT; /* GL_ARB_point_sprite */
diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index d1723b8..1b9525b 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -520,7 +520,7 @@ _mesa_base_tex_format( struct gl_context *ctx, GLint internalFormat )
}
}
- if (_mesa_is_gles3(ctx)) {
+ if (_mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility) {
switch (internalFormat) {
case GL_COMPRESSED_RGB8_ETC2:
case GL_COMPRESSED_SRGB8_ETC2:
@@ -3187,6 +3187,12 @@ _mesa_EGLImageTargetTexture2DOES (GLenum target, GLeglImageOES image)
return;
}
+ if (!image) {
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "glEGLImageTargetTexture2D(image=%p)", image);
+ return;
+ }
+
if (ctx->NewState & _NEW_PIXEL)
_mesa_update_state(ctx);
diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
index 52ede13..6f18ec6 100644
--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -1432,6 +1432,12 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
*params = (GLfloat) obj->Immutable;
break;
+ case GL_REQUIRED_TEXTURE_IMAGE_UNITS_OES:
+ if (!_mesa_is_gles(ctx) || !ctx->Extensions.OES_EGL_image_external)
+ goto invalid_pname;
+ *params = obj->RequiredTextureImageUnits;
+ break;
+
case GL_TEXTURE_SRGB_DECODE_EXT:
if (!ctx->Extensions.EXT_texture_sRGB_decode)
goto invalid_pname;
diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c
index f20df9e..7fdfa72 100644
--- a/src/mesa/state_tracker/st_atom_rasterizer.c
+++ b/src/mesa/state_tracker/st_atom_rasterizer.c
@@ -135,16 +135,12 @@ static void update_raster_state( struct st_context *st )
/* _NEW_POLYGON
*/
- if (ctx->Polygon.OffsetUnits != 0.0 ||
- ctx->Polygon.OffsetFactor != 0.0) {
- raster->offset_point = ctx->Polygon.OffsetPoint;
- raster->offset_line = ctx->Polygon.OffsetLine;
- raster->offset_tri = ctx->Polygon.OffsetFill;
- }
-
if (ctx->Polygon.OffsetPoint ||
ctx->Polygon.OffsetLine ||
ctx->Polygon.OffsetFill) {
+ raster->offset_point = ctx->Polygon.OffsetPoint;
+ raster->offset_line = ctx->Polygon.OffsetLine;
+ raster->offset_tri = ctx->Polygon.OffsetFill;
raster->offset_units = ctx->Polygon.OffsetUnits;
raster->offset_scale = ctx->Polygon.OffsetFactor;
}
diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
index 63dbdb2..36fffe9 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -675,11 +675,12 @@ st_flush_bitmap_cache(struct st_context *st)
* \return GL_TRUE for success, GL_FALSE if bitmap is too large, etc.
*/
static GLboolean
-accum_bitmap(struct st_context *st,
+accum_bitmap(struct gl_context *ctx,
GLint x, GLint y, GLsizei width, GLsizei height,
const struct gl_pixelstore_attrib *unpack,
const GLubyte *bitmap )
{
+ struct st_context *st = ctx->st;
struct bitmap_cache *cache = st->bitmap.cache;
int px = -999, py = -999;
const GLfloat z = st->ctx->Current.RasterPos[2];
@@ -729,9 +730,17 @@ accum_bitmap(struct st_context *st,
/* create the transfer if needed */
create_cache_trans(st);
+ /* PBO source... */
+ bitmap = _mesa_map_pbo_source(ctx, unpack, bitmap);
+ if (!bitmap) {
+ return FALSE;
+ }
+
unpack_bitmap(st, px, py, width, height, unpack, bitmap,
cache->buffer, BITMAP_CACHE_WIDTH);
+ _mesa_unmap_pbo_source(ctx, unpack);
+
return GL_TRUE; /* accumulated */
}
@@ -764,7 +773,7 @@ st_Bitmap(struct gl_context *ctx, GLint x, GLint y,
semantic_indexes);
}
- if (UseBitmapCache && accum_bitmap(st, x, y, width, height, unpack, bitmap))
+ if (UseBitmapCache && accum_bitmap(ctx, x, y, width, height, unpack, bitmap))
return;
pt = make_bitmap_texture(ctx, width, height, unpack, bitmap);
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index de62264..bff8d9b 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -283,7 +283,7 @@ st_draw_vbo(struct gl_context *ctx,
/* don't trim, restarts might be inside index list */
cso_draw_vbo(st->cso_context, &info);
}
- else if (u_trim_pipe_prim(info.mode, &info.count))
+ else if (u_trim_pipe_prim(prims[i].mode, &info.count))
cso_draw_vbo(st->cso_context, &info);
}
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index a9111b5..f56f7cb 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -1142,7 +1142,7 @@ st_print_shaders(struct gl_context *ctx)
static void
destroy_program_variants(struct st_context *st, struct gl_program *program)
{
- if (!program)
+ if (!program || program == &_mesa_DummyProgram)
return;
switch (program->Target) {