From 5cdc027b2927811731acc80ea56052dc34bc90d0 Mon Sep 17 00:00:00 2001
From: Jerome Glisse <jglisse@redhat.com>
Date: Thu, 31 Jan 2013 14:40:55 -0500
Subject: [PATCH] mesa-9-r600g-limit-memory.patch to fix relocation issue

- force r600g to stay in gpu memory limit
---
 mesa-9-r600g-limit-memory.patch | 278 ++++++++++++++++++++++++++++++++
 mesa.spec                       |   8 +-
 2 files changed, 285 insertions(+), 1 deletion(-)
 create mode 100644 mesa-9-r600g-limit-memory.patch

diff --git a/mesa-9-r600g-limit-memory.patch b/mesa-9-r600g-limit-memory.patch
new file mode 100644
index 0000000..eee0162
--- /dev/null
+++ b/mesa-9-r600g-limit-memory.patch
@@ -0,0 +1,278 @@
+From d47a08e9fff31bc9c880146706c4020d54db17b3 Mon Sep 17 00:00:00 2001
+From: Jerome Glisse <jglisse@redhat.com>
+Date: Wed, 30 Jan 2013 15:02:32 -0500
+Subject: [PATCH] r600g: add cs memory usage accounting and limit it v3
+ (backport for mesa 9.0)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+We are now seeing cs that can go over the vram+gtt size. To avoid
+failing, flush early any cs that goes over 70% (gtt+vram) usage.
+70% is used to allow for some fragmentation.
+
+The idea is to compute a gross estimate of memory requirement of
+each draw call. After each draw call, memory will be precisely
+accounted. So the uncertainty is only on the current draw call.
+In practice this gave a very good estimate (+/- 10% of the target
+memory limit).
+
+v2: Remove left over from testing version, remove useless NULL
+    checking. Improve commit message.
+v3: Add comment to code on memory accounting precision
+
+This version is a backport for mesa 9.0
+
+Signed-off-by: Jerome Glisse <jglisse@redhat.com>
+Reviewed-by: Marek Olšák <maraeo@gmail.com>
+---
+ src/gallium/drivers/r600/evergreen_state.c    |  4 ++++
+ src/gallium/drivers/r600/r600_hw_context.c    | 12 ++++++++++++
+ src/gallium/drivers/r600/r600_pipe.h          | 28 +++++++++++++++++++++++++++
+ src/gallium/drivers/r600/r600_state.c         |  4 ++++
+ src/gallium/drivers/r600/r600_state_common.c  | 13 ++++++++++++-
+ src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 11 +++++++++++
+ src/gallium/winsys/radeon/drm/radeon_winsys.h | 10 ++++++++++
+ 7 files changed, 81 insertions(+), 1 deletion(-)
+
+diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
+index 6bf4247..a17ba17 100644
+--- a/src/gallium/drivers/r600/evergreen_state.c
++++ b/src/gallium/drivers/r600/evergreen_state.c
+@@ -1721,6 +1721,8 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
+ 		res = (struct r600_resource*)surf->base.texture;
+ 		rtex = (struct r600_texture*)res;
+ 
++		r600_context_add_resource_size(ctx, state->cbufs[i]->texture);
++
+ 		if (!surf->color_initialized) {
+ 			evergreen_init_color_surface(rctx, surf);
+ 		}
+@@ -1787,6 +1789,8 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
+ 		surf = (struct r600_surface*)state->zsbuf;
+ 		res = (struct r600_resource*)surf->base.texture;
+ 
++		r600_context_add_resource_size(ctx, state->zsbuf->texture);
++
+ 		if (!surf->depth_initialized) {
+ 			evergreen_init_depth_surface(rctx, surf);
+ 		}
+diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
+index af27fd9..d5efd86 100644
+--- a/src/gallium/drivers/r600/r600_hw_context.c
++++ b/src/gallium/drivers/r600/r600_hw_context.c
+@@ -635,6 +635,16 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
+ {
+ 	struct r600_atom *state;
+ 
++	if (!ctx->ws->cs_memory_below_limit(ctx->cs, ctx->vram, ctx->gtt)) {
++		ctx->gtt = 0;
++		ctx->vram = 0;
++		r600_flush(&ctx->context, NULL, RADEON_FLUSH_ASYNC);
++		return;
++	}
++	/* all will be accounted for once relocations are emitted */
++	ctx->gtt = 0;
++	ctx->vram = 0;
++
+ 	/* The number of dwords we already used in the CS so far. */
+ 	num_dw += ctx->cs->cdw;
+ 
+@@ -953,6 +963,8 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
+ 
+ 	ctx->pm4_dirty_cdwords = 0;
+ 	ctx->flags = 0;
++	ctx->gtt = 0;
++	ctx->vram = 0;
+ 
+ 	/* Begin a new CS. */
+ 	r600_emit_atom(ctx, &ctx->start_cs_cmd.atom);
+diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
+index 721334d..ba75c9d 100644
+--- a/src/gallium/drivers/r600/r600_pipe.h
++++ b/src/gallium/drivers/r600/r600_pipe.h
+@@ -371,6 +371,10 @@ struct r600_context {
+ 
+ 	unsigned default_ps_gprs, default_vs_gprs;
+ 
++	/* current unaccounted memory usage */
++	uint64_t			vram;
++	uint64_t			gtt;
++
+ 	/* States based on r600_atom. */
+ 	struct list_head		dirty_states;
+ 	struct r600_command_buffer	start_cs_cmd; /* invariant state mostly */
+@@ -886,4 +890,28 @@ static INLINE uint64_t r600_resource_va(struct pipe_screen *screen, struct pipe_
+ 	return rscreen->ws->buffer_get_virtual_address(rresource->cs_buf);
+ }
+ 
++static INLINE void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r)
++{
++	struct r600_context *rctx = (struct r600_context *)ctx;
++	struct r600_resource *rr = (struct r600_resource *)r;
++
++	if (r == NULL) {
++		return;
++	}
++
++	/*
++	 * The idea is to compute a gross estimate of memory requirement of
++	 * each draw call. After each draw call, memory will be precisely
++	 * accounted. So the uncertainty is only on the current draw call.
++	 * In practice this gave a very good estimate (+/- 10% of the target
++	 * memory limit).
++	 */
++	if (rr->domains & RADEON_DOMAIN_GTT) {
++		rctx->gtt += rr->buf->size;
++	}
++	if (rr->domains & RADEON_DOMAIN_VRAM) {
++		rctx->vram += rr->buf->size;
++	}
++}
++
+ #endif
+diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
+index c120ddc..7a1d844 100644
+--- a/src/gallium/drivers/r600/r600_state.c
++++ b/src/gallium/drivers/r600/r600_state.c
+@@ -1615,6 +1615,8 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
+ 		res = (struct r600_resource*)surf->base.texture;
+ 		rtex = (struct r600_texture*)res;
+ 
++		r600_context_add_resource_size(ctx, state->cbufs[i]->texture);
++
+ 		if (!surf->color_initialized || force_cmask_fmask) {
+ 			r600_init_color_surface(rctx, surf, force_cmask_fmask);
+ 			if (force_cmask_fmask) {
+@@ -1673,6 +1675,8 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
+ 		surf = (struct r600_surface*)state->zsbuf;
+ 		res = (struct r600_resource*)surf->base.texture;
+ 
++		r600_context_add_resource_size(ctx, state->zsbuf->texture);
++
+ 		if (!surf->depth_initialized) {
+ 			r600_init_depth_surface(rctx, surf);
+ 		}
+diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
+index 26af6f6..68cbd16 100644
+--- a/src/gallium/drivers/r600/r600_state_common.c
++++ b/src/gallium/drivers/r600/r600_state_common.c
+@@ -504,7 +504,8 @@ void r600_set_index_buffer(struct pipe_context *ctx,
+ 
+ 	if (ib) {
+ 		pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer);
+-	        memcpy(&rctx->index_buffer, ib, sizeof(*ib));
++		memcpy(&rctx->index_buffer, ib, sizeof(*ib));
++		r600_context_add_resource_size(ctx, ib->buffer);
+ 	} else {
+ 		pipe_resource_reference(&rctx->index_buffer.buffer, NULL);
+ 	}
+@@ -549,6 +550,7 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
+ 				vb[i].buffer_offset = input[i].buffer_offset;
+ 				pipe_resource_reference(&vb[i].buffer, input[i].buffer);
+ 				new_buffer_mask |= 1 << i;
++				r600_context_add_resource_size(ctx, input[i].buffer);
+ 			} else {
+ 				pipe_resource_reference(&vb[i].buffer, NULL);
+ 				disable_mask |= 1 << i;
+@@ -648,6 +650,7 @@ void r600_set_sampler_views(struct pipe_context *pipe,
+ 
+ 			pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], views[i]);
+ 			new_mask |= 1 << i;
++			r600_context_add_resource_size(pipe, views[i]->texture);
+ 		} else {
+ 			pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], NULL);
+ 			disable_mask |= 1 << i;
+@@ -822,6 +825,8 @@ void r600_bind_ps_shader(struct pipe_context *ctx, void *state)
+ 	rctx->ps_shader = (struct r600_pipe_shader_selector *)state;
+ 	r600_context_pipe_state_set(rctx, &rctx->ps_shader->current->rstate);
+ 
++	r600_context_add_resource_size(ctx, (struct pipe_resource *)rctx->ps_shader->current->bo);
++
+ 	if (rctx->chip_class <= R700) {
+ 		bool multiwrite = rctx->ps_shader->current->shader.fs_write_all;
+ 
+@@ -848,6 +853,8 @@ void r600_bind_vs_shader(struct pipe_context *ctx, void *state)
+ 	if (state) {
+ 		r600_context_pipe_state_set(rctx, &rctx->vs_shader->current->rstate);
+ 
++		r600_context_add_resource_size(ctx, (struct pipe_resource *)rctx->vs_shader->current->bo);
++
+ 		if (rctx->chip_class < EVERGREEN && rctx->ps_shader)
+ 			r600_adjust_gprs(rctx);
+ 	}
+@@ -957,10 +964,13 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
+ 		} else {
+ 			u_upload_data(rctx->uploader, 0, input->buffer_size, ptr, &cb->buffer_offset, &cb->buffer);
+ 		}
++		/* account it in gtt */
++		rctx->gtt += input->buffer_size;
+ 	} else {
+ 		/* Setup the hw buffer. */
+ 		cb->buffer_offset = input->buffer_offset;
+ 		pipe_resource_reference(&cb->buffer, input->buffer);
++		r600_context_add_resource_size(ctx, input->buffer);
+ 	}
+ 
+ 	state->enabled_mask |= 1 << index;
+@@ -1023,6 +1033,7 @@ void r600_set_so_targets(struct pipe_context *ctx,
+ 	/* Set the new targets. */
+ 	for (i = 0; i < num_targets; i++) {
+ 		pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], targets[i]);
++		r600_context_add_resource_size(ctx, targets[i]->buffer);
+ 	}
+ 	for (; i < rctx->num_so_targets; i++) {
+ 		pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], NULL);
+diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+index 983c915..1ad23e3 100644
+--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
++++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+@@ -366,6 +366,16 @@ static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
+     return status;
+ }
+ 
++static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
++{
++    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
++    boolean status =
++        (cs->csc->used_gart + gtt) < cs->ws->info.gart_size * 0.7 &&
++        (cs->csc->used_vram + vram) < cs->ws->info.vram_size * 0.7;
++
++    return status;
++}
++
+ static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs,
+                                       struct radeon_winsys_cs_handle *buf)
+ {
+@@ -549,6 +559,7 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
+     ws->base.cs_destroy = radeon_drm_cs_destroy;
+     ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
+     ws->base.cs_validate = radeon_drm_cs_validate;
++    ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
+     ws->base.cs_write_reloc = radeon_drm_cs_write_reloc;
+     ws->base.cs_flush = radeon_drm_cs_flush;
+     ws->base.cs_set_flush_callback = radeon_drm_cs_set_flush;
+diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
+index 4eb57fb..3dd91cc 100644
+--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
++++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
+@@ -309,6 +309,16 @@ struct radeon_winsys {
+     boolean (*cs_validate)(struct radeon_winsys_cs *cs);
+ 
+     /**
++     * Return TRUE if there is enough memory in VRAM and GTT for the relocs
++     * added so far.
++     *
++     * \param cs        A command stream to validate.
++     * \param vram      VRAM memory size pending to be used
++     * \param gtt       GTT memory size pending to be used
++     */
++    boolean (*cs_memory_below_limit)(struct radeon_winsys_cs *cs, uint64_t vram, uint64_t gtt);
++
++    /**
+      * Write a relocated dword to a command buffer.
+      *
+      * \param cs        A command stream the relocation is written to.
+-- 
+1.8.1
+
diff --git a/mesa.spec b/mesa.spec
index 608b8ac..d3dbba2 100644
--- a/mesa.spec
+++ b/mesa.spec
@@ -49,7 +49,7 @@
 Summary: Mesa graphics libraries
 Name: mesa
 Version: 9.0.1
-Release: 3%{?dist}
+Release: 4%{?dist}
 License: MIT
 Group: System Environment/Libraries
 URL: http://www.mesa3d.org
@@ -68,6 +68,7 @@ Patch9: mesa-8.0-llvmpipe-shmget.patch
 Patch11: mesa-8.0-nouveau-tfp-blacklist.patch
 Patch12: mesa-8.0.1-fix-16bpp.patch
 Patch13: mesa-9.0.1-less-cxx-please.patch
+Patch14: mesa-9-r600g-limit-memory.patch
 
 BuildRequires: pkgconfig autoconf automake libtool
 %if %{with_hardware}
@@ -292,6 +293,8 @@ Mesa shared glapi
 
 %patch13 -p1 -b .less-cpp
 
+%patch14 -p1 -b .r600g-limit
+
 # default to dri (not xlib) for libGL on all arches
 # XXX please fix upstream
 sed -i 's/^default_driver.*$/default_driver="dri"/' configure.ac
@@ -574,6 +577,9 @@ rm -rf $RPM_BUILD_ROOT
 %endif
 
 %changelog
+* Thu Jan 31 2013 Jerome Glisse <jglisse@redhat.com> 9.0.1-4
+- force r600g to stay in gpu memory limit
+
 * Thu Dec 20 2012 Adam Jackson <ajax@redhat.com> 9.0.1-3
 - mesa-9.0.1-22-gd0a9ab2.patch: Sync with git
 - Build with -fno-rtti -fno-exceptions, modest size and speed win