From 5cdc027b2927811731acc80ea56052dc34bc90d0 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Thu, 31 Jan 2013 14:40:55 -0500 Subject: [PATCH] mesa-9-r600g-limit-memory.patch to fix relocation issue - force r600g to stay in gpu memory limit --- mesa-9-r600g-limit-memory.patch | 278 ++++++++++++++++++++++++++++++++ mesa.spec | 8 +- 2 files changed, 285 insertions(+), 1 deletion(-) create mode 100644 mesa-9-r600g-limit-memory.patch diff --git a/mesa-9-r600g-limit-memory.patch b/mesa-9-r600g-limit-memory.patch new file mode 100644 index 0000000..eee0162 --- /dev/null +++ b/mesa-9-r600g-limit-memory.patch @@ -0,0 +1,278 @@ +From d47a08e9fff31bc9c880146706c4020d54db17b3 Mon Sep 17 00:00:00 2001 +From: Jerome Glisse +Date: Wed, 30 Jan 2013 15:02:32 -0500 +Subject: [PATCH] r600g: add cs memory usage accounting and limit it v3 + (backport for mesa 9.0) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +We are now seeing cs that can go over the vram+gtt size; to avoid +failing, flush early any cs that goes over 70% (gtt+vram) usage. 70% +is used to allow some fragmentation. + +The idea is to compute a gross estimate of memory requirement of +each draw call. After each draw call, memory will be precisely +accounted. So the uncertainty is only on the current draw call. +In practice this gave very good estimate (+/- 10% of the target +memory limit). + +v2: Remove left over from testing version, remove useless NULL + checking. Improve commit message. 
+v3: Add comment to code on memory accounting precision + +This version is a backport for mesa 9.0 + +Signed-off-by: Jerome Glisse +Reviewed-by: Marek Olšák +--- + src/gallium/drivers/r600/evergreen_state.c | 4 ++++ + src/gallium/drivers/r600/r600_hw_context.c | 12 ++++++++++++ + src/gallium/drivers/r600/r600_pipe.h | 28 +++++++++++++++++++++++++++ + src/gallium/drivers/r600/r600_state.c | 4 ++++ + src/gallium/drivers/r600/r600_state_common.c | 13 ++++++++++++- + src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 11 +++++++++++ + src/gallium/winsys/radeon/drm/radeon_winsys.h | 10 ++++++++++ + 7 files changed, 81 insertions(+), 1 deletion(-) + +diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c +index 6bf4247..a17ba17 100644 +--- a/src/gallium/drivers/r600/evergreen_state.c ++++ b/src/gallium/drivers/r600/evergreen_state.c +@@ -1721,6 +1721,8 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, + res = (struct r600_resource*)surf->base.texture; + rtex = (struct r600_texture*)res; + ++ r600_context_add_resource_size(ctx, state->cbufs[i]->texture); ++ + if (!surf->color_initialized) { + evergreen_init_color_surface(rctx, surf); + } +@@ -1787,6 +1789,8 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, + surf = (struct r600_surface*)state->zsbuf; + res = (struct r600_resource*)surf->base.texture; + ++ r600_context_add_resource_size(ctx, state->zsbuf->texture); ++ + if (!surf->depth_initialized) { + evergreen_init_depth_surface(rctx, surf); + } +diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c +index af27fd9..d5efd86 100644 +--- a/src/gallium/drivers/r600/r600_hw_context.c ++++ b/src/gallium/drivers/r600/r600_hw_context.c +@@ -635,6 +635,16 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, + { + struct r600_atom *state; + ++ if (!ctx->ws->cs_memory_below_limit(ctx->cs, ctx->vram, ctx->gtt)) { ++ ctx->gtt = 0; ++ 
ctx->vram = 0; ++ r600_flush(&ctx->context, NULL, RADEON_FLUSH_ASYNC); ++ return; ++ } ++ /* all will be accounted once relocation are emited */ ++ ctx->gtt = 0; ++ ctx->vram = 0; ++ + /* The number of dwords we already used in the CS so far. */ + num_dw += ctx->cs->cdw; + +@@ -953,6 +963,8 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags) + + ctx->pm4_dirty_cdwords = 0; + ctx->flags = 0; ++ ctx->gtt = 0; ++ ctx->vram = 0; + + /* Begin a new CS. */ + r600_emit_atom(ctx, &ctx->start_cs_cmd.atom); +diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h +index 721334d..ba75c9d 100644 +--- a/src/gallium/drivers/r600/r600_pipe.h ++++ b/src/gallium/drivers/r600/r600_pipe.h +@@ -371,6 +371,10 @@ struct r600_context { + + unsigned default_ps_gprs, default_vs_gprs; + ++ /* current unaccounted memory usage */ ++ uint64_t vram; ++ uint64_t gtt; ++ + /* States based on r600_atom. */ + struct list_head dirty_states; + struct r600_command_buffer start_cs_cmd; /* invariant state mostly */ +@@ -886,4 +890,28 @@ static INLINE uint64_t r600_resource_va(struct pipe_screen *screen, struct pipe_ + return rscreen->ws->buffer_get_virtual_address(rresource->cs_buf); + } + ++static INLINE void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r) ++{ ++ struct r600_context *rctx = (struct r600_context *)ctx; ++ struct r600_resource *rr = (struct r600_resource *)r; ++ ++ if (r == NULL) { ++ return; ++ } ++ ++ /* ++ * The idea is to compute a gross estimate of memory requirement of ++ * each draw call. After each draw call, memory will be precisely ++ * accounted. So the uncertainty is only on the current draw call. ++ * In practice this gave very good estimate (+/- 10% of the target ++ * memory limit). 
++ */ ++ if (rr->domains & RADEON_DOMAIN_GTT) { ++ rctx->gtt += rr->buf->size; ++ } ++ if (rr->domains & RADEON_DOMAIN_VRAM) { ++ rctx->vram += rr->buf->size; ++ } ++} ++ + #endif +diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c +index c120ddc..7a1d844 100644 +--- a/src/gallium/drivers/r600/r600_state.c ++++ b/src/gallium/drivers/r600/r600_state.c +@@ -1615,6 +1615,8 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, + res = (struct r600_resource*)surf->base.texture; + rtex = (struct r600_texture*)res; + ++ r600_context_add_resource_size(ctx, state->cbufs[i]->texture); ++ + if (!surf->color_initialized || force_cmask_fmask) { + r600_init_color_surface(rctx, surf, force_cmask_fmask); + if (force_cmask_fmask) { +@@ -1673,6 +1675,8 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, + surf = (struct r600_surface*)state->zsbuf; + res = (struct r600_resource*)surf->base.texture; + ++ r600_context_add_resource_size(ctx, state->zsbuf->texture); ++ + if (!surf->depth_initialized) { + r600_init_depth_surface(rctx, surf); + } +diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c +index 26af6f6..68cbd16 100644 +--- a/src/gallium/drivers/r600/r600_state_common.c ++++ b/src/gallium/drivers/r600/r600_state_common.c +@@ -504,7 +504,8 @@ void r600_set_index_buffer(struct pipe_context *ctx, + + if (ib) { + pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer); +- memcpy(&rctx->index_buffer, ib, sizeof(*ib)); ++ memcpy(&rctx->index_buffer, ib, sizeof(*ib)); ++ r600_context_add_resource_size(ctx, ib->buffer); + } else { + pipe_resource_reference(&rctx->index_buffer.buffer, NULL); + } +@@ -549,6 +550,7 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, + vb[i].buffer_offset = input[i].buffer_offset; + pipe_resource_reference(&vb[i].buffer, input[i].buffer); + new_buffer_mask |= 1 << i; ++ r600_context_add_resource_size(ctx, 
input[i].buffer); + } else { + pipe_resource_reference(&vb[i].buffer, NULL); + disable_mask |= 1 << i; +@@ -648,6 +650,7 @@ void r600_set_sampler_views(struct pipe_context *pipe, + + pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], views[i]); + new_mask |= 1 << i; ++ r600_context_add_resource_size(pipe, views[i]->texture); + } else { + pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], NULL); + disable_mask |= 1 << i; +@@ -822,6 +825,8 @@ void r600_bind_ps_shader(struct pipe_context *ctx, void *state) + rctx->ps_shader = (struct r600_pipe_shader_selector *)state; + r600_context_pipe_state_set(rctx, &rctx->ps_shader->current->rstate); + ++ r600_context_add_resource_size(ctx, (struct pipe_resource *)rctx->ps_shader->current->bo); ++ + if (rctx->chip_class <= R700) { + bool multiwrite = rctx->ps_shader->current->shader.fs_write_all; + +@@ -848,6 +853,8 @@ void r600_bind_vs_shader(struct pipe_context *ctx, void *state) + if (state) { + r600_context_pipe_state_set(rctx, &rctx->vs_shader->current->rstate); + ++ r600_context_add_resource_size(ctx, (struct pipe_resource *)rctx->vs_shader->current->bo); ++ + if (rctx->chip_class < EVERGREEN && rctx->ps_shader) + r600_adjust_gprs(rctx); + } +@@ -957,10 +964,13 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, + } else { + u_upload_data(rctx->uploader, 0, input->buffer_size, ptr, &cb->buffer_offset, &cb->buffer); + } ++ /* account it in gtt */ ++ rctx->gtt += input->buffer_size; + } else { + /* Setup the hw buffer. */ + cb->buffer_offset = input->buffer_offset; + pipe_resource_reference(&cb->buffer, input->buffer); ++ r600_context_add_resource_size(ctx, input->buffer); + } + + state->enabled_mask |= 1 << index; +@@ -1023,6 +1033,7 @@ void r600_set_so_targets(struct pipe_context *ctx, + /* Set the new targets. 
*/ + for (i = 0; i < num_targets; i++) { + pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], targets[i]); ++ r600_context_add_resource_size(ctx, targets[i]->buffer); + } + for (; i < rctx->num_so_targets; i++) { + pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], NULL); +diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +index 983c915..1ad23e3 100644 +--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c ++++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +@@ -366,6 +366,16 @@ static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs) + return status; + } + ++static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt) ++{ ++ struct radeon_drm_cs *cs = radeon_drm_cs(rcs); ++ boolean status = ++ (cs->csc->used_gart + gtt) < cs->ws->info.gart_size * 0.7 && ++ (cs->csc->used_vram + vram) < cs->ws->info.vram_size * 0.7; ++ ++ return status; ++} ++ + static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs, + struct radeon_winsys_cs_handle *buf) + { +@@ -549,6 +559,7 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws) + ws->base.cs_destroy = radeon_drm_cs_destroy; + ws->base.cs_add_reloc = radeon_drm_cs_add_reloc; + ws->base.cs_validate = radeon_drm_cs_validate; ++ ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit; + ws->base.cs_write_reloc = radeon_drm_cs_write_reloc; + ws->base.cs_flush = radeon_drm_cs_flush; + ws->base.cs_set_flush_callback = radeon_drm_cs_set_flush; +diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h +index 4eb57fb..3dd91cc 100644 +--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h ++++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h +@@ -309,6 +309,16 @@ struct radeon_winsys { + boolean (*cs_validate)(struct radeon_winsys_cs *cs); + + /** ++ * Return TRUE if there is enough 
memory in VRAM and GTT for the relocs ++ * added so far. ++ * ++ * \param cs A command stream to validate. ++ * \param vram VRAM memory size pending to be use ++ * \param gtt GTT memory size pending to be use ++ */ ++ boolean (*cs_memory_below_limit)(struct radeon_winsys_cs *cs, uint64_t vram, uint64_t gtt); ++ ++ /** + * Write a relocated dword to a command buffer. + * + * \param cs A command stream the relocation is written to. +-- +1.8.1 + diff --git a/mesa.spec b/mesa.spec index 608b8ac..d3dbba2 100644 --- a/mesa.spec +++ b/mesa.spec @@ -49,7 +49,7 @@ Summary: Mesa graphics libraries Name: mesa Version: 9.0.1 -Release: 3%{?dist} +Release: 4%{?dist} License: MIT Group: System Environment/Libraries URL: http://www.mesa3d.org @@ -68,6 +68,7 @@ Patch9: mesa-8.0-llvmpipe-shmget.patch Patch11: mesa-8.0-nouveau-tfp-blacklist.patch Patch12: mesa-8.0.1-fix-16bpp.patch Patch13: mesa-9.0.1-less-cxx-please.patch +Patch14: mesa-9-r600g-limit-memory.patch BuildRequires: pkgconfig autoconf automake libtool %if %{with_hardware} @@ -292,6 +293,8 @@ Mesa shared glapi %patch13 -p1 -b .less-cpp +%patch14 -p1 -b .r600g-limit + # default to dri (not xlib) for libGL on all arches # XXX please fix upstream sed -i 's/^default_driver.*$/default_driver="dri"/' configure.ac @@ -574,6 +577,9 @@ rm -rf $RPM_BUILD_ROOT %endif %changelog +* Thu Jan 31 2013 Jerome Glisse 9.0.1-4 +- force r600g to stay in gpu memory limit + * Thu Dec 20 2012 Adam Jackson 9.0.1-3 - mesa-9.0.1-22-gd0a9ab2.patch: Sync with git - Build with -fno-rtti -fno-exceptions, modest size and speed win