diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile index e9144ac..b6ed58b 100644 --- a/src/mesa/drivers/dri/r200/Makefile +++ b/src/mesa/drivers/dri/r200/Makefile @@ -48,7 +48,8 @@ SYMLINKS = \ COMMON_SYMLINKS = \ radeon_chipset.h \ radeon_screen.c \ - radeon_screen.h + radeon_screen.h \ + radeon_buffer.h ##### TARGETS ##### diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index 6ca9342..3bb1ff4 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -11,15 +11,6 @@ ifeq ($(USING_EGL), 1) EGL_SOURCES = server/radeon_egl.c endif -COMMON_SOURCES = \ - ../../common/driverfuncs.c \ - ../common/mm.c \ - ../common/utils.c \ - ../common/texmem.c \ - ../common/vblank.c \ - ../common/xmlconfig.c \ - ../common/dri_util.c - DRIVER_SOURCES = \ radeon_screen.c \ radeon_context.c \ @@ -36,6 +27,7 @@ DRIVER_SOURCES = \ r300_texmem.c \ r300_tex.c \ r300_texstate.c \ + r300_mipmap_tree.c \ radeon_program.c \ radeon_program_alu.c \ radeon_program_pair.c \ @@ -51,7 +43,7 @@ DRIVER_SOURCES = \ r300_swtcl.c \ $(EGL_SOURCES) -C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES) +C_SOURCES = $(COMMON_SOURCES) $(COMMON_BM_SOURCES) $(DRIVER_SOURCES) DRIVER_DEFINES = -DCOMPILE_R300 -DR200_MERGED=0 \ -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R300 @@ -68,7 +60,8 @@ COMMON_SYMLINKS = \ radeon_chipset.h \ radeon_screen.c \ radeon_screen.h \ - radeon_span.h + radeon_span.h \ + radeon_buffer.h ##### TARGETS ##### diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index c069660..493b0ac 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -51,11 +51,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "r300_reg.h" #include "r300_cmdbuf.h" #include "r300_emit.h" +#include "r300_mem.h" +#include "r300_mipmap_tree.h" #include "r300_state.h" // Set this to 1 for extremely verbose debugging of command buffers #define DEBUG_CMDBUF 0 +/** # of dwords reserved for additional instructions that may need to be written + * during flushing. + */ +#define SPACE_FOR_FLUSHING 4 + /** * Send the current command buffer via ioctl to the hardware. */ @@ -66,24 +73,42 @@ int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller) drm_radeon_cmd_buffer_t cmd; int start; + if (r300->cmdbuf.flushing) { + fprintf(stderr, "Recursive call into r300FlushCmdBufLocked!\n"); + exit(-1); + } + r300->cmdbuf.flushing = 1; + if (r300->radeon.lost_context) { start = 0; r300->radeon.lost_context = GL_FALSE; } else - start = r300->cmdbuf.count_reemit; + start = r300->cmdbuf.reemit; if (RADEON_DEBUG & DEBUG_IOCTL) { fprintf(stderr, "%s from %s - %i cliprects\n", __FUNCTION__, caller, r300->radeon.numClipRects); - if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_VERBOSE) - for (i = start; i < r300->cmdbuf.count_used; ++i) + if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_VERBOSE) { + fprintf(stderr, "written: %d committed: %d\n", r300->cmdbuf.written, r300->cmdbuf.committed); + for (i = start; i < r300->cmdbuf.written; ++i) fprintf(stderr, "%d: %08x\n", i, - r300->cmdbuf.cmd_buf[i]); + ((uint32_t*)r300->cmdbuf.buf->virtual)[i]); + } } - cmd.buf = (char *)(r300->cmdbuf.cmd_buf + start); - cmd.bufsz = (r300->cmdbuf.count_used - start) * 4; + if (r300->cmdbuf.written != r300->cmdbuf.committed) { + _mesa_problem(r300->radeon.glCtx, + "Command buffer contains %d uncommitted dwords\n" + "in r300FlushCmdBufLocked called from %s.\n", + r300->cmdbuf.written - r300->cmdbuf.committed, caller); + } + + dri_bo_unmap(r300->cmdbuf.buf); + dri_process_relocs(r300->cmdbuf.buf, 0); + + cmd.buf = (char *)r300->cmdbuf.buf->virtual + 4*start; + cmd.bufsz = (r300->cmdbuf.committed - start) * 4; if 
(r300->radeon.state.scissor.enabled) { cmd.nbox = r300->radeon.state.scissor.numClipRects; @@ -103,9 +128,19 @@ int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller) radeonWaitForIdleLocked(&r300->radeon); } + dri_post_submit(r300->cmdbuf.buf, 0); + dri_bo_unreference(r300->cmdbuf.buf); + r300->dma.nr_released_bufs = 0; - r300->cmdbuf.count_used = 0; - r300->cmdbuf.count_reemit = 0; + r300->cmdbuf.buf = dri_bo_alloc(&r300->radeon.bufmgr->base, "cmdbuf", + r300->cmdbuf.size*4, 16, DRM_BO_MEM_CMDBUF); + r300->cmdbuf.written = 0; + r300->cmdbuf.reserved = 0; + r300->cmdbuf.committed = 0; + r300->cmdbuf.reemit = 0; + dri_bo_map(r300->cmdbuf.buf, GL_TRUE); + + r300->cmdbuf.flushing = 0; return ret; } @@ -115,9 +150,7 @@ int r300FlushCmdBuf(r300ContextPtr r300, const char *caller) int ret; LOCK_HARDWARE(&r300->radeon); - ret = r300FlushCmdBufLocked(r300, caller); - UNLOCK_HARDWARE(&r300->radeon); if (ret) { @@ -128,6 +161,44 @@ int r300FlushCmdBuf(r300ContextPtr r300, const char *caller) return ret; } +/** + * Make sure that enough space is available in the command buffer + * by flushing if necessary. 
+ * + * \param dwords The number of dwords we need to be free on the command buffer + */ +void r300EnsureCmdBufSpace(r300ContextPtr r300, int dwords, const char *caller) +{ + assert(dwords < r300->cmdbuf.size); + + if (!r300->cmdbuf.flushing) + dwords += SPACE_FOR_FLUSHING; + + if (r300->cmdbuf.written + dwords > r300->cmdbuf.size) + r300FlushCmdBuf(r300, caller); +} + +void r300BeginBatch(r300ContextPtr r300, int n, GLboolean autostate, const char* function, int line) +{ + assert(r300->cmdbuf.written == r300->cmdbuf.reserved); + + r300EnsureCmdBufSpace(r300, n, function); + + if (autostate && !r300->cmdbuf.written) { + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, + "Reemit state after flush (from %s)\n", function); + r300EmitState(r300); + } + + r300->cmdbuf.reserved += n; + assert(r300->cmdbuf.reserved < r300->cmdbuf.size); + + if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "BEGIN_BATCH(%d) at %d, from %s:%i\n", + n, r300->cmdbuf.written, function, line); +} + static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *state) { int i; @@ -152,33 +223,18 @@ static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *stat */ static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty) { + BATCH_LOCALS(r300); struct r300_state_atom *atom; - uint32_t *dest; int dwords; - dest = r300->cmdbuf.cmd_buf + r300->cmdbuf.count_used; - - /* Emit WAIT */ - *dest = cmdwait(R300_WAIT_3D | R300_WAIT_3D_CLEAN); - dest++; - r300->cmdbuf.count_used++; - - /* Emit cache flush */ - *dest = cmdpacket0(R300_TX_INVALTAGS, 1); - dest++; - r300->cmdbuf.count_used++; - - *dest = R300_TX_FLUSH; - dest++; - r300->cmdbuf.count_used++; - - /* Emit END3D */ - *dest = cmdpacify(); - dest++; - r300->cmdbuf.count_used++; + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH(cmdwait(R300_WAIT_3D | R300_WAIT_3D_CLEAN)); + OUT_BATCH(cmdpacket0(R300_TX_INVALTAGS, 1)); + OUT_BATCH(R300_TX_FLUSH); + OUT_BATCH(cmdpacify()); + END_BATCH(); /* Emit 
actual atoms */ - foreach(atom, &r300->hw.atomlist) { if ((atom->dirty || r300->hw.all_dirty) == dirty) { dwords = (*atom->check) (r300, atom); @@ -186,9 +242,13 @@ static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty) if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) { r300PrintStateAtom(r300, atom); } - memcpy(dest, atom->cmd, dwords * 4); - dest += dwords; - r300->cmdbuf.count_used += dwords; + if (atom->emit) { + (*atom->emit)(r300); + } else { + BEGIN_BATCH_NO_AUTOSTATE(dwords); + OUT_BATCH_TABLE(atom->cmd, dwords); + END_BATCH(); + } atom->dirty = GL_FALSE; } else { if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) { @@ -198,6 +258,8 @@ static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty) } } } + + COMMIT_BATCH(); } /** @@ -211,22 +273,21 @@ void r300EmitState(r300ContextPtr r300) if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_PRIMS)) fprintf(stderr, "%s\n", __FUNCTION__); - if (r300->cmdbuf.count_used && !r300->hw.is_dirty + if (r300->cmdbuf.written && !r300->hw.is_dirty && !r300->hw.all_dirty) return; /* To avoid going across the entire set of states multiple times, just check - * for enough space for the case of emitting all state, and inline the - * r300AllocCmdBuf code here without all the checks. + * for enough space for the case of emitting all state. 
*/ r300EnsureCmdBufSpace(r300, r300->hw.max_state_size, __FUNCTION__); - if (!r300->cmdbuf.count_used) { + if (!r300->cmdbuf.written) { if (RADEON_DEBUG & DEBUG_STATE) fprintf(stderr, "Begin reemit state\n"); r300EmitAtoms(r300, GL_FALSE); - r300->cmdbuf.count_reemit = r300->cmdbuf.count_used; + r300->cmdbuf.reemit = r300->cmdbuf.committed; } if (RADEON_DEBUG & DEBUG_STATE) @@ -234,7 +295,7 @@ void r300EmitState(r300ContextPtr r300) r300EmitAtoms(r300, GL_TRUE); - assert(r300->cmdbuf.count_used < r300->cmdbuf.size); + assert(r300->cmdbuf.written < r300->cmdbuf.size); r300->hw.is_dirty = GL_FALSE; r300->hw.all_dirty = GL_FALSE; @@ -244,6 +305,79 @@ void r300EmitState(r300ContextPtr r300) #define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count) #define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count) +static void emit_tex_offsets(r300ContextPtr r300) +{ + BATCH_LOCALS(r300); + int numtmus = packet0_count(r300->hw.tex.offset.cmd); + + if (numtmus) { + int i; + + BEGIN_BATCH(numtmus + 1); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0, numtmus); + for(i = 0; i < numtmus; ++i) { + r300TexObj *t = r300->hw.textures[i]; + if (t && !t->image_override) { + OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, DRM_RELOC_TXOFFSET); + } else if (!t) { + OUT_BATCH(r300->radeon.radeonScreen->texOffset[0]); + } else { + OUT_BATCH(t->override_offset); + } + } + END_BATCH(); + } +} + +static void emit_cb_offset(r300ContextPtr r300) +{ + BATCH_LOCALS(r300); + struct radeon_renderbuffer *rrb; + uint32_t cbpitch; + + rrb = r300->radeon.state.color.rrb; + if (!rrb) { + fprintf(stderr, "no rrb\n"); + return; + } + + cbpitch = rrb->pitch; + if (rrb->cpp == 4) + cbpitch |= R300_COLOR_FORMAT_ARGB8888; + else + cbpitch |= R300_COLOR_FORMAT_RGB565; + + if (r300->radeon.sarea->tiling_enabled) + cbpitch |= R300_COLOR_TILE_ENABLE; + + BEGIN_BATCH(4); + OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1); + OUT_BATCH_RELOC(0, rrb->bo, 0, DRM_RELOC_TXOFFSET); + 
OUT_BATCH_REGSEQ(R300_RB3D_COLORPITCH0, 1); + OUT_BATCH(cbpitch); + END_BATCH(); +} + +static void emit_zb_offset(r300ContextPtr r300) +{ + BATCH_LOCALS(r300); + struct radeon_renderbuffer *rrb; + uint32_t zbpitch; + + rrb = r300->radeon.state.depth_buffer; + if (!rrb) + return; + + zbpitch = rrb->pitch; + + BEGIN_BATCH(3); + OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 2); + OUT_BATCH_RELOC(0, rrb->bo, 0, DRM_RELOC_TXOFFSET); + OUT_BATCH(zbpitch); + END_BATCH(); + +} + static int check_always(r300ContextPtr r300, struct r300_state_atom *atom) { return atom->cmd_size; @@ -480,8 +614,7 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(rop, always, 2, 0); r300->hw.rop.cmd[0] = cmdpacket0(R300_RB3D_ROPCNTL, 1); ALLOC_STATE(cb, always, R300_CB_CMDSIZE, 0); - r300->hw.cb.cmd[R300_CB_CMD_0] = cmdpacket0(R300_RB3D_COLOROFFSET0, 1); - r300->hw.cb.cmd[R300_CB_CMD_1] = cmdpacket0(R300_RB3D_COLORPITCH0, 1); + r300->hw.cb.emit = &emit_cb_offset; ALLOC_STATE(rb3d_dither_ctl, always, 10, 0); r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(R300_RB3D_DITHER_CTL, 9); ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0); @@ -495,7 +628,7 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.zstencil_format.cmd[0] = cmdpacket0(R300_ZB_FORMAT, 4); ALLOC_STATE(zb, always, R300_ZB_CMDSIZE, 0); - r300->hw.zb.cmd[R300_ZB_CMD_0] = cmdpacket0(R300_ZB_DEPTHOFFSET, 2); + r300->hw.zb.emit = emit_zb_offset; ALLOC_STATE(zb_depthclearvalue, always, 2, 0); r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(R300_ZB_DEPTHCLEARVALUE, 1); ALLOC_STATE(unk4F30, always, 3, 0); @@ -562,9 +695,10 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(tex.pitch, variable, mtu + 1, 0); r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FORMAT2_0, 0); - ALLOC_STATE(tex.offset, variable, mtu + 1, 0); + ALLOC_STATE(tex.offset, variable, 1, 0); r300->hw.tex.offset.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_OFFSET_0, 0); + r300->hw.tex.offset.emit = &emit_tex_offsets; ALLOC_STATE(tex.chroma_key, variable, mtu + 1, 0); 
r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] = @@ -597,10 +731,14 @@ void r300InitCmdBuf(r300ContextPtr r300) size * 4, r300->hw.max_state_size * 4); } + r300->cmdbuf.buf = dri_bo_alloc(&r300->radeon.bufmgr->base, "cmdbuf", + size*4, 16, DRM_BO_MEM_CMDBUF); r300->cmdbuf.size = size; - r300->cmdbuf.cmd_buf = (uint32_t *) CALLOC(size * 4); - r300->cmdbuf.count_used = 0; - r300->cmdbuf.count_reemit = 0; + r300->cmdbuf.written = 0; + r300->cmdbuf.reserved = 0; + r300->cmdbuf.committed = 0; + r300->cmdbuf.reemit = 0; + dri_bo_map(r300->cmdbuf.buf, GL_TRUE); } /** @@ -610,66 +748,10 @@ void r300DestroyCmdBuf(r300ContextPtr r300) { struct r300_state_atom *atom; - FREE(r300->cmdbuf.cmd_buf); + dri_bo_unmap(r300->cmdbuf.buf); + dri_bo_unreference(r300->cmdbuf.buf); foreach(atom, &r300->hw.atomlist) { FREE(atom->cmd); } } - -void r300EmitBlit(r300ContextPtr rmesa, - GLuint color_fmt, - GLuint src_pitch, - GLuint src_offset, - GLuint dst_pitch, - GLuint dst_offset, - GLint srcx, GLint srcy, - GLint dstx, GLint dsty, GLuint w, GLuint h) -{ - drm_r300_cmd_header_t *cmd; - - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, - "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n", - __FUNCTION__, src_pitch, src_offset, srcx, srcy, - dst_pitch, dst_offset, dstx, dsty, w, h); - - assert((src_pitch & 63) == 0); - assert((dst_pitch & 63) == 0); - assert((src_offset & 1023) == 0); - assert((dst_offset & 1023) == 0); - assert(w < (1 << 16)); - assert(h < (1 << 16)); - - cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 8, __FUNCTION__); - - cmd[0].header.cmd_type = R300_CMD_PACKET3; - cmd[0].header.pad0 = R300_CMD_PACKET3_RAW; - cmd[1].u = R300_CP_CMD_BITBLT_MULTI | (5 << 16); - cmd[2].u = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL | - RADEON_GMC_DST_PITCH_OFFSET_CNTL | - RADEON_GMC_BRUSH_NONE | - (color_fmt << 8) | - RADEON_GMC_SRC_DATATYPE_COLOR | - RADEON_ROP3_S | - RADEON_DP_SRC_SOURCE_MEMORY | - RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS); - - cmd[3].u = ((src_pitch / 64) << 22) | 
(src_offset >> 10); - cmd[4].u = ((dst_pitch / 64) << 22) | (dst_offset >> 10); - cmd[5].u = (srcx << 16) | srcy; - cmd[6].u = (dstx << 16) | dsty; /* dst */ - cmd[7].u = (w << 16) | h; -} - -void r300EmitWait(r300ContextPtr rmesa, GLuint flags) -{ - drm_r300_cmd_header_t *cmd; - - assert(!(flags & ~(R300_WAIT_2D | R300_WAIT_3D))); - - cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__); - cmd[0].u = 0; - cmd[0].wait.cmd_type = R300_CMD_WAIT; - cmd[0].wait.flags = flags; -} diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.h b/src/mesa/drivers/dri/r300/r300_cmdbuf.h index a8eaa58..4708a4c 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.h +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.h @@ -45,29 +45,88 @@ extern void r300EmitState(r300ContextPtr r300); extern void r300InitCmdBuf(r300ContextPtr r300); extern void r300DestroyCmdBuf(r300ContextPtr r300); +extern void r300EnsureCmdBufSpace(r300ContextPtr r300, int dwords, const char *caller); + +extern void r300BeginBatch(r300ContextPtr r300, int n, GLboolean autostate, const char* function, int line); /** - * Make sure that enough space is available in the command buffer - * by flushing if necessary. - * - * \param dwords The number of dwords we need to be free on the command buffer + * Every function writing to the command buffer needs to declare this + * to get the necessary local variables. */ -static INLINE void r300EnsureCmdBufSpace(r300ContextPtr r300, - int dwords, const char *caller) -{ - assert(dwords < r300->cmdbuf.size); +#define BATCH_LOCALS(r300) \ + const r300ContextPtr b_l_r300 = r300 - if (r300->cmdbuf.count_used + dwords > r300->cmdbuf.size) - r300FlushCmdBuf(r300, caller); -} +/** + * Prepare writing n dwords to the command buffer, + * including producing any necessary state emits on buffer wraparound. + */ +#define BEGIN_BATCH(n) r300BeginBatch(b_l_r300, n, GL_TRUE, __FUNCTION__, __LINE__) + +/** + * Same as BEGIN_BATCH, but do not cause automatic state emits. 
+ */ +#define BEGIN_BATCH_NO_AUTOSTATE(n) r300BeginBatch(b_l_r300, n, GL_FALSE, __FUNCTION__, __LINE__) + +/** + * Write one dword to the command buffer. + */ +#define OUT_BATCH(data) \ + do { \ + if (b_l_r300->cmdbuf.written < b_l_r300->cmdbuf.reserved) { \ + ((uint32_t*)b_l_r300->cmdbuf.buf->virtual)[b_l_r300->cmdbuf.written++] = data; \ + } else { \ + _mesa_problem(b_l_r300->radeon.glCtx, "%s:%i: OUT_BATCH mismatch", __FUNCTION__, __LINE__); \ + } \ + } while(0) /** - * Allocate the given number of dwords in the command buffer and return - * a pointer to the allocated area. - * When necessary, these functions cause a flush. r300AllocCmdBuf() also - * causes state reemission after a flush. This is necessary to ensure - * correct hardware state after an unlock. + * Write a relocated dword to the command buffer. */ +#define OUT_BATCH_RELOC(data, bo, offset, flags) \ + do { \ + if (b_l_r300->cmdbuf.written < b_l_r300->cmdbuf.reserved) { \ + dri_emit_reloc(b_l_r300->cmdbuf.buf, flags, offset, 4*b_l_r300->cmdbuf.written, bo); \ + ((uint32_t*)b_l_r300->cmdbuf.buf->virtual)[b_l_r300->cmdbuf.written++] = data; \ + } else { \ + _mesa_problem(b_l_r300->radeon.glCtx, "%s:%i: OUT_BATCH mismatch", __FUNCTION__, __LINE__); \ + } \ + } while(0) + +/** + * Write n dwords from ptr to the command buffer. + */ +#define OUT_BATCH_TABLE(ptr,n) \ + do { \ + int _n = n; \ + if (b_l_r300->cmdbuf.written+_n <= b_l_r300->cmdbuf.reserved) { \ + memcpy((uint32_t*)b_l_r300->cmdbuf.buf->virtual + b_l_r300->cmdbuf.written, (ptr), 4*_n); \ + b_l_r300->cmdbuf.written += _n; \ + } else { \ + _mesa_problem(b_l_r300->radeon.glCtx, "%s:%i: OUT_BATCH_TABLE mismatch", __FUNCTION__, __LINE__); \ + } \ + } while(0) + +/** + * Finish writing dwords to the command buffer. + * The number of (direct or indirect) OUT_BATCH calls between the previous + * BEGIN_BATCH and END_BATCH must match the number specified at BEGIN_BATCH time. 
+ */ +#define END_BATCH() \ + do { \ + if (b_l_r300->cmdbuf.written != b_l_r300->cmdbuf.reserved) \ + _mesa_problem(b_l_r300->radeon.glCtx, "%s:%i: END_BATCH mismatch", __FUNCTION__, __LINE__); \ + } while(0) + +/** + * After the last END_BATCH() of rendering, this indicates that flushing + * the command buffer now is okay. + */ +#define COMMIT_BATCH() \ + do { \ + assert(b_l_r300->cmdbuf.written == b_l_r300->cmdbuf.reserved); \ + b_l_r300->cmdbuf.committed = b_l_r300->cmdbuf.written; \ + } while(0) + static INLINE uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300, int dwords, const char *caller) { @@ -75,8 +134,9 @@ static INLINE uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300, r300EnsureCmdBufSpace(r300, dwords, caller); - ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used]; - r300->cmdbuf.count_used += dwords; + ptr = (uint32_t*)r300->cmdbuf.buf->virtual + r300->cmdbuf.written; + r300->cmdbuf.written += dwords; + r300->cmdbuf.reserved = r300->cmdbuf.committed = r300->cmdbuf.written; return ptr; } @@ -87,30 +147,17 @@ static INLINE uint32_t *r300AllocCmdBuf(r300ContextPtr r300, r300EnsureCmdBufSpace(r300, dwords, caller); - if (!r300->cmdbuf.count_used) { + if (!r300->cmdbuf.written) { if (RADEON_DEBUG & DEBUG_IOCTL) fprintf(stderr, "Reemit state after flush (from %s)\n", caller); r300EmitState(r300); } - ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used]; - r300->cmdbuf.count_used += dwords; + ptr = (uint32_t*)r300->cmdbuf.buf->virtual + r300->cmdbuf.written; + r300->cmdbuf.written += dwords; + r300->cmdbuf.reserved = r300->cmdbuf.committed = r300->cmdbuf.written; return ptr; } -extern void r300EmitBlit(r300ContextPtr rmesa, - GLuint color_fmt, - GLuint src_pitch, - GLuint src_offset, - GLuint dst_pitch, - GLuint dst_offset, - GLint srcx, GLint srcy, - GLint dstx, GLint dsty, GLuint w, GLuint h); - -extern void r300EmitWait(r300ContextPtr rmesa, GLuint flags); -extern void r300EmitLOAD_VBPNTR(r300ContextPtr rmesa, int start); -extern void 
r300EmitVertexShader(r300ContextPtr rmesa); -extern void r300EmitPixelShader(r300ContextPtr rmesa); - #endif /* __R300_CMDBUF_H__ */ diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index fcf571d..cc9c11a 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -59,15 +59,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_span.h" #include "r300_context.h" #include "r300_cmdbuf.h" +#include "r300_mipmap_tree.h" #include "r300_state.h" #include "r300_ioctl.h" #include "r300_tex.h" #include "r300_emit.h" #include "r300_swtcl.h" -#ifdef USER_BUFFERS #include "r300_mem.h" -#endif #include "vblank.h" #include "utils.h" @@ -190,7 +189,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, struct dd_function_table functions; r300ContextPtr r300; GLcontext *ctx; - int tcl_mode, i; + int tcl_mode; assert(glVisual); assert(driContextPriv); @@ -222,10 +221,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, r300InitTextureFuncs(&functions); r300InitShaderFuncs(&functions); -#ifdef USER_BUFFERS - r300_mem_init(r300); -#endif - if (!radeonInitContext(&r300->radeon, &functions, glVisual, driContextPriv, sharedContextPrivate)) { @@ -233,34 +228,9 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, return GL_FALSE; } + r300->radeon.bufmgr = radeonBufmgrClassicInit(r300); + /* Init r300 context data */ - r300->dma.buf0_address = - r300->radeon.radeonScreen->buffers->list[0].address; - - (void)memset(r300->texture_heaps, 0, sizeof(r300->texture_heaps)); - make_empty_list(&r300->swapped); - - r300->nr_heaps = 1 /* screen->numTexHeaps */ ; - assert(r300->nr_heaps < RADEON_NR_TEX_HEAPS); - for (i = 0; i < r300->nr_heaps; i++) { - /* *INDENT-OFF* */ - r300->texture_heaps[i] = driCreateTextureHeap(i, r300, - screen-> - texSize[i], 12, - RADEON_NR_TEX_REGIONS, - (drmTextureRegionPtr) - r300->radeon.sarea-> - 
tex_list[i], - &r300->radeon.sarea-> - tex_age[i], - &r300->swapped, - sizeof - (r300TexObj), - (destroy_texture_object_t - *) - r300DestroyTexObj); - /* *INDENT-ON* */ - } r300->texture_depth = driQueryOptioni(&r300->radeon.optionCache, "texture_depth"); if (r300->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB) @@ -299,13 +269,11 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, ctx->Const.MaxLineWidth = R300_LINESIZE_MAX; ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX; -#ifdef USER_BUFFERS /* Needs further modifications */ #if 0 ctx->Const.MaxArrayLockSize = ( /*512 */ RADEON_BUFFER_SIZE * 16 * 1024) / (4 * 4); #endif -#endif /* Initialize the software rasterizer and helper modules. */ @@ -407,72 +375,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, return GL_TRUE; } -static void r300FreeGartAllocations(r300ContextPtr r300) -{ - int i, ret, tries = 0, done_age, in_use = 0; - drm_radeon_mem_free_t memfree; - - memfree.region = RADEON_MEM_REGION_GART; - -#ifdef USER_BUFFERS - for (i = r300->rmm->u_last; i > 0; i--) { - if (r300->rmm->u_list[i].ptr == NULL) { - continue; - } - - /* check whether this buffer is still in use */ - if (r300->rmm->u_list[i].pending) { - in_use++; - } - } - /* Cannot flush/lock if no context exists. 
*/ - if (in_use) - r300FlushCmdBuf(r300, __FUNCTION__); - - done_age = radeonGetAge((radeonContextPtr) r300); - - for (i = r300->rmm->u_last; i > 0; i--) { - if (r300->rmm->u_list[i].ptr == NULL) { - continue; - } - - /* check whether this buffer is still in use */ - if (!r300->rmm->u_list[i].pending) { - continue; - } - - assert(r300->rmm->u_list[i].h_pending == 0); - - tries = 0; - while (r300->rmm->u_list[i].age > done_age && tries++ < 1000) { - usleep(10); - done_age = radeonGetAge((radeonContextPtr) r300); - } - if (tries >= 1000) { - WARN_ONCE("Failed to idle region!"); - } - - memfree.region_offset = (char *)r300->rmm->u_list[i].ptr - - (char *)r300->radeon.radeonScreen->gartTextures.map; - - ret = drmCommandWrite(r300->radeon.radeonScreen->driScreen->fd, - DRM_RADEON_FREE, &memfree, - sizeof(memfree)); - if (ret) { - fprintf(stderr, "Failed to free at %p\nret = %s\n", - r300->rmm->u_list[i].ptr, strerror(-ret)); - } else { - if (i == r300->rmm->u_last) - r300->rmm->u_last--; - - r300->rmm->u_list[i].pending = 0; - r300->rmm->u_list[i].ptr = NULL; - } - } - r300->rmm->u_head = i; -#endif /* USER_BUFFERS */ -} - /* Destroy the device specific context. 
*/ void r300DestroyContext(__DRIcontextPrivate * driContextPriv) @@ -496,24 +398,17 @@ void r300DestroyContext(__DRIcontextPrivate * driContextPriv) assert(r300); /* should never be null */ if (r300) { - GLboolean release_texture_heaps; - - release_texture_heaps = - (r300->radeon.glCtx->Shared->RefCount == 1); _swsetup_DestroyContext(r300->radeon.glCtx); _tnl_ProgramCacheDestroy(r300->radeon.glCtx); _tnl_DestroyContext(r300->radeon.glCtx); _vbo_DestroyContext(r300->radeon.glCtx); _swrast_DestroyContext(r300->radeon.glCtx); - if (r300->dma.current.buf) { - r300ReleaseDmaRegion(r300, &r300->dma.current, - __FUNCTION__); -#ifndef USER_BUFFERS - r300FlushCmdBuf(r300, __FUNCTION__); -#endif + if (r300->dma.current) { + dri_bo_unreference(r300->dma.current); + r300->dma.current = 0; } - r300FreeGartAllocations(r300); + r300FlushCmdBuf(r300, __FUNCTION__); r300DestroyCmdBuf(r300); if (radeon->state.scissor.pClipRects) { @@ -521,28 +416,13 @@ void r300DestroyContext(__DRIcontextPrivate * driContextPriv) radeon->state.scissor.pClipRects = NULL; } - if (release_texture_heaps) { - /* This share group is about to go away, free our private - * texture object data. 
- */ - int i; - - for (i = 0; i < r300->nr_heaps; i++) { - driDestroyTextureHeap(r300->texture_heaps[i]); - r300->texture_heaps[i] = NULL; - } - - assert(is_empty_list(&r300->swapped)); - } - radeonCleanupContext(&r300->radeon); -#ifdef USER_BUFFERS /* the memory manager might be accessed when Mesa frees the shared * state, so don't destroy it earlier */ - r300_mem_destroy(r300); -#endif + dri_bufmgr_destroy(&r300->radeon.bufmgr->base); + r300->radeon.bufmgr = 0; /* free the option cache */ driDestroyOptionCache(&r300->radeon.optionCache); diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index d2017f8..5c99740 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -40,6 +40,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "tnl/t_vertex.h" #include "drm.h" #include "radeon_drm.h" +#include "dri_bufmgr.h" #include "dri_util.h" #include "texmem.h" @@ -47,11 +48,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "mtypes.h" #include "colormac.h" -#define USER_BUFFERS - struct r300_context; typedef struct r300_context r300ContextRec; typedef struct r300_context *r300ContextPtr; +typedef struct radeon_bufmgr radeon_bufmgr; #include "radeon_lock.h" #include "mm.h" @@ -122,44 +122,22 @@ static INLINE uint32_t r300PackFloat24(float f) /************ DMA BUFFERS **************/ -/* Need refcounting on dma buffers: - */ -struct r300_dma_buffer { - int refcount; /**< the number of retained regions in buf */ - drmBufPtr buf; - int id; -}; -#undef GET_START -#ifdef USER_BUFFERS -#define GET_START(rvb) (r300GartOffsetFromVirtual(rmesa, (rvb)->address+(rvb)->start)) -#else -#define GET_START(rvb) (rmesa->radeon.radeonScreen->gart_buffer_offset + \ - (rvb)->address - rmesa->dma.buf0_address + \ - (rvb)->start) -#endif -/* A retained region, eg vertices for indexed vertices. 
- */ -struct r300_dma_region { - struct r300_dma_buffer *buf; - char *address; /* == buf->address */ - int start, end, ptr; /* offsets from start of buf */ - - int aos_offset; /* address in GART memory */ - int aos_stride; /* distance between elements, in dwords */ - int aos_size; /* number of components (1-4) */ -}; - struct r300_dma { /* Active dma region. Allocations for vertices and retained * regions come from here. Also used for emitting random vertices, * these may be flushed by calling flush_current(); */ - struct r300_dma_region current; + dri_bo *current; /** Buffer that DMA memory is allocated from */ + int current_used; /** Number of bytes allocated and forgotten about */ + int current_vertexptr; /** End of active vertex region */ + /** + * If current_vertexptr != current_used then flush must be non-zero. + * flush must be called before non-active vertex allocations can be + * performed. + */ void (*flush) (r300ContextPtr); - char *buf0_address; /* start of buf[0], for index calcs */ - /* Number of "in-flight" DMA buffers, i.e. the number of buffers * for which a DISCARD command is currently queued in the command buffer. */ @@ -173,15 +151,12 @@ typedef struct r300_tex_obj r300TexObj, *r300TexObjPtr; /* Texture object in locally shared texture space. */ struct r300_tex_obj { - driTextureObject base; - - GLuint bufAddr; /* Offset to start of locally - shared texture block */ - - drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS]; - /* Six, for the cube faces */ + struct gl_texture_object base; + struct _r300_mipmap_tree *mt; + GLuint dirty_images[6]; GLboolean image_override; /* Image overridden by GLX_EXT_tfp */ + GLuint override_offset; GLuint pitch; /* this isn't sent to hardware just used in calculations */ /* hardware register values */ @@ -191,30 +166,16 @@ struct r300_tex_obj { GLuint pitch_reg; GLuint size; /* npot only */ GLuint format; - GLuint offset; /* Image location in the card's address space. - All cube faces follow. 
*/ - GLuint unknown4; - GLuint unknown5; - /* end hardware registers */ - - /* registers computed by r200 code - keep them here to - compare against what is actually written. - - to be removed later.. */ GLuint pp_border_color; - GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */ - GLuint format_x; - - GLboolean border_fallback; + /* end hardware registers */ GLuint tile_bits; /* hw texture tile bits used on this texture */ }; -struct r300_texture_env_state { - r300TexObjPtr texobj; - GLenum format; - GLenum envMode; -}; +static INLINE r300TexObj* r300_tex_obj(struct gl_texture_object *texObj) +{ + return (r300TexObj*)texObj; +} /* The blit width for texture uploads */ @@ -222,7 +183,6 @@ struct r300_texture_env_state { #define R300_MAX_TEXTURE_UNITS 8 struct r300_texture_state { - struct r300_texture_env_state unit[R300_MAX_TEXTURE_UNITS]; int tc_count; /* number of incoming texture coordinates from VAP */ }; @@ -242,6 +202,7 @@ struct r300_state_atom { GLboolean dirty; int (*check) (r300ContextPtr, struct r300_state_atom * atom); + void (*emit) (r300ContextPtr); }; #define R300_VPT_CMD_0 0 @@ -549,6 +510,8 @@ struct r300_hw_state { struct r300_state_atom border_color; } tex; struct r300_state_atom txe; /* tex enable (4104) */ + + r300TexObj *textures[R300_MAX_TEXTURE_UNITS]; }; /** @@ -559,10 +522,14 @@ struct r300_hw_state { * otherwise. 
*/ struct r300_cmdbuf { - int size; /* DWORDs allocated for buffer */ - uint32_t *cmd_buf; - int count_used; /* DWORDs filled so far */ - int count_reemit; /* size of re-emission batch */ + dri_bo *buf; + int reemit; /** # of dwords in reemit sequence (is always <= committed) */ + int size; /** # of dwords total */ + + int committed; /** # of dwords that we have committed to */ + int written; /** # of dwords written (is always >= committed) */ + int reserved; /** # of dwords reserved up to previous BEGIN_BATCH */ + unsigned int flushing:1; /** whether we're currently in FlushCmdBufLocked */ }; /** @@ -811,18 +778,25 @@ struct r500_fragment_program { #define REG_COLOR0 1 #define REG_TEX0 2 +struct r300_aos { + dri_bo *bo; /** Buffer object where vertex data is stored */ + int offset; /** Offset into buffer object, in bytes */ + int components; /** Number of components per vertex */ + int stride; /** Stride in dwords (may be 0 for repeating) */ + int count; /** Number of vertices */ +}; + struct r300_state { struct r300_depthbuffer_state depth; struct r300_texture_state texture; int sw_tcl_inputs[VERT_ATTRIB_MAX]; struct r300_vertex_shader_state vertex_shader; - struct r300_dma_region aos[R300_MAX_AOS_ARRAYS]; + struct r300_aos aos[R300_MAX_AOS_ARRAYS]; int aos_count; - GLuint *Elts; - struct r300_dma_region elt_dma; + dri_bo *elt_dma_bo; /** Buffer object that contains element indices */ + int elt_dma_offset; /** Offset into this buffer object, in bytes */ - struct r300_dma_region swtcl_dma; DECLARE_RENDERINPUTS(render_inputs_bitset); /* actual render inputs that R300 was configured for. They are the same as tnl->render_inputs for fixed pipeline */ @@ -880,13 +854,6 @@ struct r300_swtcl_info { * Offset of the 3UB specular color data within a hardware (swtcl) vertex. */ GLuint specoffset; - - /** - * Should Mesa project vertex data or will the hardware do it? 
- */ - GLboolean needproj; - - struct r300_dma_region indexed_verts; }; @@ -905,25 +872,11 @@ struct r300_context { /* Vertex buffers */ struct r300_dma dma; - GLboolean save_on_next_unlock; GLuint NewGLState; - /* Texture object bookkeeping - */ - unsigned nr_heaps; - driTexHeap *texture_heaps[RADEON_NR_TEX_HEAPS]; - driTextureObject swapped; int texture_depth; float initialMaxAnisotropy; - /* Clientdata textures; - */ - GLuint prefer_gart_client_texturing; - -#ifdef USER_BUFFERS - struct r300_memory_manager *rmm; -#endif - GLvector4f dummy_attrib[_TNL_ATTRIB_MAX]; GLvector4f *temp_attrib[_TNL_ATTRIB_MAX]; diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c index 2ea17ad..5e2afd5 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.c +++ b/src/mesa/drivers/dri/r300/r300_emit.c @@ -51,9 +51,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_emit.h" #include "r300_ioctl.h" -#ifdef USER_BUFFERS #include "r300_mem.h" -#endif #if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \ SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \ @@ -86,11 +84,9 @@ do { \ } while (0) #endif -static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb, - GLvoid * data, int stride, int count) +static void r300EmitVec4(uint32_t *out, GLvoid * data, int stride, int count) { int i; - int *out = (int *)(rvb->address + rvb->start); if (RADEON_DEBUG & DEBUG_VERTS) fprintf(stderr, "%s count %d stride %d out %p data %p\n", @@ -106,11 +102,9 @@ static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb, } } -static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb, - GLvoid * data, int stride, int count) +static void r300EmitVec8(uint32_t *out, GLvoid * data, int stride, int count) { int i; - int *out = (int *)(rvb->address + rvb->start); if (RADEON_DEBUG & DEBUG_VERTS) fprintf(stderr, "%s count %d stride %d out %p data %p\n", @@ -127,11 +121,9 @@ static void r300EmitVec8(GLcontext * ctx, struct 
r300_dma_region *rvb, } } -static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb, - GLvoid * data, int stride, int count) +static void r300EmitVec12(uint32_t *out, GLvoid * data, int stride, int count) { int i; - int *out = (int *)(rvb->address + rvb->start); if (RADEON_DEBUG & DEBUG_VERTS) fprintf(stderr, "%s count %d stride %d out %p data %p\n", @@ -149,11 +141,9 @@ static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb, } } -static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb, - GLvoid * data, int stride, int count) +static void r300EmitVec16(uint32_t *out, GLvoid * data, int stride, int count) { int i; - int *out = (int *)(rvb->address + rvb->start); if (RADEON_DEBUG & DEBUG_VERTS) fprintf(stderr, "%s count %d stride %d out %p data %p\n", @@ -172,35 +162,31 @@ static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb, } } -static void r300EmitVec(GLcontext * ctx, struct r300_dma_region *rvb, + +static void r300EmitVec(GLcontext * ctx, struct r300_aos *aos, GLvoid * data, int size, int stride, int count) { r300ContextPtr rmesa = R300_CONTEXT(ctx); + uint32_t *out; if (stride == 0) { - r300AllocDmaRegion(rmesa, rvb, size * 4, 4); + r300AllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32); count = 1; - rvb->aos_offset = GET_START(rvb); - rvb->aos_stride = 0; + aos->stride = 0; } else { - r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4); - rvb->aos_offset = GET_START(rvb); - rvb->aos_stride = size; + r300AllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32); + aos->stride = size; } + aos->components = size; + aos->count = count; + + out = (uint32_t*)((char*)aos->bo->virtual + aos->offset); switch (size) { - case 1: - r300EmitVec4(ctx, rvb, data, stride, count); - break; - case 2: - r300EmitVec8(ctx, rvb, data, stride, count); - break; - case 3: - r300EmitVec12(ctx, rvb, data, stride, count); - break; - case 4: - r300EmitVec16(ctx, rvb, data, stride, count); - break; + case 1: 
r300EmitVec4(out, data, stride, count); break; + case 2: r300EmitVec8(out, data, stride, count); break; + case 3: r300EmitVec12(out, data, stride, count); break; + case 4: r300EmitVec16(out, data, stride, count); break; default: assert(0); break; @@ -433,7 +419,7 @@ int r300EmitArrays(GLcontext * ctx) } for (i = 0; i < nr; i++) { - int ci, fix, found = 0; + int ci; swizzle[i][0] = SWIZZLE_ZERO; swizzle[i][1] = SWIZZLE_ZERO; @@ -444,48 +430,10 @@ int r300EmitArrays(GLcontext * ctx) swizzle[i][ci] = ci; } - if (r300IsGartMemory(rmesa, vb->AttribPtr[tab[i]]->data, 4)) { - if (vb->AttribPtr[tab[i]]->stride % 4) { - return R300_FALLBACK_TCL; - } - rmesa->state.aos[i].address = (void *)(vb->AttribPtr[tab[i]]->data); - rmesa->state.aos[i].start = 0; - rmesa->state.aos[i].aos_offset = r300GartOffsetFromVirtual(rmesa, vb->AttribPtr[tab[i]]->data); - rmesa->state.aos[i].aos_stride = vb->AttribPtr[tab[i]]->stride / 4; - rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size; - } else { - r300EmitVec(ctx, &rmesa->state.aos[i], - vb->AttribPtr[tab[i]]->data, - vb->AttribPtr[tab[i]]->size, - vb->AttribPtr[tab[i]]->stride, count); - } - - rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size; - - for (fix = 0; fix <= 4 - vb->AttribPtr[tab[i]]->size; fix++) { - if ((rmesa->state.aos[i].aos_offset - _mesa_sizeof_type(GL_FLOAT) * fix) % 4) { - continue; - } - found = 1; - break; - } - - if (found) { - if (fix > 0) { - WARN_ONCE("Feeling lucky?\n"); - } - rmesa->state.aos[i].aos_offset -= _mesa_sizeof_type(GL_FLOAT) * fix; - for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) { - swizzle[i][ci] += fix; - } - } else { - WARN_ONCE - ("Cannot handle offset %x with stride %d, comp %d\n", - rmesa->state.aos[i].aos_offset, - rmesa->state.aos[i].aos_stride, - vb->AttribPtr[tab[i]]->size); - return R300_FALLBACK_TCL; - } + r300EmitVec(ctx, &rmesa->state.aos[i], + vb->AttribPtr[tab[i]]->data, + vb->AttribPtr[tab[i]]->size, + vb->AttribPtr[tab[i]]->stride, count); } /* Setup 
INPUT_ROUTE. */ @@ -515,45 +463,76 @@ int r300EmitArrays(GLcontext * ctx) return R300_FALLBACK_NONE; } -#ifdef USER_BUFFERS -void r300UseArrays(GLcontext * ctx) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - int i; - - if (rmesa->state.elt_dma.buf) - r300_mem_use(rmesa, rmesa->state.elt_dma.buf->id); - - for (i = 0; i < rmesa->state.aos_count; i++) { - if (rmesa->state.aos[i].buf) - r300_mem_use(rmesa, rmesa->state.aos[i].buf->id); - } -} -#endif - void r300ReleaseArrays(GLcontext * ctx) { r300ContextPtr rmesa = R300_CONTEXT(ctx); int i; - r300ReleaseDmaRegion(rmesa, &rmesa->state.elt_dma, __FUNCTION__); + if (rmesa->state.elt_dma_bo) { + dri_bo_unreference(rmesa->state.elt_dma_bo); + rmesa->state.elt_dma_bo = 0; + } for (i = 0; i < rmesa->state.aos_count; i++) { - r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__); + if (rmesa->state.aos[i].bo) { + dri_bo_unreference(rmesa->state.aos[i].bo); + rmesa->state.aos[i].bo = 0; + } } } void r300EmitCacheFlush(r300ContextPtr rmesa) { - int cmd_reserved = 0; - int cmd_written = 0; - - drm_radeon_cmd_header_t *cmd = NULL; - - reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0); - e32(R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | - R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); + BATCH_LOCALS(rmesa); + + BEGIN_BATCH(4); + OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT, + R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | + R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); + OUT_BATCH_REGVAL(R300_ZB_ZCACHE_CTLSTAT, + R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | + R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); + END_BATCH(); + COMMIT_BATCH(); +} - reg_start(R300_ZB_ZCACHE_CTLSTAT, 0); - e32(R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | - R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); +void r300EmitBlit(r300ContextPtr rmesa, + GLuint color_fmt, + GLuint src_pitch, + dri_bo *src_bo, int src_offset, + GLuint dst_pitch, + GLuint dst_offset, + GLint srcx, GLint srcy, + GLint dstx, GLint dsty, GLuint w, GLuint h) +{ + 
BATCH_LOCALS(rmesa); + + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, + "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n", + __FUNCTION__, src_pitch, src_offset, srcx, srcy, + dst_pitch, dst_offset, dstx, dsty, w, h); + + assert((src_pitch & 63) == 0); + assert((dst_pitch & 63) == 0); + assert((src_offset & 1023) == 0); + assert((dst_offset & 1023) == 0); + assert(w < (1 << 16)); + assert(h < (1 << 16)); + + BEGIN_BATCH(8); + OUT_BATCH_PACKET3(R300_CP_CMD_BITBLT_MULTI, 5); + OUT_BATCH(RADEON_GMC_SRC_PITCH_OFFSET_CNTL | + RADEON_GMC_DST_PITCH_OFFSET_CNTL | + RADEON_GMC_BRUSH_NONE | + (color_fmt << 8) | + RADEON_GMC_SRC_DATATYPE_COLOR | + RADEON_ROP3_S | + RADEON_DP_SRC_SOURCE_MEMORY | + RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS); + OUT_BATCH_RELOC((src_pitch / 64) << 22, src_bo, src_offset, DRM_RELOC_BLITTER); + OUT_BATCH(((dst_pitch / 64) << 22) | (dst_offset >> 10)); + OUT_BATCH((srcx << 16) | srcy); + OUT_BATCH((dstx << 16) | dsty); + OUT_BATCH((w << 16) | h); + END_BATCH(); } diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h index 5950539..179983d 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.h +++ b/src/mesa/drivers/dri/r300/r300_emit.h @@ -127,130 +127,62 @@ static INLINE uint32_t cmdpacify(void) return cmd.u; } -/** - * Prepare to write a register value to register at address reg. - * If num_extra > 0 then the following extra values are written - * to registers with address +4, +8 and so on.. - */ -#define reg_start(reg, num_extra) \ - do { \ - int _n; \ - _n=(num_extra); \ - cmd = (drm_radeon_cmd_header_t*) \ - r300AllocCmdBuf(rmesa, \ - (_n+2), \ - __FUNCTION__); \ - cmd_reserved=_n+2; \ - cmd_written=1; \ - cmd[0].i=cmdpacket0((reg), _n+1); \ - } while (0); + +/** Single register write to command buffer; requires 2 dwords. 
*/ +#define OUT_BATCH_REGVAL(reg, val) \ + OUT_BATCH(cmdpacket0((reg), 1)); \ + OUT_BATCH((val)) + +/** Continuous register range write to command buffer; requires 1 dword, + * expects count dwords afterwards for register contents. */ +#define OUT_BATCH_REGSEQ(reg, count) \ + OUT_BATCH(cmdpacket0((reg), (count))); + +/** Write a 32 bit float to the ring; requires 1 dword. */ +#define OUT_BATCH_FLOAT32(f) \ + OUT_BATCH(r300PackFloat32((f))); /** - * Emit GLuint freestyle + * Write the header of a packet3 to the command buffer. + * Outputs 2 dwords and expects (num_extra+1) additional dwords afterwards. */ -#define e32(dword) \ - do { \ - if(cmd_written 0x3fff) { \ - fprintf(stderr,"Too big packet3 %08x: cannot " \ - "store %d dwords\n", \ - _p, _n); \ - _mesa_exit(-1); \ - } \ - cmd[0].i = cmdpacket3(R300_CMD_PACKET3_RAW); \ - cmd[1].i = _p | ((_n & 0x3fff)<<16); \ - } - /** * Must be sent to switch to 2d commands */ void static INLINE end_3d(r300ContextPtr rmesa) { - drm_radeon_cmd_header_t *cmd = NULL; + BATCH_LOCALS(rmesa); - cmd = - (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__); - cmd[0].header.cmd_type = R300_CMD_END3D; + BEGIN_BATCH(1); + OUT_BATCH(cmdpacify()); + END_BATCH(); } void static INLINE cp_delay(r300ContextPtr rmesa, unsigned short count) { - drm_radeon_cmd_header_t *cmd = NULL; + BATCH_LOCALS(rmesa); - cmd = - (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__); - cmd[0].i = cmdcpdelay(count); + BEGIN_BATCH(1); + OUT_BATCH(cmdcpdelay(count)); + END_BATCH(); } void static INLINE cp_wait(r300ContextPtr rmesa, unsigned char flags) { - drm_radeon_cmd_header_t *cmd = NULL; + BATCH_LOCALS(rmesa); - cmd = - (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__); - cmd[0].i = cmdwait(flags); + BEGIN_BATCH(1); + OUT_BATCH(cmdwait(flags)); + END_BATCH(); } extern int r300EmitArrays(GLcontext * ctx); -#ifdef USER_BUFFERS -void r300UseArrays(GLcontext * ctx); -#endif - extern void r300ReleaseArrays(GLcontext * 
ctx); extern int r300PrimitiveType(r300ContextPtr rmesa, int prim); extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim); @@ -265,4 +197,13 @@ extern GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead); extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten); extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten); +extern void r300EmitBlit(r300ContextPtr rmesa, + GLuint color_fmt, + GLuint src_pitch, + dri_bo *src_bo, int src_offset, + GLuint dst_pitch, + GLuint dst_offset, + GLint srcx, GLint srcy, + GLint dstx, GLint dsty, GLuint w, GLuint h); + #endif diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index bd7f060..046f9a2 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -55,6 +55,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_reg.h" #include "r300_emit.h" #include "r300_fragprog.h" +#include "r300_mem.h" #include "vblank.h" @@ -62,64 +63,51 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define CLEARBUFFER_DEPTH 0x2 #define CLEARBUFFER_STENCIL 0x4 -static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) +static void r300ClearBuffer(r300ContextPtr r300, int flags, + struct radeon_renderbuffer *rrb) { + BATCH_LOCALS(r300); GLcontext *ctx = r300->radeon.glCtx; __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable; - GLuint cboffset, cbpitch; - drm_r300_cmd_header_t *cmd2; - int cmd_reserved = 0; - int cmd_written = 0; - drm_radeon_cmd_header_t *cmd = NULL; + GLuint cbpitch = 0; r300ContextPtr rmesa = r300; if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s: %s buffer (%i,%i %ix%i)\n", - __FUNCTION__, buffer ? 
"back" : "front", - dPriv->x, dPriv->y, dPriv->w, dPriv->h); - - if (buffer) { - cboffset = r300->radeon.radeonScreen->backOffset; - cbpitch = r300->radeon.radeonScreen->backPitch; - } else { - cboffset = r300->radeon.radeonScreen->frontOffset; - cbpitch = r300->radeon.radeonScreen->frontPitch; + fprintf(stderr, "%s: buffer %p (%i,%i %ix%i)\n", + __FUNCTION__, rrb, dPriv->x, dPriv->y, + dPriv->w, dPriv->h); + + if (rrb) { + cbpitch = rrb->pitch; + if (rrb->cpp == 4) + cbpitch |= R300_COLOR_FORMAT_ARGB8888; + else + cbpitch |= R300_COLOR_FORMAT_RGB565; + + if (r300->radeon.sarea->tiling_enabled) + cbpitch |= R300_COLOR_TILE_ENABLE; } - cboffset += r300->radeon.radeonScreen->fbLocation; - + /* TODO in bufmgr */ cp_wait(r300, R300_WAIT_3D | R300_WAIT_3D_CLEAN); end_3d(rmesa); - R300_STATECHANGE(r300, cb); - reg_start(R300_RB3D_COLOROFFSET0, 0); - e32(cboffset); - - if (r300->radeon.radeonScreen->cpp == 4) - cbpitch |= R300_COLOR_FORMAT_ARGB8888; - else - cbpitch |= R300_COLOR_FORMAT_RGB565; - - if (r300->radeon.sarea->tiling_enabled) - cbpitch |= R300_COLOR_TILE_ENABLE; - - reg_start(R300_RB3D_COLORPITCH0, 0); - e32(cbpitch); - - R300_STATECHANGE(r300, cmk); - reg_start(RB3D_COLOR_CHANNEL_MASK, 0); + BEGIN_BATCH(19); + OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1); + OUT_BATCH_RELOC(0, rrb->bo, 0, DRM_RELOC_TXOFFSET); + OUT_BATCH_REGVAL(R300_RB3D_COLORPITCH0, cbpitch); + OUT_BATCH_REGSEQ(RB3D_COLOR_CHANNEL_MASK, 1); if (flags & CLEARBUFFER_COLOR) { - e32((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) | - (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) | - (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) | - (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0)); + OUT_BATCH((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) | + (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) | + (ctx->Color.ColorMask[RCOMP] ? 
RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) | + (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0)); } else { - e32(0x0); + OUT_BATCH(0); } - R300_STATECHANGE(r300, zs); - reg_start(R300_ZB_CNTL, 2); + OUT_BATCH_REGSEQ(R300_ZB_CNTL, 3); { uint32_t t1, t2; @@ -146,37 +134,37 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) R300_S_FRONT_ZFAIL_OP_SHIFT); } - e32(t1); - e32(t2); - e32(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << R300_STENCILWRITEMASK_SHIFT) | - (ctx->Stencil.Clear & R300_STENCILREF_MASK)); + OUT_BATCH(t1); + OUT_BATCH(t2); + OUT_BATCH(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << R300_STENCILWRITEMASK_SHIFT) | + (ctx->Stencil.Clear & R300_STENCILREF_MASK)); } - cmd2 = (drm_r300_cmd_header_t *) r300AllocCmdBuf(r300, 9, __FUNCTION__); - cmd2[0].packet3.cmd_type = R300_CMD_PACKET3; - cmd2[0].packet3.packet = R300_CMD_PACKET3_CLEAR; - cmd2[1].u = r300PackFloat32(dPriv->w / 2.0); - cmd2[2].u = r300PackFloat32(dPriv->h / 2.0); - cmd2[3].u = r300PackFloat32(ctx->Depth.Clear); - cmd2[4].u = r300PackFloat32(1.0); - cmd2[5].u = r300PackFloat32(ctx->Color.ClearColor[0]); - cmd2[6].u = r300PackFloat32(ctx->Color.ClearColor[1]); - cmd2[7].u = r300PackFloat32(ctx->Color.ClearColor[2]); - cmd2[8].u = r300PackFloat32(ctx->Color.ClearColor[3]); + OUT_BATCH(cmdpacket3(R300_CMD_PACKET3_CLEAR)); + OUT_BATCH_FLOAT32(dPriv->w / 2.0); + OUT_BATCH_FLOAT32(dPriv->h / 2.0); + OUT_BATCH_FLOAT32(ctx->Depth.Clear); + OUT_BATCH_FLOAT32(1.0); + OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]); + OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]); + OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]); + OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]); + END_BATCH(); r300EmitCacheFlush(rmesa); cp_wait(rmesa, R300_WAIT_3D | R300_WAIT_3D_CLEAN); + + R300_STATECHANGE(r300, cb); + R300_STATECHANGE(r300, cmk); + R300_STATECHANGE(r300, zs); } static void r300EmitClearState(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); - r300ContextPtr rmesa 
= r300; + BATCH_LOCALS(r300); __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable; int i; - int cmd_reserved = 0; - int cmd_written = 0; - drm_radeon_cmd_header_t *cmd = NULL; int has_tcl = 1; int is_r500 = 0; GLuint vap_cntl; @@ -184,35 +172,37 @@ static void r300EmitClearState(GLcontext * ctx) if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) has_tcl = 0; - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) - is_r500 = 1; - + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + is_r500 = 1; - /* FIXME: the values written to R300_VAP_INPUT_ROUTE_0_0 and - * R300_VAP_INPUT_ROUTE_0_1 are in fact known, however, the values are - * quite complex; see the functions in r300_emit.c. + /* State atom dirty tracking is a little subtle here. + * + * On the one hand, we need to make sure base state is emitted + * here if we start with an empty batch buffer, otherwise clear + * works incorrectly with multiple processes. Therefore, the first + * BEGIN_BATCH cannot be a BEGIN_BATCH_NO_AUTOSTATE. * - * I believe it would be a good idea to extend the functions in - * r300_emit.c so that they can be used to setup the default values for - * these registers, as well as the actual values used for rendering. + * On the other hand, implicit state emission clears the state atom + * dirty bits, so we have to call R300_STATECHANGE later than the + * first BEGIN_BATCH. + * + * The final trickiness is that, because we change state, we need + * to ensure that any stored swtcl primitives are flushed properly + * before we start changing state. See the R300_NEWPRIM in r300Clear + * for this. 
*/ - R300_STATECHANGE(r300, vir[0]); - reg_start(R300_VAP_PROG_STREAM_CNTL_0, 0); + BEGIN_BATCH(31); + OUT_BATCH_REGSEQ(R300_VAP_PROG_STREAM_CNTL_0, 1); if (!has_tcl) - e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | + OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT))); else - e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | + OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT))); - /* disable fog */ - R300_STATECHANGE(r300, fogs); - reg_start(R300_FG_FOG_BLEND, 0); - e32(0x0); - - R300_STATECHANGE(r300, vir[1]); - reg_start(R300_VAP_PROG_STREAM_CNTL_EXT_0, 0); - e32(((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | + OUT_BATCH_REGVAL(R300_FG_FOG_BLEND, 0); + OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_EXT_0, + ((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) | @@ -226,238 +216,246 @@ static void r300EmitClearState(GLcontext * ctx) << R300_SWIZZLE1_SHIFT))); /* R300_VAP_INPUT_CNTL_0, R300_VAP_INPUT_CNTL_1 */ - R300_STATECHANGE(r300, vic); - reg_start(R300_VAP_VTX_STATE_CNTL, 1); - e32((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT)); - e32(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0); + OUT_BATCH_REGSEQ(R300_VAP_VTX_STATE_CNTL, 2); + OUT_BATCH((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT)); + OUT_BATCH(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0); - R300_STATECHANGE(r300, vte); /* comes from fglrx startup of clear */ - 
reg_start(R300_SE_VTE_CNTL, 1); - e32(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA | - R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA | - R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA | - R300_VPORT_Z_OFFSET_ENA); - e32(0x8); + OUT_BATCH_REGSEQ(R300_SE_VTE_CNTL, 2); + OUT_BATCH(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA | + R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA | + R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA | + R300_VPORT_Z_OFFSET_ENA); + OUT_BATCH(0x8); - reg_start(R300_VAP_PSC_SGN_NORM_CNTL, 0); - e32(0xaaaaaaaa); + OUT_BATCH_REGVAL(R300_VAP_PSC_SGN_NORM_CNTL, 0xaaaaaaaa); - R300_STATECHANGE(r300, vof); - reg_start(R300_VAP_OUTPUT_VTX_FMT_0, 1); - e32(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT | - R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT); - e32(0x0); /* no textures */ + OUT_BATCH_REGSEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); + OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT | + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT); + OUT_BATCH(0); /* no textures */ - R300_STATECHANGE(r300, txe); - reg_start(R300_TX_ENABLE, 0); - e32(0x0); + OUT_BATCH_REGVAL(R300_TX_ENABLE, 0); - R300_STATECHANGE(r300, vpt); - reg_start(R300_SE_VPORT_XSCALE, 5); - efloat(1.0); - efloat(dPriv->x); - efloat(1.0); - efloat(dPriv->y); - efloat(1.0); - efloat(0.0); + OUT_BATCH_REGSEQ(R300_SE_VPORT_XSCALE, 6); + OUT_BATCH_FLOAT32(1.0); + OUT_BATCH_FLOAT32(dPriv->x); + OUT_BATCH_FLOAT32(1.0); + OUT_BATCH_FLOAT32(dPriv->y); + OUT_BATCH_FLOAT32(1.0); + OUT_BATCH_FLOAT32(0.0); - R300_STATECHANGE(r300, at); - reg_start(R300_FG_ALPHA_FUNC, 0); - e32(0x0); + OUT_BATCH_REGVAL(R300_FG_ALPHA_FUNC, 0); + + OUT_BATCH_REGSEQ(R300_RB3D_CBLEND, 2); + OUT_BATCH(0x0); + OUT_BATCH(0x0); + END_BATCH(); + R300_STATECHANGE(r300, vir[0]); + R300_STATECHANGE(r300, fogs); + R300_STATECHANGE(r300, vir[1]); + R300_STATECHANGE(r300, vic); + R300_STATECHANGE(r300, vte); + R300_STATECHANGE(r300, vof); + R300_STATECHANGE(r300, txe); + R300_STATECHANGE(r300, vpt); + R300_STATECHANGE(r300, at); R300_STATECHANGE(r300, 
bld); - reg_start(R300_RB3D_CBLEND, 1); - e32(0x0); - e32(0x0); + R300_STATECHANGE(r300, ps); if (has_tcl) { - R300_STATECHANGE(r300, vap_clip_cntl); - reg_start(R300_VAP_CLIP_CNTL, 0); - e32(R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE); + R300_STATECHANGE(r300, vap_clip_cntl); + + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH_REGVAL(R300_VAP_CLIP_CNTL, R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE); + END_BATCH(); } - R300_STATECHANGE(r300, ps); - reg_start(R300_GA_POINT_SIZE, 0); - e32(((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) | - ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT)); + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH_REGVAL(R300_GA_POINT_SIZE, + ((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) | + ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT)); + END_BATCH(); if (!is_r500) { R300_STATECHANGE(r300, ri); - reg_start(R300_RS_IP_0, 7); - for (i = 0; i < 8; ++i) { - e32(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3)); - } - R300_STATECHANGE(r300, rc); - /* The second constant is needed to get glxgears display anything .. 
*/ - reg_start(R300_RS_COUNT, 1); - e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); - e32(0x0); - R300_STATECHANGE(r300, rr); - reg_start(R300_RS_INST_0, 0); - e32(R300_RS_INST_COL_CN_WRITE); + + BEGIN_BATCH(14); + OUT_BATCH_REGSEQ(R300_RS_IP_0, 8); + for (i = 0; i < 8; ++i) + OUT_BATCH(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3)); + + OUT_BATCH_REGSEQ(R300_RS_COUNT, 2); + OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); + OUT_BATCH(0x0); + + OUT_BATCH_REGVAL(R300_RS_INST_0, R300_RS_INST_COL_CN_WRITE); + END_BATCH(); } else { R300_STATECHANGE(r300, ri); - reg_start(R500_RS_IP_0, 7); + R300_STATECHANGE(r300, rc); + R300_STATECHANGE(r300, rr); + + BEGIN_BATCH(14); + OUT_BATCH_REGSEQ(R500_RS_IP_0, 8); for (i = 0; i < 8; ++i) { - e32((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | - (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | - (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | - (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT)); + OUT_BATCH((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | + (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT)); } - R300_STATECHANGE(r300, rc); - /* The second constant is needed to get glxgears display anything .. 
*/ - reg_start(R300_RS_COUNT, 1); - e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); - e32(0x0); - - R300_STATECHANGE(r300, rr); - reg_start(R500_RS_INST_0, 0); - e32(R500_RS_INST_COL_CN_WRITE); + OUT_BATCH_REGSEQ(R300_RS_COUNT, 2); + OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); + OUT_BATCH(0x0); + OUT_BATCH_REGVAL(R500_RS_INST_0, R500_RS_INST_COL_CN_WRITE); + END_BATCH(); } if (!is_r500) { R300_STATECHANGE(r300, fp); - reg_start(R300_US_CONFIG, 2); - e32(0x0); - e32(0x0); - e32(0x0); - reg_start(R300_US_CODE_ADDR_0, 3); - e32(0x0); - e32(0x0); - e32(0x0); - e32(R300_RGBA_OUT); - R300_STATECHANGE(r300, fpi[0]); R300_STATECHANGE(r300, fpi[1]); R300_STATECHANGE(r300, fpi[2]); R300_STATECHANGE(r300, fpi[3]); - reg_start(R300_US_ALU_RGB_INST_0, 0); - e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO))); - - reg_start(R300_US_ALU_RGB_ADDR_0, 0); - e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0)); - - reg_start(R300_US_ALU_ALPHA_INST_0, 0); - e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO))); - - reg_start(R300_US_ALU_ALPHA_ADDR_0, 0); - e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); + BEGIN_BATCH(17); + OUT_BATCH_REGSEQ(R300_US_CONFIG, 3); + OUT_BATCH(0x0); + OUT_BATCH(0x0); + OUT_BATCH(0x0); + OUT_BATCH_REGSEQ(R300_US_CODE_ADDR_0, 4); + OUT_BATCH(0x0); + OUT_BATCH(0x0); + OUT_BATCH(0x0); + OUT_BATCH(R300_RGBA_OUT); + + OUT_BATCH_REGVAL(R300_US_ALU_RGB_INST_0, + FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO))); + OUT_BATCH_REGVAL(R300_US_ALU_RGB_ADDR_0, + FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0)); + OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_INST_0, + FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO))); + OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_ADDR_0, + FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); + END_BATCH(); } else { - R300_STATECHANGE(r300, fp); - reg_start(R500_US_CONFIG, 1); - e32(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); - e32(0x0); - reg_start(R500_US_CODE_ADDR, 2); - e32(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1)); - 
e32(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1)); - e32(R500_US_CODE_OFFSET_ADDR(0)); - + R300_STATECHANGE(r300, fp); R300_STATECHANGE(r300, r500fp); - r500fp_start_fragment(0, 6); - - e32(R500_INST_TYPE_OUT | - R500_INST_TEX_SEM_WAIT | - R500_INST_LAST | - R500_INST_RGB_OMASK_R | - R500_INST_RGB_OMASK_G | - R500_INST_RGB_OMASK_B | - R500_INST_ALPHA_OMASK | - R500_INST_RGB_CLAMP | - R500_INST_ALPHA_CLAMP); - - e32(R500_RGB_ADDR0(0) | - R500_RGB_ADDR1(0) | - R500_RGB_ADDR1_CONST | - R500_RGB_ADDR2(0) | - R500_RGB_ADDR2_CONST); - - e32(R500_ALPHA_ADDR0(0) | - R500_ALPHA_ADDR1(0) | - R500_ALPHA_ADDR1_CONST | - R500_ALPHA_ADDR2(0) | - R500_ALPHA_ADDR2_CONST); - - e32(R500_ALU_RGB_SEL_A_SRC0 | - R500_ALU_RGB_R_SWIZ_A_R | - R500_ALU_RGB_G_SWIZ_A_G | - R500_ALU_RGB_B_SWIZ_A_B | - R500_ALU_RGB_SEL_B_SRC0 | - R500_ALU_RGB_R_SWIZ_B_R | - R500_ALU_RGB_B_SWIZ_B_G | - R500_ALU_RGB_G_SWIZ_B_B); - - e32(R500_ALPHA_OP_CMP | - R500_ALPHA_SWIZ_A_A | - R500_ALPHA_SWIZ_B_A); - - e32(R500_ALU_RGBA_OP_CMP | - R500_ALU_RGBA_R_SWIZ_0 | - R500_ALU_RGBA_G_SWIZ_0 | - R500_ALU_RGBA_B_SWIZ_0 | - R500_ALU_RGBA_A_SWIZ_0); + + BEGIN_BATCH(14); + OUT_BATCH_REGSEQ(R500_US_CONFIG, 2); + OUT_BATCH(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); + OUT_BATCH(0x0); + OUT_BATCH_REGSEQ(R500_US_CODE_ADDR, 3); + OUT_BATCH(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1)); + OUT_BATCH(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1)); + OUT_BATCH(R500_US_CODE_OFFSET_ADDR(0)); + + OUT_BATCH(cmdr500fp(0, 1, 0, 0)); + OUT_BATCH(R500_INST_TYPE_OUT | + R500_INST_TEX_SEM_WAIT | + R500_INST_LAST | + R500_INST_RGB_OMASK_R | + R500_INST_RGB_OMASK_G | + R500_INST_RGB_OMASK_B | + R500_INST_ALPHA_OMASK | + R500_INST_RGB_CLAMP | + R500_INST_ALPHA_CLAMP); + OUT_BATCH(R500_RGB_ADDR0(0) | + R500_RGB_ADDR1(0) | + R500_RGB_ADDR1_CONST | + R500_RGB_ADDR2(0) | + R500_RGB_ADDR2_CONST); + OUT_BATCH(R500_ALPHA_ADDR0(0) | + R500_ALPHA_ADDR1(0) | + R500_ALPHA_ADDR1_CONST | + R500_ALPHA_ADDR2(0) | + 
R500_ALPHA_ADDR2_CONST); + OUT_BATCH(R500_ALU_RGB_SEL_A_SRC0 | + R500_ALU_RGB_R_SWIZ_A_R | + R500_ALU_RGB_G_SWIZ_A_G | + R500_ALU_RGB_B_SWIZ_A_B | + R500_ALU_RGB_SEL_B_SRC0 | + R500_ALU_RGB_R_SWIZ_B_R | + R500_ALU_RGB_B_SWIZ_B_G | + R500_ALU_RGB_G_SWIZ_B_B); + OUT_BATCH(R500_ALPHA_OP_CMP | + R500_ALPHA_SWIZ_A_A | + R500_ALPHA_SWIZ_B_A); + OUT_BATCH(R500_ALU_RGBA_OP_CMP | + R500_ALU_RGBA_R_SWIZ_0 | + R500_ALU_RGBA_G_SWIZ_0 | + R500_ALU_RGBA_B_SWIZ_0 | + R500_ALU_RGBA_A_SWIZ_0); + END_BATCH(); } - reg_start(R300_VAP_PVS_STATE_FLUSH_REG, 0); - e32(0x00000000); + BEGIN_BATCH(2); + OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0); + END_BATCH(); + if (has_tcl) { - vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | (5 << R300_PVS_NUM_CNTLRS_SHIFT) | (12 << R300_VF_MAX_VTX_NUM_SHIFT)); - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) - vap_cntl |= R500_TCL_STATE_OPTIMIZATION; - } else - vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + vap_cntl |= R500_TCL_STATE_OPTIMIZATION; + } else { + vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | (5 << R300_PVS_NUM_CNTLRS_SHIFT) | (5 << R300_VF_MAX_VTX_NUM_SHIFT)); + } if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515) - vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT); + vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT); else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) || (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) || (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570)) - vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT); + vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT); else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) || (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420)) - vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT); + vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT); else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) || 
(r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580)) - vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT); + vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT); else - vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT); + vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT); + + R300_STATECHANGE(r300, vap_cntl); - R300_STATECHANGE(rmesa, vap_cntl); - reg_start(R300_VAP_CNTL, 0); - e32(vap_cntl); + BEGIN_BATCH(2); + OUT_BATCH_REGVAL(R300_VAP_CNTL, vap_cntl); + END_BATCH(); if (has_tcl) { R300_STATECHANGE(r300, pvs); - reg_start(R300_VAP_PVS_CODE_CNTL_0, 2); - - e32((0 << R300_PVS_FIRST_INST_SHIFT) | - (0 << R300_PVS_XYZW_VALID_INST_SHIFT) | - (1 << R300_PVS_LAST_INST_SHIFT)); - e32((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | - (0 << R300_PVS_MAX_CONST_ADDR_SHIFT)); - e32(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT); - R300_STATECHANGE(r300, vpi); - vsf_start_fragment(0x0, 8); - - e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 0, 0xf, PVS_DST_REG_OUT)); - e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); - e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); - e32(0x0); - e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, PVS_DST_REG_OUT)); - e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); - e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); - e32(0x0); + BEGIN_BATCH(13); + OUT_BATCH_REGSEQ(R300_VAP_PVS_CODE_CNTL_0, 3); + OUT_BATCH((0 << R300_PVS_FIRST_INST_SHIFT) | + (0 << R300_PVS_XYZW_VALID_INST_SHIFT) | + (1 << R300_PVS_LAST_INST_SHIFT)); + OUT_BATCH((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | + (0 << R300_PVS_MAX_CONST_ADDR_SHIFT)); + OUT_BATCH(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT); + + OUT_BATCH(cmdvpu(0, 2)); + 
OUT_BATCH(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 0, 0xf, PVS_DST_REG_OUT)); + OUT_BATCH(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); + OUT_BATCH(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); + OUT_BATCH(0x0); + + OUT_BATCH(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, PVS_DST_REG_OUT)); + OUT_BATCH(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); + OUT_BATCH(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); + OUT_BATCH(0x0); + END_BATCH(); } } @@ -467,7 +465,10 @@ static void r300EmitClearState(GLcontext * ctx) static void r300Clear(GLcontext * ctx, GLbitfield mask) { r300ContextPtr r300 = R300_CONTEXT(ctx); + BATCH_LOCALS(r300); __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable; + GLframebuffer *fb = dPriv->driverPrivate; + struct radeon_renderbuffer *rrb; int flags = 0; int bits = 0; int swapped; @@ -482,6 +483,12 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask) return; } + /* Flush swtcl vertices if necessary, because we will change hardware + * state during clear. See also the state-related comment in + * r300EmitClearState. + */ + R300_NEWPRIM(r300); + if (mask & BUFFER_BIT_FRONT_LEFT) { flags |= BUFFER_BIT_FRONT_LEFT; mask &= ~BUFFER_BIT_FRONT_LEFT; @@ -509,26 +516,27 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask) _swrast_Clear(ctx, mask); } - swapped = r300->radeon.sarea->pfCurrentPage == 1; - /* Make sure it fits there. 
*/ r300EnsureCmdBufSpace(r300, 421 * 3, __FUNCTION__); if (flags || bits) r300EmitClearState(ctx); if (flags & BUFFER_BIT_FRONT_LEFT) { - r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped); + rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; + r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb); bits = 0; } if (flags & BUFFER_BIT_BACK_LEFT) { - r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped ^ 1); + rrb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; + r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb); bits = 0; } if (bits) - r300ClearBuffer(r300, bits, 0); + r300ClearBuffer(r300, bits, NULL); + COMMIT_BATCH(); } void r300Flush(GLcontext * ctx) @@ -541,16 +549,12 @@ void r300Flush(GLcontext * ctx) if (rmesa->dma.flush) rmesa->dma.flush( rmesa ); - if (rmesa->cmdbuf.count_used > rmesa->cmdbuf.count_reemit) + if (rmesa->cmdbuf.committed > rmesa->cmdbuf.reemit) r300FlushCmdBuf(rmesa, __FUNCTION__); } -#ifdef USER_BUFFERS -#include "r300_mem.h" - void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size) { - struct r300_dma_buffer *dmabuf; size = MAX2(size, RADEON_BUFFER_SIZE * 16); if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) @@ -560,71 +564,24 @@ void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size) rmesa->dma.flush(rmesa); } - if (rmesa->dma.current.buf) { -#ifdef USER_BUFFERS - r300_mem_use(rmesa, rmesa->dma.current.buf->id); -#endif - r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__); + if (rmesa->dma.current) { + dri_bo_unreference(rmesa->dma.current); + rmesa->dma.current = 0; } if (rmesa->dma.nr_released_bufs > 4) r300FlushCmdBuf(rmesa, __FUNCTION__); - dmabuf = CALLOC_STRUCT(r300_dma_buffer); - dmabuf->buf = (void *)1; /* hack */ - dmabuf->refcount = 1; - - dmabuf->id = r300_mem_alloc(rmesa, 4, size); - if (dmabuf->id == 0) { - LOCK_HARDWARE(&rmesa->radeon); /* no need to validate */ - - r300FlushCmdBufLocked(rmesa, __FUNCTION__); - radeonWaitForIdleLocked(&rmesa->radeon); - - dmabuf->id = 
r300_mem_alloc(rmesa, 4, size); - - UNLOCK_HARDWARE(&rmesa->radeon); - - if (dmabuf->id == 0) { - fprintf(stderr, - "Error: Could not get dma buffer... exiting\n"); - _mesa_exit(-1); - } - } - - rmesa->dma.current.buf = dmabuf; - rmesa->dma.current.address = r300_mem_ptr(rmesa, dmabuf->id); - rmesa->dma.current.end = size; - rmesa->dma.current.start = 0; - rmesa->dma.current.ptr = 0; -} - -void r300ReleaseDmaRegion(r300ContextPtr rmesa, - struct r300_dma_region *region, const char *caller) -{ - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); - - if (!region->buf) - return; - - if (rmesa->dma.flush) - rmesa->dma.flush(rmesa); - - if (--region->buf->refcount == 0) { - r300_mem_free(rmesa, region->buf->id); - FREE(region->buf); - rmesa->dma.nr_released_bufs++; - } - - region->buf = 0; - region->start = 0; + rmesa->dma.current = dri_bo_alloc(&rmesa->radeon.bufmgr->base, "DMA regions", + size, 4, DRM_BO_MEM_DMA); + rmesa->dma.current_used = 0; + rmesa->dma.current_vertexptr = 0; } /* Allocates a region from rmesa->dma.current. 
If there isn't enough * space in current, grab a new buffer (and discard what was left of current) */ void r300AllocDmaRegion(r300ContextPtr rmesa, - struct r300_dma_region *region, + dri_bo **pbo, int *poffset, int bytes, int alignment) { if (RADEON_DEBUG & DEBUG_IOCTL) @@ -633,207 +590,23 @@ void r300AllocDmaRegion(r300ContextPtr rmesa, if (rmesa->dma.flush) rmesa->dma.flush(rmesa); - if (region->buf) - r300ReleaseDmaRegion(rmesa, region, __FUNCTION__); + assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr); alignment--; - rmesa->dma.current.start = rmesa->dma.current.ptr = - (rmesa->dma.current.ptr + alignment) & ~alignment; - - if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end) - r300RefillCurrentDmaRegion(rmesa, (bytes + 0x7) & ~0x7); - - region->start = rmesa->dma.current.start; - region->ptr = rmesa->dma.current.start; - region->end = rmesa->dma.current.start + bytes; - region->address = rmesa->dma.current.address; - region->buf = rmesa->dma.current.buf; - region->buf->refcount++; + rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment; - rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */ - rmesa->dma.current.start = - rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7; - - assert(rmesa->dma.current.ptr <= rmesa->dma.current.end); -} + if (!rmesa->dma.current || rmesa->dma.current_used + bytes > rmesa->dma.current->size) + r300RefillCurrentDmaRegion(rmesa, (bytes + 15) & ~15); -#else -static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa) -{ - struct r300_dma_buffer *dmabuf; - int fd = rmesa->radeon.dri.fd; - int index = 0; - int size = 0; - drmDMAReq dma; - int ret; + *poffset = rmesa->dma.current_used; + *pbo = rmesa->dma.current; + dri_bo_reference(*pbo); - if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) - fprintf(stderr, "%s\n", __FUNCTION__); + /* Always align to at least 16 bytes */ + rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15; + 
rmesa->dma.current_vertexptr = rmesa->dma.current_used; - if (rmesa->dma.flush) { - rmesa->dma.flush(rmesa); - } - - if (rmesa->dma.current.buf) - r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__); - - if (rmesa->dma.nr_released_bufs > 4) - r300FlushCmdBuf(rmesa, __FUNCTION__); - - dma.context = rmesa->radeon.dri.hwContext; - dma.send_count = 0; - dma.send_list = NULL; - dma.send_sizes = NULL; - dma.flags = 0; - dma.request_count = 1; - dma.request_size = RADEON_BUFFER_SIZE; - dma.request_list = &index; - dma.request_sizes = &size; - dma.granted_count = 0; - - LOCK_HARDWARE(&rmesa->radeon); /* no need to validate */ - - ret = drmDMA(fd, &dma); - - if (ret != 0) { - /* Try to release some buffers and wait until we can't get any more */ - if (rmesa->dma.nr_released_bufs) { - r300FlushCmdBufLocked(rmesa, __FUNCTION__); - } - - if (RADEON_DEBUG & DEBUG_DMA) - fprintf(stderr, "Waiting for buffers\n"); - - radeonWaitForIdleLocked(&rmesa->radeon); - ret = drmDMA(fd, &dma); - - if (ret != 0) { - UNLOCK_HARDWARE(&rmesa->radeon); - fprintf(stderr, - "Error: Could not get dma buffer... 
exiting\n"); - _mesa_exit(-1); - } - } - - UNLOCK_HARDWARE(&rmesa->radeon); - - if (RADEON_DEBUG & DEBUG_DMA) - fprintf(stderr, "Allocated buffer %d\n", index); - - dmabuf = CALLOC_STRUCT(r300_dma_buffer); - dmabuf->buf = &rmesa->radeon.radeonScreen->buffers->list[index]; - dmabuf->refcount = 1; - - rmesa->dma.current.buf = dmabuf; - rmesa->dma.current.address = dmabuf->buf->address; - rmesa->dma.current.end = dmabuf->buf->total; - rmesa->dma.current.start = 0; - rmesa->dma.current.ptr = 0; -} - -void r300ReleaseDmaRegion(r300ContextPtr rmesa, - struct r300_dma_region *region, const char *caller) -{ - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); - - if (!region->buf) - return; - - if (rmesa->dma.flush) - rmesa->dma.flush(rmesa); - - if (--region->buf->refcount == 0) { - drm_radeon_cmd_header_t *cmd; - - if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) - fprintf(stderr, "%s -- DISCARD BUF %d\n", - __FUNCTION__, region->buf->buf->idx); - cmd = - (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, - sizeof - (*cmd) / 4, - __FUNCTION__); - cmd->dma.cmd_type = R300_CMD_DMA_DISCARD; - cmd->dma.buf_idx = region->buf->buf->idx; - - FREE(region->buf); - rmesa->dma.nr_released_bufs++; - } - - region->buf = 0; - region->start = 0; -} - -/* Allocates a region from rmesa->dma.current. 
If there isn't enough - * space in current, grab a new buffer (and discard what was left of current) - */ -void r300AllocDmaRegion(r300ContextPtr rmesa, - struct r300_dma_region *region, - int bytes, int alignment) -{ - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s %d\n", __FUNCTION__, bytes); - - if (rmesa->dma.flush) - rmesa->dma.flush(rmesa); - - if (region->buf) - r300ReleaseDmaRegion(rmesa, region, __FUNCTION__); - - alignment--; - rmesa->dma.current.start = rmesa->dma.current.ptr = - (rmesa->dma.current.ptr + alignment) & ~alignment; - - if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end) - r300RefillCurrentDmaRegion(rmesa); - - region->start = rmesa->dma.current.start; - region->ptr = rmesa->dma.current.start; - region->end = rmesa->dma.current.start + bytes; - region->address = rmesa->dma.current.address; - region->buf = rmesa->dma.current.buf; - region->buf->refcount++; - - rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */ - rmesa->dma.current.start = - rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7; - - assert(rmesa->dma.current.ptr <= rmesa->dma.current.end); -} - -#endif - -GLboolean r300IsGartMemory(r300ContextPtr rmesa, const GLvoid * pointer, - GLint size) -{ - int offset = - (char *)pointer - - (char *)rmesa->radeon.radeonScreen->gartTextures.map; - int valid = (size >= 0 && offset >= 0 - && offset + size < - rmesa->radeon.radeonScreen->gartTextures.size); - - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "r300IsGartMemory( %p ) : %d\n", pointer, - valid); - - return valid; -} - -GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa, const GLvoid * pointer) -{ - int offset = - (char *)pointer - - (char *)rmesa->radeon.radeonScreen->gartTextures.map; - - //fprintf(stderr, "offset=%08x\n", offset); - - if (offset < 0 - || offset > rmesa->radeon.radeonScreen->gartTextures.size) - return ~0; - else - return rmesa->radeon.radeonScreen->gart_texture_offset + offset; + assert(rmesa->dma.current_used <= 
rmesa->dma.current->size); } void r300InitIoctlFuncs(struct dd_function_table *functions) diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.h b/src/mesa/drivers/dri/r300/r300_ioctl.h index e1143fb..c743478 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.h +++ b/src/mesa/drivers/dri/r300/r300_ioctl.h @@ -39,20 +39,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_context.h" #include "radeon_drm.h" -extern GLboolean r300IsGartMemory(r300ContextPtr rmesa, - const GLvoid * pointer, GLint size); - -extern GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa, - const GLvoid * pointer); - extern void r300Flush(GLcontext * ctx); -extern void r300ReleaseDmaRegion(r300ContextPtr rmesa, - struct r300_dma_region *region, - const char *caller); extern void r300AllocDmaRegion(r300ContextPtr rmesa, - struct r300_dma_region *region, int bytes, - int alignment); + dri_bo **pbo, int *poffset, + int bytes, int alignment); extern void r300InitIoctlFuncs(struct dd_function_table *functions); diff --git a/src/mesa/drivers/dri/r300/r300_mem.c b/src/mesa/drivers/dri/r300/r300_mem.c index f8f9d4f..b045393 100644 --- a/src/mesa/drivers/dri/r300/r300_mem.c +++ b/src/mesa/drivers/dri/r300/r300_mem.c @@ -27,359 +27,843 @@ /** * \file + * Simulate a real memory manager for R300 in the old-style scheme. + * + * NOTE: Right now, this is DMA-only and really only a skeleton of a true bufmgr. 
* * \author Aapo Tahkola */ +#include "r300_mem.h" + +#include #include -#include "r300_context.h" -#include "r300_cmdbuf.h" -#include "r300_ioctl.h" -#include "r300_mem.h" +#include "simple_list.h" + #include "radeon_ioctl.h" +#include "r300_cmdbuf.h" -#ifdef USER_BUFFERS +typedef struct _radeon_bufmgr_classic radeon_bufmgr_classic; +typedef struct _radeon_bo_classic radeon_bo_classic; +typedef struct _radeon_bo_functions radeon_bo_functions; +typedef struct _radeon_reloc radeon_reloc; +typedef struct _radeon_bo_vram radeon_bo_vram; + +struct _radeon_bufmgr_classic { + radeon_bufmgr base; + r300ContextPtr rmesa; + + radeon_bo_classic *buffers; /** Unsorted linked list of all buffer objects */ + + radeon_bo_classic *pending; /** Age-sorted linked list of pending buffer objects */ + radeon_bo_classic **pending_tail; + + /* Texture heap bookkeeping */ + driTexHeap *texture_heap; + GLuint texture_offset; + driTextureObject texture_swapped; +}; + +struct _radeon_reloc { + uint64_t flags; + GLuint offset; /**< Offset (in bytes) into command buffer to relocated dword */ + radeon_bo_classic *target; + GLuint delta; +}; + +struct _radeon_bo_functions { + /** + * Free a buffer object. Caller has verified that the object is not + * referenced or pending. + */ + void (*free)(radeon_bo_classic*); + + /** + * Validate the given buffer. Must set the validated flag to 1. + * + * May be null for buffer objects that are always valid. + * Always called with lock held. + */ + void (*validate)(radeon_bo_classic*); + + /** + * Called when a writing map of the buffer is taken, to note that + * the buffer will have to be re-validated. + * + * May be null for buffer objects that don't need it. + */ + void (*dirty)(radeon_bo_classic*); + + /** + * Indicate that the buffer object is now used by the hardware. + * + * May be null. + */ + void (*bind)(radeon_bo_classic*); + + /** + * Indicate that the buffer object is no longer used by the hardware. + * + * May be null. 
+ */ + void (*unbind)(radeon_bo_classic*); +}; -static void resize_u_list(r300ContextPtr rmesa) -{ - void *temp; - int nsize; +/** + * A buffer object. There are three types of buffer objects: + * 1. cmdbuf: Ordinary malloc()ed memory, used for command buffers + * 2. dma: GART memory allocated via the DRM_RADEON_ALLOC ioctl. + * 3. vram: Objects with malloc()ed backing store that will be uploaded + * into VRAM on demand; used for textures. + * There is a @ref functions table for operations that depend on the + * buffer object type. + * + * Fencing is handled the same way for all buffer objects. During command buffer + * submission, the pending flag and corresponding variables are set accordingly. + */ +struct _radeon_bo_classic { + dri_bo base; - temp = rmesa->rmm->u_list; - nsize = rmesa->rmm->u_size * 2; + const radeon_bo_functions *functions; - rmesa->rmm->u_list = _mesa_malloc(nsize * sizeof(*rmesa->rmm->u_list)); - _mesa_memset(rmesa->rmm->u_list, 0, - nsize * sizeof(*rmesa->rmm->u_list)); + radeon_bo_classic *next; /** Unsorted linked list of all buffer objects */ + radeon_bo_classic **pprev; - if (temp) { - r300FlushCmdBuf(rmesa, __FUNCTION__); + /** + * Number of software references to this buffer. + * A buffer is freed automatically as soon as its reference count reaches 0 + * *and* it is no longer pending.
+ */ + unsigned int refcount; + unsigned int mapcount; /** mmap count; mutually exclusive to being pending */ - _mesa_memcpy(rmesa->rmm->u_list, temp, - rmesa->rmm->u_size * sizeof(*rmesa->rmm->u_list)); - _mesa_free(temp); - } + unsigned int validated:1; /** whether the buffer is validated for hardware use right now */ + unsigned int used:1; /* only for communication between process_relocs and post_submit */ + + unsigned int pending:1; + radeon_bo_classic *pending_next; /** Age-sorted linked list of pending buffer objects */ + radeon_bo_classic **pending_pprev; - rmesa->rmm->u_size = nsize; + /* The following two variables are intricately linked to the DRM interface, + * and must be in this physical memory order, or else chaos ensues. + * See the DRM's implementation of R300_CMD_SCRATCH for details. + */ + uint32_t pending_age; /** Buffer object pending until this age is reached, written by the DRM */ + uint32_t pending_count; /** Number of pending R300_CMD_SCRATCH references to this object */ + + radeon_reloc *relocs; /** Array of relocations in this buffer */ + GLuint relocs_used; /** # of relocations in relocation array */ + GLuint relocs_size; /** # of reloc records reserved in relocation array */ +}; + +typedef struct _radeon_vram_wrapper radeon_vram_wrapper; + +/** Wrapper around heap object */ +struct _radeon_vram_wrapper { + driTextureObject base; + radeon_bo_vram *bo; +}; + +struct _radeon_bo_vram { + radeon_bo_classic base; + + unsigned int backing_store_dirty:1; /** Backing store has changed, block must be reuploaded */ + + radeon_vram_wrapper *vram; /** Block in VRAM (if any) */ +}; + +static radeon_bufmgr_classic* get_bufmgr_classic(dri_bufmgr *bufmgr_ctx) +{ + return (radeon_bufmgr_classic*)bufmgr_ctx; } -void r300_mem_init(r300ContextPtr rmesa) +static radeon_bo_classic* get_bo_classic(dri_bo *bo_base) { - rmesa->rmm = malloc(sizeof(struct r300_memory_manager)); - memset(rmesa->rmm, 0, sizeof(struct r300_memory_manager)); + return 
(radeon_bo_classic*)bo_base; +} - rmesa->rmm->u_size = 128; - resize_u_list(rmesa); +static radeon_bo_vram* get_bo_vram(radeon_bo_classic *bo_base) +{ + return (radeon_bo_vram*)bo_base; } -void r300_mem_destroy(r300ContextPtr rmesa) +/** + * Really free a given buffer object. + */ +static void bo_free(radeon_bo_classic *bo) { - _mesa_free(rmesa->rmm->u_list); - rmesa->rmm->u_list = NULL; + assert(!bo->refcount); + assert(!bo->pending); + assert(!bo->mapcount); + + if (bo->relocs) { + int i; + for(i = 0; i < bo->relocs_used; ++i) + dri_bo_unreference(&bo->relocs[i].target->base); + free(bo->relocs); + bo->relocs = 0; + } + + *bo->pprev = bo->next; + if (bo->next) + bo->next->pprev = bo->pprev; - _mesa_free(rmesa->rmm); - rmesa->rmm = NULL; + bo->functions->free(bo); } -void *r300_mem_ptr(r300ContextPtr rmesa, int id) + +/** + * Keep track of which buffer objects are still pending, i.e. waiting for + * some hardware operation to complete. + */ +static void track_pending_buffers(radeon_bufmgr_classic *bufmgr) { - assert(id <= rmesa->rmm->u_last); - return rmesa->rmm->u_list[id].ptr; + uint32_t currentage = radeonGetAge((radeonContextPtr)bufmgr->rmesa); + + while(bufmgr->pending) { + radeon_bo_classic *bo = bufmgr->pending; + + assert(bo->pending); + + if (bo->pending_count || + bo->pending_age > currentage) // TODO: Age counter wraparound! + break; + + bo->pending = 0; + bufmgr->pending = bo->pending_next; + if (bufmgr->pending) + bufmgr->pending->pending_pprev = &bufmgr->pending; + else + bufmgr->pending_tail = &bufmgr->pending; + + if (bo->functions->unbind) + (*bo->functions->unbind)(bo); + if (!bo->refcount) + bo_free(bo); + } } -int r300_mem_find(r300ContextPtr rmesa, void *ptr) +/** + * Initialize common buffer object data. 
+ */ +static void init_buffer(radeon_bufmgr_classic *bufmgr, radeon_bo_classic *bo, unsigned long size) { - int i; + bo->base.bufmgr = &bufmgr->base.base; + bo->base.size = size; + bo->refcount = 1; + + bo->pprev = &bufmgr->buffers; + bo->next = bufmgr->buffers; + if (bo->next) + bo->next->pprev = &bo->next; + bufmgr->buffers = bo; +} - for (i = 1; i < rmesa->rmm->u_size + 1; i++) - if (rmesa->rmm->u_list[i].ptr && - ptr >= rmesa->rmm->u_list[i].ptr && - ptr < - rmesa->rmm->u_list[i].ptr + rmesa->rmm->u_list[i].size) - break; - if (i < rmesa->rmm->u_size + 1) - return i; +/** + * Free a DMA-based buffer. + */ +static void dma_free(radeon_bo_classic *bo) +{ + radeon_bufmgr_classic* bufmgr = get_bufmgr_classic(bo->base.bufmgr); + drm_radeon_mem_free_t memfree; + int ret; + + memfree.region = RADEON_MEM_REGION_GART; + memfree.region_offset = bo->base.offset; + memfree.region_offset -= bufmgr->rmesa->radeon.radeonScreen->gart_texture_offset; - fprintf(stderr, "%p failed\n", ptr); - return 0; + ret = drmCommandWrite(bufmgr->rmesa->radeon.radeonScreen->driScreen->fd, + DRM_RADEON_FREE, &memfree, sizeof(memfree)); + if (ret) { + fprintf(stderr, "Failed to free bo[%p] at %08x\n", bo, memfree.region_offset); + fprintf(stderr, "ret = %s\n", strerror(-ret)); + exit(1); + } + + free(bo); } -//#define MM_DEBUG -int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size) +static const radeon_bo_functions dma_bo_functions = { + .free = &dma_free +}; + +/** + * Call the DRM to allocate GART memory for the given (incomplete) + * buffer object. 
+ */ +static int try_dma_alloc(radeon_bufmgr_classic *bufmgr, radeon_bo_classic *bo, + unsigned long size, unsigned int alignment) { drm_radeon_mem_alloc_t alloc; - int offset = 0, ret; - int i, free = -1; - int done_age; - drm_radeon_mem_free_t memfree; - int tries = 0; - static int bytes_wasted = 0, allocated = 0; + int baseoffset; + int ret; - if (size < 4096) - bytes_wasted += 4096 - size; + alloc.region = RADEON_MEM_REGION_GART; + alloc.alignment = alignment; + alloc.size = size; + alloc.region_offset = &baseoffset; + + ret = drmCommandWriteRead(bufmgr->rmesa->radeon.dri.fd, + DRM_RADEON_ALLOC, &alloc, sizeof(alloc)); + if (ret) { + if (RADEON_DEBUG & DEBUG_MEMORY) + fprintf(stderr, "DRM_RADEON_ALLOC failed: %d\n", ret); + return 0; + } - allocated += size; + bo->base.virtual = (char*)bufmgr->rmesa->radeon.radeonScreen->gartTextures.map + baseoffset; + bo->base.offset = bufmgr->rmesa->radeon.radeonScreen->gart_texture_offset + baseoffset; -#if 0 - static int t = 0; - if (t != time(NULL)) { - t = time(NULL); - fprintf(stderr, "slots used %d, wasted %d kb, allocated %d\n", - rmesa->rmm->u_last, bytes_wasted / 1024, - allocated / 1024); + return 1; +} + +/** + * Allocate a DMA buffer. 
+ */ +static dri_bo *dma_alloc(radeon_bufmgr_classic *bufmgr, const char *name, + unsigned long size, unsigned int alignment) +{ + radeon_bo_classic* bo = (radeon_bo_classic*)calloc(1, sizeof(radeon_bo_classic)); + + bo->functions = &dma_bo_functions; + + track_pending_buffers(bufmgr); + if (!try_dma_alloc(bufmgr, bo, size, alignment)) { + if (RADEON_DEBUG & DEBUG_MEMORY) + fprintf(stderr, "Failed to allocate %ld bytes, finishing command buffer...\n", size); + radeonFinish(bufmgr->rmesa->radeon.glCtx); + track_pending_buffers(bufmgr); + if (!try_dma_alloc(bufmgr, bo, size, alignment)) { + WARN_ONCE( + "Ran out of GART memory (for %ld)!\n" + "Please consider adjusting GARTSize option.\n", + size); + free(bo); + return 0; + } } -#endif - memfree.region = RADEON_MEM_REGION_GART; + init_buffer(bufmgr, bo, size); + bo->validated = 1; /* DMA buffer offsets are always valid */ - again: + return &bo->base; +} - done_age = radeonGetAge((radeonContextPtr) rmesa); +/** + * Free a command buffer + */ +static void cmdbuf_free(radeon_bo_classic *bo) +{ + free(bo->base.virtual); + free(bo); +} - if (rmesa->rmm->u_last + 1 >= rmesa->rmm->u_size) - resize_u_list(rmesa); +static const radeon_bo_functions cmdbuf_bo_functions = { + .free = cmdbuf_free +}; - for (i = rmesa->rmm->u_last + 1; i > 0; i--) { - if (rmesa->rmm->u_list[i].ptr == NULL) { - free = i; - continue; +/** + * Allocate a command buffer. + * + * Command buffers are really just malloc'ed buffers. They are managed by + * the bufmgr to enable relocations. + */ +static dri_bo *cmdbuf_alloc(radeon_bufmgr_classic *bufmgr, const char *name, + unsigned long size) +{ + radeon_bo_classic* bo = (radeon_bo_classic*)calloc(1, sizeof(radeon_bo_classic)); + + bo->functions = &cmdbuf_bo_functions; + bo->base.virtual = malloc(size); + + init_buffer(bufmgr, bo, size); + return &bo->base; +} + +/** + * Free a VRAM-based buffer object. 
+ */ +static void vram_free(radeon_bo_classic *bo_base) +{ + radeon_bo_vram *bo = get_bo_vram(bo_base); + + if (bo->vram) { + driDestroyTextureObject(&bo->vram->base); + bo->vram = 0; + } + + free(bo->base.base.virtual); + free(bo); +} + +/** + * Allocate/update the copy in vram. + * + * Note: Assume we're called with the DRI lock held. + */ +static void vram_validate(radeon_bo_classic *bo_base) +{ + radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(bo_base->base.bufmgr); + radeon_bo_vram *bo = get_bo_vram(bo_base); + + if (!bo->vram) { + bo->backing_store_dirty = 1; + + bo->vram = (radeon_vram_wrapper*)calloc(1, sizeof(radeon_vram_wrapper)); + bo->vram->bo = bo; + make_empty_list(&bo->vram->base); + bo->vram->base.totalSize = bo->base.base.size; + if (driAllocateTexture(&bufmgr->texture_heap, 1, &bo->vram->base) < 0) { + fprintf(stderr, "Ouch! vram_validate failed\n"); + free(bo->vram); + bo->base.base.offset = 0; + bo->vram = 0; + return; } + } + + assert(bo->vram->base.memBlock); + + bo->base.base.offset = bufmgr->texture_offset + bo->vram->base.memBlock->ofs; + + if (bo->backing_store_dirty) { + /* Copy to VRAM using a blit. + * All memory is 4K aligned. We're using 1024 pixels wide blits. + */ + drm_radeon_texture_t tex; + drm_radeon_tex_image_t tmp; + int ret; - if (rmesa->rmm->u_list[i].h_pending == 0 && - rmesa->rmm->u_list[i].pending - && rmesa->rmm->u_list[i].age <= done_age) { - memfree.region_offset = - (char *)rmesa->rmm->u_list[i].ptr - - (char *)rmesa->radeon.radeonScreen->gartTextures. 
- map; + tex.offset = bo->base.base.offset; + tex.image = &tmp; - ret = - drmCommandWrite(rmesa->radeon.radeonScreen-> - driScreen->fd, DRM_RADEON_FREE, - &memfree, sizeof(memfree)); + assert(!(tex.offset & 1023)); + tmp.x = 0; + tmp.y = 0; + if (bo->base.base.size < 4096) { + tmp.width = (bo->base.base.size + 3) / 4; + tmp.height = 1; + } else { + tmp.width = 1024; + tmp.height = (bo->base.base.size + 4095) / 4096; + } + tmp.data = bo->base.base.virtual; + + tex.format = RADEON_TXFORMAT_ARGB8888; + tex.width = tmp.width; + tex.height = tmp.height; + tex.pitch = MAX2(tmp.width / 16, 1); + + do { + ret = drmCommandWriteRead(bufmgr->rmesa->radeon.dri.fd, + DRM_RADEON_TEXTURE, &tex, + sizeof(drm_radeon_texture_t)); if (ret) { - fprintf(stderr, "Failed to free at %p\n", - rmesa->rmm->u_list[i].ptr); - fprintf(stderr, "ret = %s\n", strerror(-ret)); - exit(1); - } else { -#ifdef MM_DEBUG - fprintf(stderr, "really freed %d at age %x\n", - i, - radeonGetAge((radeonContextPtr) rmesa)); -#endif - if (i == rmesa->rmm->u_last) - rmesa->rmm->u_last--; - - if (rmesa->rmm->u_list[i].size < 4096) - bytes_wasted -= - 4096 - rmesa->rmm->u_list[i].size; - - allocated -= rmesa->rmm->u_list[i].size; - rmesa->rmm->u_list[i].pending = 0; - rmesa->rmm->u_list[i].ptr = NULL; - free = i; + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, + "DRM_RADEON_TEXTURE: again!\n"); + usleep(1); } - } + } while (ret == -EAGAIN); + + bo->backing_store_dirty = 0; } - rmesa->rmm->u_head = i; - - if (free == -1) { - WARN_ONCE("Ran out of slots!\n"); - //usleep(100); - r300FlushCmdBuf(rmesa, __FUNCTION__); - tries++; - if (tries > 100) { - WARN_ONCE("Ran out of slots!\n"); - exit(1); - } - goto again; + + bo->base.validated = 1; +} + +static void vram_dirty(radeon_bo_classic *bo_base) +{ + radeon_bo_vram *bo = get_bo_vram(bo_base); + + bo->base.validated = 0; + bo->backing_store_dirty = 1; +} + +static void vram_bind(radeon_bo_classic *bo_base) +{ + radeon_bo_vram *bo = get_bo_vram(bo_base); + + if 
(bo->vram) { + bo->vram->base.bound = 1; + driUpdateTextureLRU(&bo->vram->base); } +} - alloc.region = RADEON_MEM_REGION_GART; - alloc.alignment = alignment; - alloc.size = size; - alloc.region_offset = &offset; +static void vram_unbind(radeon_bo_classic *bo_base) +{ + radeon_bo_vram *bo = get_bo_vram(bo_base); - ret = - drmCommandWriteRead(rmesa->radeon.dri.fd, DRM_RADEON_ALLOC, &alloc, - sizeof(alloc)); - if (ret) { -#if 0 - WARN_ONCE("Ran out of mem!\n"); - r300FlushCmdBuf(rmesa, __FUNCTION__); - //usleep(100); - tries2++; - tries = 0; - if (tries2 > 100) { - WARN_ONCE("Ran out of GART memory!\n"); - exit(1); - } - goto again; -#else - WARN_ONCE - ("Ran out of GART memory (for %d)!\nPlease consider adjusting GARTSize option.\n", - size); - return 0; -#endif + if (bo->vram) + bo->vram->base.bound = 0; +} + +/** Callback function called by the texture heap when a texture is evicted */ +static void destroy_vram_wrapper(void *data, driTextureObject *t) +{ + radeon_vram_wrapper *wrapper = (radeon_vram_wrapper*)t; + + if (wrapper->bo && wrapper->bo->vram == wrapper) { + wrapper->bo->base.validated = 0; + wrapper->bo->vram = 0; } +} - i = free; +static const radeon_bo_functions vram_bo_functions = { + .free = vram_free, + .validate = vram_validate, + .dirty = vram_dirty, + .bind = vram_bind, + .unbind = vram_unbind +}; - if (i > rmesa->rmm->u_last) - rmesa->rmm->u_last = i; +/** + * Free a static buffer object.
+ */ +static void static_free(radeon_bo_classic *bo_base) +{ + radeon_bo_vram *bo = get_bo_vram(bo_base); - rmesa->rmm->u_list[i].ptr = - ((GLubyte *) rmesa->radeon.radeonScreen->gartTextures.map) + offset; - rmesa->rmm->u_list[i].size = size; - rmesa->rmm->u_list[i].age = 0; - //fprintf(stderr, "alloc %p at id %d\n", rmesa->rmm->u_list[i].ptr, i); + free(bo); +} -#ifdef MM_DEBUG - fprintf(stderr, "allocated %d at age %x\n", i, - radeonGetAge((radeonContextPtr) rmesa)); -#endif +static void static_bind(radeon_bo_classic *bo_base) +{ +} - return i; +static void static_unbind(radeon_bo_classic *bo_base) +{ } -void r300_mem_use(r300ContextPtr rmesa, int id) +static void static_validate(radeon_bo_classic *bo_base) { - uint64_t ull; -#ifdef MM_DEBUG - fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, - radeonGetAge((radeonContextPtr) rmesa)); -#endif - drm_r300_cmd_header_t *cmd; +} - assert(id <= rmesa->rmm->u_last); +static void static_dirty(radeon_bo_classic *bo_base) +{ +} - if (id == 0) - return; +static const radeon_bo_functions static_bo_functions = { + .free = static_free, + .validate = static_validate, + .dirty = static_dirty, + .bind = static_bind, + .unbind = static_unbind +}; - cmd = - (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, - 2 + sizeof(ull) / 4, - __FUNCTION__); - cmd[0].scratch.cmd_type = R300_CMD_SCRATCH; - cmd[0].scratch.reg = R300_MEM_SCRATCH; - cmd[0].scratch.n_bufs = 1; - cmd[0].scratch.flags = 0; - cmd++; - ull = (uint64_t) (intptr_t) & rmesa->rmm->u_list[id].age; - _mesa_memcpy(cmd, &ull, sizeof(ull)); - cmd += sizeof(ull) / 4; +/** + * Allocate a backing store buffer object that is validated into VRAM. 
+ */ +static dri_bo *vram_alloc(radeon_bufmgr_classic *bufmgr, const char *name, + unsigned long size, unsigned int alignment) +{ + radeon_bo_vram* bo = (radeon_bo_vram*)calloc(1, sizeof(radeon_bo_vram)); + + bo->base.functions = &vram_bo_functions; + bo->base.base.virtual = malloc(size); + init_buffer(bufmgr, &bo->base, size); + return &bo->base.base; +} - cmd[0].u = /*id */ 0; - LOCK_HARDWARE(&rmesa->radeon); /* Protect from DRM. */ - rmesa->rmm->u_list[id].h_pending++; - UNLOCK_HARDWARE(&rmesa->radeon); +static dri_bo *bufmgr_classic_bo_alloc(dri_bufmgr *bufmgr_ctx, const char *name, + unsigned long size, unsigned int alignment, + uint64_t location_mask) +{ + radeon_bufmgr_classic* bufmgr = get_bufmgr_classic(bufmgr_ctx); + + if (location_mask & DRM_BO_MEM_CMDBUF) { + return cmdbuf_alloc(bufmgr, name, size); + } else if (location_mask & DRM_BO_MEM_DMA) { + return dma_alloc(bufmgr, name, size, alignment); + } else { + return vram_alloc(bufmgr, name, size, alignment); + } } -unsigned long r300_mem_offset(r300ContextPtr rmesa, int id) +static dri_bo *bufmgr_classic_bo_alloc_static(dri_bufmgr *bufmgr_ctx, const char *name, + unsigned long offset, unsigned long size, + void *virtual, uint64_t location_mask) { - unsigned long offset; + radeon_bufmgr_classic* bufmgr = get_bufmgr_classic(bufmgr_ctx); + radeon_bo_vram* bo = (radeon_bo_vram*)calloc(1, sizeof(radeon_bo_vram)); - assert(id <= rmesa->rmm->u_last); + bo->base.functions = &static_bo_functions; + bo->base.base.virtual = virtual; + bo->base.base.offset = offset + bufmgr->rmesa->radeon.radeonScreen->fbLocation; + bo->base.validated = 1; /* Static buffer offsets are always valid */ - offset = (char *)rmesa->rmm->u_list[id].ptr - - (char *)rmesa->radeon.radeonScreen->gartTextures.map; - offset += rmesa->radeon.radeonScreen->gart_texture_offset; + init_buffer(bufmgr, &bo->base, size); + return &bo->base.base; - return offset; } -void *r300_mem_map(r300ContextPtr rmesa, int id, int access) + + +static void 
bufmgr_classic_bo_reference(dri_bo *bo_base) { -#ifdef MM_DEBUG - fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, - radeonGetAge((radeonContextPtr) rmesa)); -#endif - void *ptr; - int tries = 0; + radeon_bo_classic *bo = get_bo_classic(bo_base); + bo->refcount++; + assert(bo->refcount > 0); +} - assert(id <= rmesa->rmm->u_last); +static void bufmgr_classic_bo_unreference(dri_bo *bo_base) +{ + radeon_bo_classic *bo = get_bo_classic(bo_base); - if (access == R300_MEM_R) { + if (!bo_base) + return; - if (rmesa->rmm->u_list[id].mapped == 1) - WARN_ONCE("buffer %d already mapped\n", id); + assert(bo->refcount > 0); + bo->refcount--; + if (!bo->refcount) { + // Ugly HACK - figure out whether this is really necessary + get_bufmgr_classic(bo_base->bufmgr)->rmesa->dma.nr_released_bufs++; - rmesa->rmm->u_list[id].mapped = 1; - ptr = r300_mem_ptr(rmesa, id); + assert(!bo->mapcount); + if (!bo->pending) + bo_free(bo); + } +} - return ptr; +static int bufmgr_classic_bo_map(dri_bo *bo_base, GLboolean write_enable) +{ + radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(bo_base->bufmgr); + radeon_bo_classic *bo = get_bo_classic(bo_base); + assert(bo->refcount > 0); + + if (bo->pending) { + track_pending_buffers(bufmgr); + if (bo->pending) { + // TODO: Better fence waiting + if (RADEON_DEBUG & DEBUG_MEMORY) + fprintf(stderr, "bo_map: buffer is pending. 
Flushing...\n"); + radeonFinish(bufmgr->rmesa->radeon.glCtx); + track_pending_buffers(bufmgr); + if (bo->pending) { + fprintf(stderr, "Internal error or hardware lockup: bo_map: buffer is still pending.\n"); + abort(); + } + } } - if (rmesa->rmm->u_list[id].h_pending) - r300FlushCmdBuf(rmesa, __FUNCTION__); + if (write_enable && bo->functions->dirty) + bo->functions->dirty(bo); - if (rmesa->rmm->u_list[id].h_pending) { - return NULL; - } + bo->mapcount++; + assert(bo->mapcount > 0); + return 0; +} - while (rmesa->rmm->u_list[id].age > - radeonGetAge((radeonContextPtr) rmesa) && tries++ < 1000) - usleep(10); +static int bufmgr_classic_bo_unmap(dri_bo *buf) +{ + radeon_bo_classic *bo = get_bo_classic(buf); + assert(bo->refcount > 0); + assert(bo->mapcount > 0); + bo->mapcount--; + return 0; +} - if (tries >= 1000) { - fprintf(stderr, "Idling failed (%x vs %x)\n", - rmesa->rmm->u_list[id].age, - radeonGetAge((radeonContextPtr) rmesa)); - return NULL; +/** + * Mark the given buffer as pending and move it to the tail + * of the pending list. + * The caller is responsible for setting up pending_count and pending_age. + */ +static void move_to_pending_tail(radeon_bo_classic *bo) +{ + radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(bo->base.bufmgr); + + if (bo->pending) { + *bo->pending_pprev = bo->pending_next; + if (bo->pending_next) + bo->pending_next->pending_pprev = bo->pending_pprev; + else + bufmgr->pending_tail = bo->pending_pprev; } - if (rmesa->rmm->u_list[id].mapped == 1) - WARN_ONCE("buffer %d already mapped\n", id); + bo->pending = 1; + bo->pending_pprev = bufmgr->pending_tail; + bo->pending_next = 0; + *bufmgr->pending_tail = bo; + bufmgr->pending_tail = &bo->pending_next; +} - rmesa->rmm->u_list[id].mapped = 1; - ptr = r300_mem_ptr(rmesa, id); +/** + * Emit commands to the batch buffer that cause the given buffer's + * pending_count and pending_age to be updated.
+ */ +static void emit_age_for_buffer(radeon_bo_classic* bo) +{ + radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(bo->base.bufmgr); + BATCH_LOCALS(bufmgr->rmesa); + drm_r300_cmd_header_t cmd; + uint64_t ull; - return ptr; + cmd.scratch.cmd_type = R300_CMD_SCRATCH; + cmd.scratch.reg = 2; /* Scratch register 2 corresponds to what radeonGetAge polls */ + cmd.scratch.n_bufs = 1; + cmd.scratch.flags = 0; + ull = (uint64_t) (intptr_t) &bo->pending_age; + + BEGIN_BATCH(4); + OUT_BATCH(cmd.u); + OUT_BATCH(ull & 0xffffffff); + OUT_BATCH(ull >> 32); + OUT_BATCH(0); + END_BATCH(); + COMMIT_BATCH(); + + bo->pending_count++; } -void r300_mem_unmap(r300ContextPtr rmesa, int id) +static int bufmgr_classic_emit_reloc(dri_bo *batch_buf, uint64_t flags, GLuint delta, + GLuint offset, dri_bo *target) { -#ifdef MM_DEBUG - fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, - radeonGetAge((radeonContextPtr) rmesa)); -#endif + radeon_bo_classic *bo = get_bo_classic(batch_buf); + radeon_reloc *reloc; - assert(id <= rmesa->rmm->u_last); + if (bo->relocs_used >= bo->relocs_size) { + bo->relocs_size *= 2; + if (bo->relocs_size < 32) + bo->relocs_size = 32; - if (rmesa->rmm->u_list[id].mapped == 0) - WARN_ONCE("buffer %d not mapped\n", id); + bo->relocs = (radeon_reloc*)realloc(bo->relocs, bo->relocs_size*sizeof(radeon_reloc)); + } - rmesa->rmm->u_list[id].mapped = 0; + reloc = &bo->relocs[bo->relocs_used++]; + reloc->flags = flags; + reloc->offset = offset; + reloc->delta = delta; + reloc->target = get_bo_classic(target); + dri_bo_reference(target); + return 0; } -void r300_mem_free(r300ContextPtr rmesa, int id) +/* process_relocs is called just before the given command buffer + * is executed. It ensures that all referenced buffers are in + * the right GPU domain. 
+ */ +static void *bufmgr_classic_process_relocs(dri_bo *batch_buf, GLuint *count) { -#ifdef MM_DEBUG - fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, - radeonGetAge((radeonContextPtr) rmesa)); -#endif + radeon_bo_classic *batch_bo = get_bo_classic(batch_buf); + int i; - assert(id <= rmesa->rmm->u_last); + // Warning: At this point, we append something to the batch buffer + // during flush. + emit_age_for_buffer(batch_bo); + + dri_bo_map(batch_buf, GL_TRUE); + for(i = 0; i < batch_bo->relocs_used; ++i) { + radeon_reloc *reloc = &batch_bo->relocs[i]; + uint32_t *dest = (uint32_t*)((char*)batch_buf->virtual + reloc->offset); + uint32_t offset; + + if (!reloc->target->validated) + reloc->target->functions->validate(reloc->target); + reloc->target->used = 1; + offset = reloc->target->base.offset + reloc->delta; + + if (reloc->flags & DRM_RELOC_BLITTER) + *dest = (*dest & 0xffc00000) | (offset >> 10); + else if (reloc->flags & DRM_RELOC_TXOFFSET) + *dest = (*dest & 31) | (offset & ~31); + else + *dest = offset; + } + dri_bo_unmap(batch_buf); + return 0; +} - if (id == 0) - return; +/* post_submit is called just after the given command buffer + * is executed. It ensures that buffers are properly marked as + * pending. 
+ */ +static void bufmgr_classic_post_submit(dri_bo *batch_buf, dri_fence **fence) +{ + radeon_bo_classic *batch_bo = get_bo_classic(batch_buf); + int i; - if (rmesa->rmm->u_list[id].ptr == NULL) { - WARN_ONCE("Not allocated!\n"); - return; + assert(!batch_bo->pending_count); + + for(i = 0; i < batch_bo->relocs_used; ++i) { + radeon_reloc *reloc = &batch_bo->relocs[i]; + + if (reloc->target->used) { + reloc->target->used = 0; + assert(!reloc->target->pending_count); + reloc->target->pending_age = batch_bo->pending_age; + move_to_pending_tail(reloc->target); + if (reloc->target->functions->bind) + (*reloc->target->functions->bind)(reloc->target); + } } +} - if (rmesa->rmm->u_list[id].pending) { - WARN_ONCE("%p already pended!\n", rmesa->rmm->u_list[id].ptr); - return; +static void bufmgr_classic_destroy(dri_bufmgr *bufmgr_ctx) +{ + radeon_bufmgr_classic* bufmgr = get_bufmgr_classic(bufmgr_ctx); + + track_pending_buffers(bufmgr); + if (bufmgr->pending) + radeonFinish(bufmgr->rmesa->radeon.glCtx); + track_pending_buffers(bufmgr); + + if (bufmgr->buffers) { + fprintf(stderr, "Warning: Buffer objects have leaked\n"); + while(bufmgr->buffers) { + fprintf(stderr, " Leak of size %ld\n", bufmgr->buffers->base.size); + bufmgr->buffers->refcount = 0; + bufmgr->buffers->mapcount = 0; + bufmgr->buffers->pending = 0; + bo_free(bufmgr->buffers); + } } - rmesa->rmm->u_list[id].pending = 1; + driDestroyTextureHeap(bufmgr->texture_heap); + bufmgr->texture_heap = 0; + assert(is_empty_list(&bufmgr->texture_swapped)); + + free(bufmgr); +} + +radeon_bufmgr* radeonBufmgrClassicInit(r300ContextPtr rmesa) +{ + radeon_bufmgr_classic* bufmgr = (radeon_bufmgr_classic*)calloc(1, sizeof(radeon_bufmgr_classic)); + + bufmgr->rmesa = rmesa; + bufmgr->base.base.bo_alloc = &bufmgr_classic_bo_alloc; + bufmgr->base.base.bo_alloc_static = bufmgr_classic_bo_alloc_static; + bufmgr->base.base.bo_reference = &bufmgr_classic_bo_reference; + bufmgr->base.base.bo_unreference = &bufmgr_classic_bo_unreference; 
+ bufmgr->base.base.bo_map = &bufmgr_classic_bo_map; + bufmgr->base.base.bo_unmap = &bufmgr_classic_bo_unmap; + bufmgr->base.base.emit_reloc = &bufmgr_classic_emit_reloc; + bufmgr->base.base.process_relocs = &bufmgr_classic_process_relocs; + bufmgr->base.base.post_submit = &bufmgr_classic_post_submit; + bufmgr->base.base.destroy = &bufmgr_classic_destroy; + + bufmgr->pending_tail = &bufmgr->pending; + + /* Init texture heap */ + make_empty_list(&bufmgr->texture_swapped); + bufmgr->texture_heap = driCreateTextureHeap(0, bufmgr, + rmesa->radeon.radeonScreen->texSize[0], 12, RADEON_NR_TEX_REGIONS, + (drmTextureRegionPtr)rmesa->radeon.sarea->tex_list[0], + &rmesa->radeon.sarea->tex_age[0], + &bufmgr->texture_swapped, sizeof(radeon_vram_wrapper), + &destroy_vram_wrapper); + bufmgr->texture_offset = rmesa->radeon.radeonScreen->texOffset[0]; + + return &bufmgr->base; +} + +void radeonBufmgrContendedLockTake(radeon_bufmgr* bufmgr_ctx) +{ + radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(&bufmgr_ctx->base); + + DRI_AGE_TEXTURES(bufmgr->texture_heap); } -#endif diff --git a/src/mesa/drivers/dri/r300/r300_mem.h b/src/mesa/drivers/dri/r300/r300_mem.h index 625a7f6..4e9be65 100644 --- a/src/mesa/drivers/dri/r300/r300_mem.h +++ b/src/mesa/drivers/dri/r300/r300_mem.h @@ -1,37 +1,22 @@ #ifndef __R300_MEM_H__ #define __R300_MEM_H__ -//#define R300_MEM_PDL 0 -#define R300_MEM_UL 1 +#include "glheader.h" +#include "dri_bufmgr.h" -#define R300_MEM_R 1 -#define R300_MEM_W 2 -#define R300_MEM_RW (R300_MEM_R | R300_MEM_W) +#include "r300_context.h" -#define R300_MEM_SCRATCH 2 -struct r300_memory_manager { - struct { - void *ptr; - uint32_t size; - uint32_t age; - uint32_t h_pending; - int pending; - int mapped; - } *u_list; - int u_head, u_size, u_last; +/* Note: The following flags should probably be ultimately eliminated, + * or replaced by something else. + */ +#define DRM_BO_MEM_DMA (1 << 27) /** Use for transient buffers (texture upload, vertex buffers...) 
*/ +#define DRM_BO_MEM_CMDBUF (1 << 28) /** Use for command buffers */ -}; +#define DRM_RELOC_BLITTER (1 << 23) /** Offset overwrites lower 22 bits (used with blit packet3) */ +#define DRM_RELOC_TXOFFSET (1 << 24) /** Offset overwrites everything but low bits (used for texture offsets) */ -extern void r300_mem_init(r300ContextPtr rmesa); -extern void r300_mem_destroy(r300ContextPtr rmesa); -extern void *r300_mem_ptr(r300ContextPtr rmesa, int id); -extern int r300_mem_find(r300ContextPtr rmesa, void *ptr); -extern int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size); -extern void r300_mem_use(r300ContextPtr rmesa, int id); -extern unsigned long r300_mem_offset(r300ContextPtr rmesa, int id); -extern void *r300_mem_map(r300ContextPtr rmesa, int id, int access); -extern void r300_mem_unmap(r300ContextPtr rmesa, int id); -extern void r300_mem_free(r300ContextPtr rmesa, int id); +radeon_bufmgr* radeonBufmgrClassicInit(r300ContextPtr rmesa); +void radeonBufmgrContendedLockTake(radeon_bufmgr* bufmgr_ctx); #endif diff --git a/src/mesa/drivers/dri/r300/r300_mipmap_tree.c b/src/mesa/drivers/dri/r300/r300_mipmap_tree.c new file mode 100644 index 0000000..c3b918c --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_mipmap_tree.c @@ -0,0 +1,248 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "r300_mipmap_tree.h" + +#include +#include + +#include "simple_list.h" +#include "texcompress.h" +#include "texformat.h" + +#include "r300_mem.h" + +static GLuint r300_compressed_texture_size(GLcontext *ctx, + GLsizei width, GLsizei height, GLsizei depth, + GLuint mesaFormat) +{ + GLuint size = _mesa_compressed_texture_size(ctx, width, height, depth, mesaFormat); + + if (mesaFormat == MESA_FORMAT_RGB_DXT1 || + mesaFormat == MESA_FORMAT_RGBA_DXT1) { + if (width + 3 < 8) /* width one block */ + size = size * 4; + else if (width + 3 < 16) + size = size * 2; + } else { + /* DXT3/5, 16 bytes per block */ + WARN_ONCE("DXT 3/5 suffers from multitexturing problems!\n"); + if (width + 3 < 8) + size = size * 2; + } + + return size; +} + +/** + * Compute sizes and fill in offset and blit information for the given + * image (determined by \p face and \p level). + * + * \param curOffset points to the offset at which the image is to be stored + * and is updated by this function according to the size of the image. 
+ */ +static void compute_tex_image_offset(r300_mipmap_tree *mt, + GLuint face, GLuint level, GLuint* curOffset) +{ + r300_mipmap_level *lvl = &mt->levels[level]; + + /* Find image size in bytes */ + if (mt->compressed) { + lvl->size = r300_compressed_texture_size(mt->r300->radeon.glCtx, + lvl->width, lvl->height, lvl->depth, mt->compressed); + } else if (mt->target == GL_TEXTURE_RECTANGLE_NV) { + lvl->size = ((lvl->width * mt->bpp + 63) & ~63) * lvl->height; + } else if (mt->tilebits & R300_TXO_MICRO_TILE) { + /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned, + * though the actual offset may be different (if texture is less than + * 32 bytes width) to the untiled case */ + int w = (lvl->width * mt->bpp * 2 + 31) & ~31; + lvl->size = (w * ((lvl->height + 1) / 2)) * lvl->depth; + } else { + int w = (lvl->width * mt->bpp + 31) & ~31; + lvl->size = w * lvl->height * lvl->depth; + } + assert(lvl->size > 0); + + /* All images are aligned to a 32-byte offset */ + *curOffset = (*curOffset + 0x1f) & ~0x1f; + lvl->faces[face].offset = *curOffset; + *curOffset += lvl->size; +} + +static GLuint minify(GLuint size, GLuint levels) +{ + size = size >> levels; + if (size < 1) + size = 1; + return size; +} + +static void calculate_miptree_layout(r300_mipmap_tree *mt) +{ + GLuint curOffset; + GLuint numLevels; + GLuint i; + + numLevels = mt->lastLevel - mt->firstLevel + 1; + assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS); + + curOffset = 0; + for(i = 0; i < numLevels; i++) { + GLuint face; + + mt->levels[i].width = minify(mt->width0, mt->firstLevel + i); + mt->levels[i].height = minify(mt->height0, mt->firstLevel + i); + mt->levels[i].depth = minify(mt->depth0, mt->firstLevel + i); + + for(face = 0; face < mt->faces; face++) + compute_tex_image_offset(mt, face, i, &curOffset); + } + + /* Note the required size in memory */ + mt->totalsize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK; +} + + +/** + * Create a new mipmap tree, calculate its layout and allocate 
memory. + */ +r300_mipmap_tree* r300_miptree_create(r300ContextPtr rmesa, r300TexObj *t, + GLenum target, GLuint firstLevel, GLuint lastLevel, + GLuint width0, GLuint height0, GLuint depth0, + GLuint bpp, GLuint tilebits, GLuint compressed) +{ + r300_mipmap_tree *mt = CALLOC_STRUCT(_r300_mipmap_tree); + + mt->r300 = rmesa; + mt->t = t; + mt->target = target; + mt->faces = (target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; + mt->firstLevel = firstLevel; + mt->lastLevel = lastLevel; + mt->width0 = width0; + mt->height0 = height0; + mt->depth0 = depth0; + mt->bpp = bpp; + mt->tilebits = tilebits; + mt->compressed = compressed; + + calculate_miptree_layout(mt); + + mt->bo = dri_bo_alloc(&rmesa->radeon.bufmgr->base, "texture", mt->totalsize, 1024, 0); + + return mt; +} + +/** + * Destroy the given mipmap tree. + */ +void r300_miptree_destroy(r300_mipmap_tree *mt) +{ + dri_bo_unreference(mt->bo); + free(mt); +} + +/* Copy a rectangle of cpp-byte texels from src to dst; pitches, x origins and width are in texels, y origins in rows. + * XXX Move this into core Mesa? + */ +static void +_mesa_copy_rect(GLubyte * dst, + GLuint cpp, + GLuint dst_pitch, + GLuint dst_x, + GLuint dst_y, + GLuint width, + GLuint height, + const GLubyte * src, + GLuint src_pitch, GLuint src_x, GLuint src_y) +{ + GLuint i; + + dst_pitch *= cpp; + src_pitch *= cpp; + dst += dst_x * cpp; + src += src_x * cpp; + dst += dst_y * dst_pitch; + src += src_y * src_pitch; + width *= cpp; + + if (width == dst_pitch && width == src_pitch) + memcpy(dst, src, height * width); + else { + for (i = 0; i < height; i++) { + memcpy(dst, src, width); + dst += dst_pitch; + src += src_pitch; + } + } +} + +/** + * Upload the given texture image to the given face/level of the mipmap tree. + * \param level of the texture, i.e.
\c level==mt->firstLevel is the first hw level + */ +void r300_miptree_upload_image(r300_mipmap_tree *mt, GLuint face, GLuint level, + struct gl_texture_image *texImage) +{ + GLuint hwlevel = level - mt->firstLevel; + r300_mipmap_level *lvl = &mt->levels[hwlevel]; + void *dest; + + assert(face < mt->faces); + assert(level >= mt->firstLevel && level <= mt->lastLevel); + assert(texImage && texImage->Data); + assert(texImage->Width == lvl->width); + assert(texImage->Height == lvl->height); + assert(texImage->Depth == lvl->depth); + + dri_bo_map(mt->bo, GL_TRUE); + + dest = (char *)mt->bo->virtual + lvl->faces[face].offset; + + if (mt->tilebits) + WARN_ONCE("%s: tiling not supported yet\n", __FUNCTION__); + + if (!mt->compressed) { + GLuint dst_align; + GLuint dst_pitch = lvl->width; + GLuint src_pitch = lvl->width; + + if (mt->target == GL_TEXTURE_RECTANGLE_NV) + dst_align = 64 / mt->bpp; + else + dst_align = 32 / mt->bpp; + dst_pitch = (dst_pitch + dst_align - 1) & ~(dst_align - 1); + + _mesa_copy_rect(dest, mt->bpp, dst_pitch, 0, 0, lvl->width, lvl->height, + texImage->Data, src_pitch, 0, 0); + } else { + memcpy(dest, texImage->Data, lvl->size); + } + + dri_bo_unmap(mt->bo); +} diff --git a/src/mesa/drivers/dri/r300/r300_mipmap_tree.h b/src/mesa/drivers/dri/r300/r300_mipmap_tree.h new file mode 100644 index 0000000..a888ecf --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_mipmap_tree.h @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __R300_MIPMAP_TREE_H_ +#define __R300_MIPMAP_TREE_H_ + +#include "r300_context.h" + +typedef struct _r300_mipmap_tree r300_mipmap_tree; +typedef struct _r300_mipmap_level r300_mipmap_level; +typedef struct _r300_mipmap_image r300_mipmap_image; + +struct _r300_mipmap_image { + GLuint offset; /** Offset of this image from the start of mipmap tree, in bytes */ +}; + +struct _r300_mipmap_level { + GLuint width; + GLuint height; + GLuint depth; + GLuint size; /** Size of each image, in bytes */ + r300_mipmap_image faces[6]; +}; + + +/** + * A mipmap tree contains texture images in the layout that the hardware + * expects. + * + * The meta-data of mipmap trees is immutable, i.e. you cannot change the + * layout on-the-fly; however, the texture contents (i.e. texels) can be + * changed. 
+ */ +struct _r300_mipmap_tree { + r300ContextPtr r300; + r300TexObj *t; + dri_bo *bo; + + GLuint totalsize; /** total size of the miptree, in bytes */ + + GLenum target; /** GL_TEXTURE_xxx */ + GLuint faces; /** # of faces: 6 for cubemaps, 1 otherwise */ + GLuint firstLevel; /** First mip level stored in this mipmap tree */ + GLuint lastLevel; /** Last mip level stored in this mipmap tree */ + + GLuint width0; /** Width of level 0 image */ + GLuint height0; /** Height of level 0 image */ + GLuint depth0; /** Depth of level 0 image */ + + GLuint bpp; /** Bytes per texel */ + GLuint tilebits; /** R300_TXO_xxx_TILE */ + GLuint compressed; /** MESA_FORMAT_xxx indicating a compressed format, or 0 if uncompressed */ + + r300_mipmap_level levels[RADEON_MAX_TEXTURE_LEVELS]; +}; + +r300_mipmap_tree* r300_miptree_create(r300ContextPtr rmesa, r300TexObj *t, + GLenum target, GLuint firstLevel, GLuint lastLevel, + GLuint width0, GLuint height0, GLuint depth0, + GLuint bpp, GLuint tilebits, GLuint compressed); +void r300_miptree_destroy(r300_mipmap_tree *mt); + +void r300_miptree_upload_image(r300_mipmap_tree *mt, GLuint face, GLuint level, + struct gl_texture_image *texImage); + + +#endif /* __R300_MIPMAP_TREE_H_ */ diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 0a199e6..209fae9 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -175,89 +175,79 @@ int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct r300_dma_region *rvb = &rmesa->state.elt_dma; void *out; - if (r300IsGartMemory(rmesa, elts, n_elts * 4)) { - rvb->address = rmesa->radeon.radeonScreen->gartTextures.map; - rvb->start = ((char *)elts) - rvb->address; - rvb->aos_offset = - rmesa->radeon.radeonScreen->gart_texture_offset + - rvb->start; - return; - } else if 
(r300IsGartMemory(rmesa, elts, 1)) { - WARN_ONCE("Pointer not within GART memory!\n"); - _mesa_exit(-1); - } - - r300AllocDmaRegion(rmesa, rvb, n_elts * 4, 4); - rvb->aos_offset = GET_START(rvb); + r300AllocDmaRegion(rmesa, &rmesa->state.elt_dma_bo, &rmesa->state.elt_dma_offset, + n_elts * 4, 4); - out = rvb->address + rvb->start; + out = rmesa->state.elt_dma_bo->virtual + rmesa->state.elt_dma_offset; memcpy(out, elts, n_elts * 4); } -static void r300FireEB(r300ContextPtr rmesa, unsigned long addr, - int vertex_count, int type) +static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type) { - int cmd_reserved = 0; - int cmd_written = 0; - drm_radeon_cmd_header_t *cmd = NULL; + BATCH_LOCALS(rmesa); - start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0), 0); - e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count << 16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); + BEGIN_BATCH(8); + OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0); + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count << 16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); - start_packet3(CP_PACKET3(R300_PACKET3_INDX_BUFFER, 2), 2); - e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); - e32(addr); - e32(vertex_count); + OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); + OUT_BATCH(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); + OUT_BATCH_RELOC(0, rmesa->state.elt_dma_bo, rmesa->state.elt_dma_offset, 0); + OUT_BATCH(vertex_count); + END_BATCH(); } static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) { + BATCH_LOCALS(rmesa); int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2; int i; - int cmd_reserved = 0; - int cmd_written = 0; - drm_radeon_cmd_header_t *cmd = NULL; if (RADEON_DEBUG & DEBUG_VERTS) fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr, offset); - start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1), sz - 1); - e32(nr); + BEGIN_BATCH(sz+2); + OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1); + OUT_BATCH(nr); for (i = 0; i + 1 < nr; i += 2) 
{ - e32((rmesa->state.aos[i].aos_size << 0) | - (rmesa->state.aos[i].aos_stride << 8) | - (rmesa->state.aos[i + 1].aos_size << 16) | - (rmesa->state.aos[i + 1].aos_stride << 24)); - - e32(rmesa->state.aos[i].aos_offset + offset * 4 * rmesa->state.aos[i].aos_stride); - e32(rmesa->state.aos[i + 1].aos_offset + offset * 4 * rmesa->state.aos[i + 1].aos_stride); + OUT_BATCH((rmesa->state.aos[i].components << 0) | + (rmesa->state.aos[i].stride << 8) | + (rmesa->state.aos[i + 1].components << 16) | + (rmesa->state.aos[i + 1].stride << 24)); + + OUT_BATCH_RELOC(0, rmesa->state.aos[i].bo, + rmesa->state.aos[i].offset + offset * 4 * rmesa->state.aos[i].stride, 0); + OUT_BATCH_RELOC(0, rmesa->state.aos[i+1].bo, + rmesa->state.aos[i+1].offset + offset * 4 * rmesa->state.aos[i + 1].stride, 0); } if (nr & 1) { - e32((rmesa->state.aos[nr - 1].aos_size << 0) | - (rmesa->state.aos[nr - 1].aos_stride << 8)); - e32(rmesa->state.aos[nr - 1].aos_offset + offset * 4 * rmesa->state.aos[nr - 1].aos_stride); + OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) | + (rmesa->state.aos[nr - 1].stride << 8)); + OUT_BATCH_RELOC(0, rmesa->state.aos[nr - 1].bo, + rmesa->state.aos[nr - 1].offset + offset * 4 * rmesa->state.aos[nr - 1].stride, 0); } + END_BATCH(); } static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type) { - int cmd_reserved = 0; - int cmd_written = 0; - drm_radeon_cmd_header_t *cmd = NULL; + BATCH_LOCALS(rmesa); - start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0); - e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type); + BEGIN_BATCH(3); + OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0); + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type); + END_BATCH(); } static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, int start, int end, int prim) { + BATCH_LOCALS(rmesa); int type, num_verts; TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *vb = &tnl->vb; @@ -268,6 +258,12 @@ 
static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, if (type < 0 || num_verts <= 0) return; + /* Make space for at least 64 dwords. + * This is supposed to ensure that we can get all rendering + * commands into a single command buffer. + */ + r300EnsureCmdBufSpace(rmesa, 64, __FUNCTION__); + if (vb->Elts) { if (num_verts > 65535) { /* not implemented yet */ @@ -287,11 +283,12 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, */ r300EmitElts(ctx, vb->Elts, num_verts); r300EmitAOS(rmesa, rmesa->state.aos_count, start); - r300FireEB(rmesa, rmesa->state.elt_dma.aos_offset, num_verts, type); + r300FireEB(rmesa, num_verts, type); } else { r300EmitAOS(rmesa, rmesa->state.aos_count, start); r300FireAOS(rmesa, num_verts, type); } + COMMIT_BATCH(); } static GLboolean r300RunRender(GLcontext * ctx, @@ -324,10 +321,6 @@ static GLboolean r300RunRender(GLcontext * ctx, r300EmitCacheFlush(rmesa); -#ifdef USER_BUFFERS - r300UseArrays(ctx); -#endif - r300ReleaseArrays(ctx); return GL_FALSE; diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index cce07d3..b314764 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -55,6 +55,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_ioctl.h" #include "radeon_state.h" +#include "radeon_buffer.h" #include "r300_context.h" #include "r300_ioctl.h" #include "r300_state.h" @@ -1148,39 +1149,25 @@ void r300UpdateDrawBuffer(GLcontext * ctx) r300ContextPtr rmesa = R300_CONTEXT(ctx); r300ContextPtr r300 = rmesa; struct gl_framebuffer *fb = ctx->DrawBuffer; - driRenderbuffer *drb; + struct radeon_renderbuffer *rrb; if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) { /* draw to front */ - drb = - (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT]. 
- Renderbuffer; + rrb = + (void *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; } else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) { /* draw to back */ - drb = - (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT]. - Renderbuffer; + rrb = (void *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; } else { /* drawing to multiple buffers, or none */ return; } - assert(drb); - assert(drb->flippedPitch); + assert(rrb); + assert(rrb->pitch); R300_STATECHANGE(rmesa, cb); - r300->hw.cb.cmd[R300_CB_OFFSET] = drb->flippedOffset + //r300->radeon.state.color.drawOffset + - r300->radeon.radeonScreen->fbLocation; - r300->hw.cb.cmd[R300_CB_PITCH] = drb->flippedPitch; //r300->radeon.state.color.drawPitch; - - if (r300->radeon.radeonScreen->cpp == 4) - r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888; - else - r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565; - - if (r300->radeon.sarea->tiling_enabled) - r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE; #if 0 R200_STATECHANGE(rmesa, ctx); @@ -1499,14 +1486,9 @@ static void r300SetupTextures(GLcontext * ctx) /* We cannot let disabled tmu offsets pass DRM */ for (i = 0; i < mtu; i++) { if (ctx->Texture.Unit[i]._ReallyEnabled) { - -#if 0 /* Enables old behaviour */ - hw_tmu = i; -#endif tmu_mappings[i] = hw_tmu; - t = r300->state.texture.unit[i].texobj; - /* XXX questionable fix for bug 9170: */ + t = r300_tex_obj(ctx->Texture.Unit[i]._Current); if (!t) continue; @@ -1532,21 +1514,20 @@ static void r300SetupTextures(GLcontext * ctx) */ r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->filter_1 | - translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.tObj->LodBias); + translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.LodBias); r300->hw.tex.size.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->size; r300->hw.tex.format.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->format; r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->pitch_reg; - r300->hw.tex.offset.cmd[R300_TEX_VALUE_0 + - hw_tmu] = 
t->offset; + r300->hw.textures[hw_tmu] = t; - if (t->offset & R300_TXO_MACRO_TILE) { + if (t->tile_bits & R300_TXO_MACRO_TILE) { WARN_ONCE("macro tiling enabled!\n"); } - if (t->offset & R300_TXO_MICRO_TILE) { + if (t->tile_bits & R300_TXO_MICRO_TILE) { WARN_ONCE("micro tiling enabled!\n"); } @@ -2373,20 +2354,6 @@ static void r300ResetHwState(r300ContextPtr r300) r300BlendColor(ctx, ctx->Color.BlendColor); - /* Again, r300ClearBuffer uses this */ - r300->hw.cb.cmd[R300_CB_OFFSET] = - r300->radeon.state.color.drawOffset + - r300->radeon.radeonScreen->fbLocation; - r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch; - - if (r300->radeon.radeonScreen->cpp == 4) - r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888; - else - r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565; - - if (r300->radeon.sarea->tiling_enabled) - r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE; - r300->hw.rb3d_dither_ctl.cmd[1] = 0; r300->hw.rb3d_dither_ctl.cmd[2] = 0; r300->hw.rb3d_dither_ctl.cmd[3] = 0; @@ -2402,10 +2369,6 @@ static void r300ResetHwState(r300ContextPtr r300) r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[1] = 0x00000000; r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[2] = 0xffffffff; - r300->hw.zb.cmd[R300_ZB_OFFSET] = - r300->radeon.radeonScreen->depthOffset + - r300->radeon.radeonScreen->fbLocation; - r300->hw.zb.cmd[R300_ZB_PITCH] = r300->radeon.radeonScreen->depthPitch; if (r300->radeon.sarea->tiling_enabled) { /* XXX: Turn off when clearing buffers ? */ diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h index 0589ab7..96177ba 100644 --- a/src/mesa/drivers/dri/r300/r300_state.h +++ b/src/mesa/drivers/dri/r300/r300_state.h @@ -59,7 +59,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define R300_FIREVERTICES( r300 ) \ do { \ \ - if ( (r300)->cmdbuf.count_used || (r300)->dma.flush ) { \ + if ( (r300)->cmdbuf.committed || (r300)->dma.flush ) { \ r300Flush( (r300)->radeon.glCtx ); \ } \ \ diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index 8aebd9b..f4a0b7f 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -61,7 +61,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. static void flush_last_swtcl_prim( r300ContextPtr rmesa ); -void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset); +void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, dri_bo *bo, GLuint offset); void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr); #define EMIT_ATTR( ATTR, STYLE ) \ do { \ @@ -175,7 +175,7 @@ static void r300SetVertexFormat( GLcontext *ctx ) inputs[i] = -1; } } - + /* Fixed, apply to vir0 only */ if (InputsRead & (1 << VERT_ATTRIB_POS)) inputs[VERT_ATTRIB_POS] = 0; @@ -186,16 +186,16 @@ static void r300SetVertexFormat( GLcontext *ctx ) for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) if (InputsRead & (1 << i)) inputs[i] = 6 + (i - VERT_ATTRIB_TEX0); - + for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { if (InputsRead & (1 << i)) { tab[nr++] = i; } } - + for (i = 0; i < nr; i++) { int ci; - + swizzle[i][0] = SWIZZLE_ZERO; swizzle[i][1] = SWIZZLE_ZERO; swizzle[i][2] = SWIZZLE_ZERO; @@ -215,21 +215,21 @@ static void r300SetVertexFormat( GLcontext *ctx ) ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, nr); - + R300_STATECHANGE(rmesa, vic); rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead); - + R300_STATECHANGE(rmesa, vof); rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten); rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = 
vap_fmt_1; - + rmesa->swtcl.vertex_size = _tnl_install_attrs( ctx, - rmesa->swtcl.vertex_attrs, + rmesa->swtcl.vertex_attrs, rmesa->swtcl.vertex_attr_count, NULL, 0 ); - + rmesa->swtcl.vertex_size /= 4; RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset ); @@ -245,38 +245,40 @@ static void r300SetVertexFormat( GLcontext *ctx ) */ static void flush_last_swtcl_prim( r300ContextPtr rmesa ) { + BATCH_LOCALS(rmesa); + if (RADEON_DEBUG & DEBUG_IOCTL) fprintf(stderr, "%s\n", __FUNCTION__); - + rmesa->dma.flush = NULL; - if (rmesa->dma.current.buf) { - struct r300_dma_region *current = &rmesa->dma.current; - GLuint current_offset = GET_START(current); + if (rmesa->dma.current) { + GLuint current_offset = rmesa->dma.current_used; - assert (current->start + + assert (rmesa->dma.current_used + rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == - current->ptr); + rmesa->dma.current_vertexptr); - if (rmesa->dma.current.start != rmesa->dma.current.ptr) { + if (rmesa->dma.current_used != rmesa->dma.current_vertexptr) { + rmesa->dma.current_used = rmesa->dma.current_vertexptr; r300EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size + (12*sizeof(int)), __FUNCTION__); - + r300EmitState(rmesa); - + r300EmitVertexAOS( rmesa, rmesa->swtcl.vertex_size, - current_offset); - + rmesa->dma.current, current_offset); + r300EmitVbufPrim( rmesa, rmesa->swtcl.hw_primitive, rmesa->swtcl.numverts); - + r300EmitCacheFlush(rmesa); + COMMIT_BATCH(); } - + rmesa->swtcl.numverts = 0; - current->start = current->ptr; } } @@ -287,7 +289,7 @@ r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize ) { GLuint bytes = vsize * nverts; - if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) + if (!rmesa->dma.current || rmesa->dma.current_vertexptr + bytes > rmesa->dma.current->size) r300RefillCurrentDmaRegion( rmesa, bytes); if (!rmesa->dma.flush) { @@ -297,13 +299,13 @@ r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize ) ASSERT( vsize == rmesa->swtcl.vertex_size * 
4 ); ASSERT( rmesa->dma.flush == flush_last_swtcl_prim ); - ASSERT( rmesa->dma.current.start + + ASSERT( rmesa->dma.current_used + rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == - rmesa->dma.current.ptr ); + rmesa->dma.current_vertexptr ); { - GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr); - rmesa->dma.current.ptr += bytes; + GLubyte *head = (GLubyte *) (rmesa->dma.current->virtual + rmesa->dma.current_vertexptr); + rmesa->dma.current_vertexptr += bytes; rmesa->swtcl.numverts += nverts; return head; } @@ -352,7 +354,7 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim ); r300ContextPtr rmesa = R300_CONTEXT(ctx); \ const char *r300verts = (char *)rmesa->swtcl.verts; #define VERT(x) (r300Vertex *)(r300verts + ((x) * vertsize * sizeof(int))) -#define VERTEX r300Vertex +#define VERTEX r300Vertex #define DO_DEBUG_VERTS (1 && (RADEON_DEBUG & DEBUG_VERTS)) #define PRINT_VERTEX(x) #undef TAG @@ -572,15 +574,15 @@ static void r300RenderStart(GLcontext *ctx) r300ContextPtr rmesa = R300_CONTEXT( ctx ); // fprintf(stderr, "%s\n", __FUNCTION__); - r300ChooseRenderState(ctx); + r300ChooseRenderState(ctx); r300SetVertexFormat(ctx); r300UpdateShaders(rmesa); r300UpdateShaderStates(rmesa); r300EmitCacheFlush(rmesa); - - if (rmesa->dma.flush != 0 && + + if (rmesa->dma.flush != 0 && rmesa->dma.flush != flush_last_swtcl_prim) rmesa->dma.flush( rmesa ); @@ -593,7 +595,7 @@ static void r300RenderFinish(GLcontext *ctx) static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim ) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - + if (rmesa->swtcl.hw_primitive != hwprim) { R300_NEWPRIM( rmesa ); rmesa->swtcl.hw_primitive = hwprim; @@ -611,7 +613,7 @@ static void r300RenderPrimitive(GLcontext *ctx, GLenum prim) r300RasterPrimitive( ctx, reduced_prim[prim] ); // fprintf(stderr, "%s\n", __FUNCTION__); - + } static void r300ResetLineStipple(GLcontext *ctx) @@ -625,12 +627,12 @@ void r300InitSwtcl(GLcontext *ctx) TNLcontext *tnl = 
TNL_CONTEXT(ctx); r300ContextPtr rmesa = R300_CONTEXT(ctx); static int firsttime = 1; - + if (firsttime) { init_rast_tab(); firsttime = 0; } - + tnl->Driver.Render.Start = r300RenderStart; tnl->Driver.Render.Finish = r300RenderFinish; tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive; @@ -638,15 +640,15 @@ void r300InitSwtcl(GLcontext *ctx) tnl->Driver.Render.BuildVertices = _tnl_build_vertices; tnl->Driver.Render.CopyPV = _tnl_copy_pv; tnl->Driver.Render.Interp = _tnl_interp; - + /* FIXME: what are these numbers? */ - _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, + _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, 48 * sizeof(GLfloat) ); - + rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf; rmesa->swtcl.RenderIndex = ~0; rmesa->swtcl.render_primitive = GL_TRIANGLES; - rmesa->swtcl.hw_primitive = 0; + rmesa->swtcl.hw_primitive = 0; _tnl_invalidate_vertex_state( ctx, ~0 ); _tnl_invalidate_vertices( ctx, ~0 ); @@ -655,9 +657,9 @@ void r300InitSwtcl(GLcontext *ctx) _tnl_need_projected_coords( ctx, GL_FALSE ); r300ChooseRenderState(ctx); - _mesa_validate_all_lighting_tables( ctx ); + _mesa_validate_all_lighting_tables( ctx ); - tnl->Driver.NotifyMaterialChange = + tnl->Driver.NotifyMaterialChange = _mesa_validate_all_lighting_tables; } @@ -665,33 +667,32 @@ void r300DestroySwtcl(GLcontext *ctx) { } -void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset) +void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, dri_bo *bo, GLuint offset) { - int cmd_reserved = 0; - int cmd_written = 0; + BATCH_LOCALS(rmesa); - drm_radeon_cmd_header_t *cmd = NULL; if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s: vertex_size %d, offset 0x%x \n", - __FUNCTION__, vertex_size, offset); - - start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2), 2); - e32(1); - e32(vertex_size | (vertex_size << 8)); - e32(offset); + fprintf(stderr, "%s: vertex_size %d, offset 0x%x \n", + __FUNCTION__, vertex_size, offset); + + 
BEGIN_BATCH(5); + OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2); + OUT_BATCH(1); + OUT_BATCH(vertex_size | (vertex_size << 8)); + OUT_BATCH_RELOC(0, bo, offset, 0); + END_BATCH(); } void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr) { - - int cmd_reserved = 0; - int cmd_written = 0; + BATCH_LOCALS(rmesa); int type, num_verts; - drm_radeon_cmd_header_t *cmd = NULL; type = r300PrimitiveType(rmesa, primitive); num_verts = r300NumVerts(rmesa, vertex_nr, primitive); - - start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0); - e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type); + + BEGIN_BATCH(3); + OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0); + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type); + END_BATCH(); } diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index f7f4972..c6ee1b5 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -48,6 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_context.h" #include "r300_state.h" #include "r300_ioctl.h" +#include "r300_mipmap_tree.h" #include "r300_tex.h" #include "xmlpool.h" @@ -78,7 +79,7 @@ static unsigned int translate_wrap_mode(GLenum wrapmode) */ static void r300UpdateTexWrap(r300TexObjPtr t) { - struct gl_texture_object *tObj = t->base.tObj; + struct gl_texture_object *tObj = &t->base; t->filter &= ~(R300_TX_WRAP_S_MASK | R300_TX_WRAP_T_MASK | R300_TX_WRAP_R_MASK); @@ -175,39 +176,6 @@ static void r300SetTexBorderColor(r300TexObjPtr t, GLubyte c[4]) t->pp_border_color = PACK_COLOR_8888(c[3], c[0], c[1], c[2]); } -/** - * Allocate space for and load the mesa images into the texture memory block. - * This will happen before drawing with a new texture, or drawing with a - * texture after it was swapped out or teximaged again. 
- */ - -static r300TexObjPtr r300AllocTexObj(struct gl_texture_object *texObj) -{ - r300TexObjPtr t; - - t = CALLOC_STRUCT(r300_tex_obj); - texObj->DriverData = t; - if (t != NULL) { - if (RADEON_DEBUG & DEBUG_TEXTURE) { - fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__, - (void *)texObj, (void *)t); - } - - /* Initialize non-image-dependent parts of the state: - */ - t->base.tObj = texObj; - t->border_fallback = GL_FALSE; - - make_empty_list(&t->base); - - r300UpdateTexWrap(t); - r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy); - r300SetTexBorderColor(t, texObj->_BorderChan); - } - - return t; -} - /* try to find a format which will only need a memcopy */ static const struct gl_texture_format *r300Choose8888TexFormat(GLenum srcFormat, GLenum srcType) @@ -433,95 +401,14 @@ static const struct gl_texture_format *r300ChooseTextureFormat(GLcontext * ctx, return NULL; /* never get here */ } -static GLboolean -r300ValidateClientStorage(GLcontext * ctx, GLenum target, - GLint internalFormat, - GLint srcWidth, GLint srcHeight, - GLenum format, GLenum type, const void *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage) +/** + * Marks the given face/level pair as dirty. + * This will cause an appropriate texture reupload the next time this + * texture is validated. + */ +static void mark_texture_image_dirty(r300TexObj *t, int face, int level) { - r300ContextPtr rmesa = R300_CONTEXT(ctx); - - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "intformat %s format %s type %s\n", - _mesa_lookup_enum_by_nr(internalFormat), - _mesa_lookup_enum_by_nr(format), - _mesa_lookup_enum_by_nr(type)); - - if (!ctx->Unpack.ClientStorage) - return 0; - - if (ctx->_ImageTransferState || - texImage->IsCompressed || texObj->GenerateMipmap) - return 0; - - /* This list is incomplete, may be different on ppc??? 
- */ - switch (internalFormat) { - case GL_RGBA: - if (format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV) { - texImage->TexFormat = _dri_texformat_argb8888; - } else - return 0; - break; - - case GL_RGB: - if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) { - texImage->TexFormat = _dri_texformat_rgb565; - } else - return 0; - break; - - case GL_YCBCR_MESA: - if (format == GL_YCBCR_MESA && - type == GL_UNSIGNED_SHORT_8_8_REV_APPLE) { - texImage->TexFormat = &_mesa_texformat_ycbcr_rev; - } else if (format == GL_YCBCR_MESA && - (type == GL_UNSIGNED_SHORT_8_8_APPLE || - type == GL_UNSIGNED_BYTE)) { - texImage->TexFormat = &_mesa_texformat_ycbcr; - } else - return 0; - break; - - default: - return 0; - } - - /* Could deal with these packing issues, but currently don't: - */ - if (packing->SkipPixels || - packing->SkipRows || packing->SwapBytes || packing->LsbFirst) { - return 0; - } - - GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth, - format, type); - - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: srcRowStride %d/%x\n", - __FUNCTION__, srcRowStride, srcRowStride); - - /* Could check this later in upload, pitch restrictions could be - * relaxed, but would need to store the image pitch somewhere, - * as packing details might change before image is uploaded: - */ - if (!r300IsGartMemory(rmesa, pixels, srcHeight * srcRowStride) - || (srcRowStride & 63)) - return 0; - - /* Have validated that _mesa_transfer_teximage would be a straight - * memcpy at this point. NOTE: future calls to TexSubImage will - * overwrite the client data. This is explicitly mentioned in the - * extension spec. 
- */ - texImage->Data = (void *)pixels; - texImage->IsClientData = GL_TRUE; - texImage->RowStride = srcRowStride / texImage->TexFormat->TexelBytes; - - return 1; + t->dirty_images[face] |= 1 << level; } static void r300TexImage1D(GLcontext * ctx, GLenum target, GLint level, @@ -532,24 +419,13 @@ static void r300TexImage1D(GLcontext * ctx, GLenum target, GLint level, struct gl_texture_object *texObj, struct gl_texture_image *texImage) { - driTextureObject *t = (driTextureObject *) texObj->DriverData; + r300TexObj* t = r300_tex_obj(texObj); - if (t) { - driSwapOutTextureObject(t); - } else { - t = (driTextureObject *) r300AllocTexObj(texObj); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D"); - return; - } - } - - /* Note, this will call ChooseTextureFormat */ _mesa_store_teximage1d(ctx, target, level, internalFormat, width, border, format, type, pixels, &ctx->Unpack, texObj, texImage); - t->dirty_images[0] |= (1 << level); + mark_texture_image_dirty(t, 0, level); } static void r300TexSubImage1D(GLcontext * ctx, GLenum target, GLint level, @@ -561,24 +437,13 @@ static void r300TexSubImage1D(GLcontext * ctx, GLenum target, GLint level, struct gl_texture_object *texObj, struct gl_texture_image *texImage) { - driTextureObject *t = (driTextureObject *) texObj->DriverData; - - assert(t); /* this _should_ be true */ - if (t) { - driSwapOutTextureObject(t); - } else { - t = (driTextureObject *) r300AllocTexObj(texObj); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D"); - return; - } - } + r300TexObj* t = r300_tex_obj(texObj); _mesa_store_texsubimage1d(ctx, target, level, xoffset, width, format, type, pixels, packing, texObj, texImage); - t->dirty_images[0] |= (1 << level); + mark_texture_image_dirty(t, 0, level); } static void r300TexImage2D(GLcontext * ctx, GLenum target, GLint level, @@ -589,7 +454,7 @@ static void r300TexImage2D(GLcontext * ctx, GLenum target, GLint level, struct gl_texture_object *texObj, struct gl_texture_image 
*texImage) { - driTextureObject *t = (driTextureObject *) texObj->DriverData; + r300TexObj* t = r300_tex_obj(texObj); GLuint face; /* which cube face or ordinary 2D image */ @@ -608,43 +473,23 @@ static void r300TexImage2D(GLcontext * ctx, GLenum target, GLint level, face = 0; } - if (t != NULL) { - driSwapOutTextureObject(t); - } else { - t = (driTextureObject *) r300AllocTexObj(texObj); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D"); - return; - } - } - texImage->IsClientData = GL_FALSE; - if (r300ValidateClientStorage(ctx, target, - internalFormat, - width, height, - format, type, pixels, - packing, texObj, texImage)) { - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: Using client storage\n", - __FUNCTION__); - } else { - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: Using normal storage\n", - __FUNCTION__); - - /* Normal path: copy (to cached memory) and eventually upload - * via another copy to GART memory and then a blit... Could - * eliminate one copy by going straight to (permanent) GART. - * - * Note, this will call r300ChooseTextureFormat. - */ - _mesa_store_teximage2d(ctx, target, level, internalFormat, - width, height, border, format, type, - pixels, &ctx->Unpack, texObj, texImage); + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "%s: Using normal storage\n", + __FUNCTION__); + + /* Normal path: copy (to cached memory) and eventually upload + * via another copy to GART memory and then a blit... Could + * eliminate one copy by going straight to (permanent) GART. + * + * Note, this will call r300ChooseTextureFormat. 
+ */ + _mesa_store_teximage2d(ctx, target, level, internalFormat, + width, height, border, format, type, + pixels, &ctx->Unpack, texObj, texImage); - t->dirty_images[face] |= (1 << level); - } + mark_texture_image_dirty(t, face, level); } static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level, @@ -656,7 +501,7 @@ static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level, struct gl_texture_object *texObj, struct gl_texture_image *texImage) { - driTextureObject *t = (driTextureObject *) texObj->DriverData; + r300TexObj* t = r300_tex_obj(texObj); GLuint face; /* which cube face or ordinary 2D image */ @@ -675,22 +520,11 @@ static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level, face = 0; } - assert(t); /* this _should_ be true */ - if (t) { - driSwapOutTextureObject(t); - } else { - t = (driTextureObject *) r300AllocTexObj(texObj); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D"); - return; - } - } - _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width, height, format, type, pixels, packing, texObj, texImage); - t->dirty_images[face] |= (1 << level); + mark_texture_image_dirty(t, face, level); } static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target, @@ -700,7 +534,7 @@ static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target, struct gl_texture_object *texObj, struct gl_texture_image *texImage) { - driTextureObject *t = (driTextureObject *) texObj->DriverData; + r300TexObj* t = r300_tex_obj(texObj); GLuint face; /* which cube face or ordinary 2D image */ @@ -719,49 +553,24 @@ static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target, face = 0; } - if (t != NULL) { - driSwapOutTextureObject(t); - } else { - t = (driTextureObject *) r300AllocTexObj(texObj); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, - "glCompressedTexImage2D"); - return; - } - } - texImage->IsClientData = GL_FALSE; - /* can't call this, different parameters. 
Would never evaluate to true anyway currently */ -#if 0 - if (r300ValidateClientStorage(ctx, target, - internalFormat, - width, height, - format, type, pixels, - packing, texObj, texImage)) { - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: Using client storage\n", - __FUNCTION__); - } else -#endif - { - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: Using normal storage\n", - __FUNCTION__); - - /* Normal path: copy (to cached memory) and eventually upload - * via another copy to GART memory and then a blit... Could - * eliminate one copy by going straight to (permanent) GART. - * - * Note, this will call r300ChooseTextureFormat. - */ - _mesa_store_compressed_teximage2d(ctx, target, level, - internalFormat, width, height, - border, imageSize, data, - texObj, texImage); + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "%s: Using normal storage\n", + __FUNCTION__); + + /* Normal path: copy (to cached memory) and eventually upload + * via another copy to GART memory and then a blit... Could + * eliminate one copy by going straight to (permanent) GART. + * + * Note, this will call r300ChooseTextureFormat. 
+ */ + _mesa_store_compressed_teximage2d(ctx, target, level, + internalFormat, width, height, + border, imageSize, data, + texObj, texImage); - t->dirty_images[face] |= (1 << level); - } + mark_texture_image_dirty(t, face, level); } static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target, @@ -772,7 +581,7 @@ static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target, struct gl_texture_object *texObj, struct gl_texture_image *texImage) { - driTextureObject *t = (driTextureObject *) texObj->DriverData; + r300TexObj* t = r300_tex_obj(texObj); GLuint face; /* which cube face or ordinary 2D image */ @@ -791,23 +600,11 @@ static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target, face = 0; } - assert(t); /* this _should_ be true */ - if (t) { - driSwapOutTextureObject(t); - } else { - t = (driTextureObject *) r300AllocTexObj(texObj); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, - "glCompressedTexSubImage3D"); - return; - } - } - _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, yoffset, width, height, format, imageSize, data, texObj, texImage); - t->dirty_images[face] |= (1 << level); + mark_texture_image_dirty(t, face, level); } static void r300TexImage3D(GLcontext * ctx, GLenum target, GLint level, @@ -819,49 +616,26 @@ static void r300TexImage3D(GLcontext * ctx, GLenum target, GLint level, struct gl_texture_object *texObj, struct gl_texture_image *texImage) { - driTextureObject *t = (driTextureObject *) texObj->DriverData; - - if (t) { - driSwapOutTextureObject(t); - } else { - t = (driTextureObject *) r300AllocTexObj(texObj); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage3D"); - return; - } - } + r300TexObj* t = r300_tex_obj(texObj); texImage->IsClientData = GL_FALSE; -#if 0 - if (r300ValidateClientStorage(ctx, target, - internalFormat, - width, height, - format, type, pixels, - packing, texObj, texImage)) { - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: Using client storage\n", - 
__FUNCTION__); - } else -#endif - { - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: Using normal storage\n", - __FUNCTION__); - - /* Normal path: copy (to cached memory) and eventually upload - * via another copy to GART memory and then a blit... Could - * eliminate one copy by going straight to (permanent) GART. - * - * Note, this will call r300ChooseTextureFormat. - */ - _mesa_store_teximage3d(ctx, target, level, internalFormat, - width, height, depth, border, - format, type, pixels, - &ctx->Unpack, texObj, texImage); + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "%s: Using normal storage\n", + __FUNCTION__); + + /* Normal path: copy (to cached memory) and eventually upload + * via another copy to GART memory and then a blit... Could + * eliminate one copy by going straight to (permanent) GART. + * + * Note, this will call r300ChooseTextureFormat. + */ + _mesa_store_teximage3d(ctx, target, level, internalFormat, + width, height, depth, border, + format, type, pixels, + &ctx->Unpack, texObj, texImage); - t->dirty_images[0] |= (1 << level); - } + mark_texture_image_dirty(t, 0, level); } static void @@ -874,28 +648,14 @@ r300TexSubImage3D(GLcontext * ctx, GLenum target, GLint level, struct gl_texture_object *texObj, struct gl_texture_image *texImage) { - driTextureObject *t = (driTextureObject *) texObj->DriverData; - -/* fprintf(stderr, "%s\n", __FUNCTION__); */ - - assert(t); /* this _should_ be true */ - if (t) { - driSwapOutTextureObject(t); - } else { - t = (driTextureObject *) r300AllocTexObj(texObj); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage3D"); - return; - } - texObj->DriverData = t; - } + r300TexObj* t = r300_tex_obj(texObj); _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset, width, height, depth, format, type, pixels, packing, texObj, texImage); - t->dirty_images[0] |= (1 << level); + mark_texture_image_dirty(t, 0, level); } /** @@ -907,7 +667,7 @@ static void r300TexParameter(GLcontext * ctx, 
GLenum target, struct gl_texture_object *texObj, GLenum pname, const GLfloat * params) { - r300TexObjPtr t = (r300TexObjPtr) texObj->DriverData; + r300TexObj* t = r300_tex_obj(texObj); if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { fprintf(stderr, "%s( %s )\n", __FUNCTION__, @@ -940,7 +700,10 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, * we just have to rely on loading the right subset of mipmap levels * to simulate a clamped LOD. */ - driSwapOutTextureObject((driTextureObject *) t); + if (t->mt) { + r300_miptree_destroy(t->mt); + t->mt = 0; + } break; case GL_DEPTH_TEXTURE_MODE: @@ -963,27 +726,10 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, } } -static void r300BindTexture(GLcontext * ctx, GLenum target, - struct gl_texture_object *texObj) -{ - if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { - fprintf(stderr, "%s( %p ) unit=%d\n", __FUNCTION__, - (void *)texObj, ctx->Texture.CurrentUnit); - } - - if ((target == GL_TEXTURE_1D) - || (target == GL_TEXTURE_2D) - || (target == GL_TEXTURE_3D) - || (target == GL_TEXTURE_CUBE_MAP) - || (target == GL_TEXTURE_RECTANGLE_NV)) { - assert(texObj->DriverData != NULL); - } -} - static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - driTextureObject *t = (driTextureObject *) texObj->DriverData; + r300TexObj* t = r300_tex_obj(texObj); if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, @@ -991,14 +737,19 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) _mesa_lookup_enum_by_nr(texObj->Target)); } - if (t != NULL) { - if (rmesa) { - R300_FIREVERTICES(rmesa); - } + if (rmesa) { + int i; + R300_FIREVERTICES(rmesa); + + for(i = 0; i < R300_MAX_TEXTURE_UNITS; ++i) + if (rmesa->hw.textures[i] == t) + rmesa->hw.textures[i] = 0; + } - driDestroyTextureObject(t); + if (t->mt) { + r300_miptree_destroy(t->mt); + t->mt = 0; } - /* 
Free mipmap images and the texture object itself */ _mesa_delete_texture_object(ctx, texObj); } @@ -1007,8 +758,6 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) * Called via ctx->Driver.NewTextureObject. * Note: this function will be called during context creation to * allocate the default texture objects. - * Note: we could use containment here to 'derive' the driver-specific - * texture object from the core mesa gl_texture_object. Not done at this time. * Fixup MaxAnisotropy according to user preference. */ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx, @@ -1016,14 +765,23 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx, GLenum target) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct gl_texture_object *obj; - obj = _mesa_new_texture_object(ctx, name, target); - if (!obj) - return NULL; - obj->MaxAnisotropy = rmesa->initialMaxAnisotropy; + r300TexObj* t = CALLOC_STRUCT(r300_tex_obj); + + + if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { + fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, + t, _mesa_lookup_enum_by_nr(target)); + } + + _mesa_initialize_texture_object(&t->base, name, target); + t->base.MaxAnisotropy = rmesa->initialMaxAnisotropy; + + /* Initialize hardware state */ + r300UpdateTexWrap(t); + r300SetTexFilter(t, t->base.MinFilter, t->base.MagFilter, t->base.MaxAnisotropy); + r300SetTexBorderColor(t, t->base._BorderChan); - r300AllocTexObj(obj); - return obj; + return &t->base; } void r300InitTextureFuncs(struct dd_function_table *functions) @@ -1039,7 +797,6 @@ void r300InitTextureFuncs(struct dd_function_table *functions) functions->TexSubImage2D = r300TexSubImage2D; functions->TexSubImage3D = r300TexSubImage3D; functions->NewTextureObject = r300NewTextureObject; - functions->BindTexture = r300BindTexture; functions->DeleteTexture = r300DeleteTexture; functions->IsTextureResident = driIsTextureResident; diff --git 
a/src/mesa/drivers/dri/r300/r300_tex.h b/src/mesa/drivers/dri/r300/r300_tex.h index b86d45b..5d7f21e 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.h +++ b/src/mesa/drivers/dri/r300/r300_tex.h @@ -46,8 +46,6 @@ extern void r300UpdateTextureState(GLcontext * ctx); extern int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t, GLuint face); -extern void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t); - extern void r300InitTextureFuncs(struct dd_function_table *functions); #endif /* __r300_TEX_H__ */ diff --git a/src/mesa/drivers/dri/r300/r300_texmem.c b/src/mesa/drivers/dri/r300/r300_texmem.c index 69847a4..b3b501b 100644 --- a/src/mesa/drivers/dri/r300/r300_texmem.c +++ b/src/mesa/drivers/dri/r300/r300_texmem.c @@ -48,439 +48,15 @@ SOFTWARE. #include "r300_context.h" #include "r300_state.h" #include "r300_cmdbuf.h" +#include "r300_emit.h" +#include "r300_mipmap_tree.h" #include "radeon_ioctl.h" #include "r300_tex.h" #include "r300_ioctl.h" #include <unistd.h> /* for usleep() */ -#ifdef USER_BUFFERS #include "r300_mem.h" -#endif -/** - * Destroy any device-dependent state associated with the texture. This may - * include NULLing out hardware state that points to the texture.
- */ -void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t) -{ - int i; - - if (RADEON_DEBUG & DEBUG_TEXTURE) { - fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__, - (void *)t, (void *)t->base.tObj); - } - - for (i = 0; i < rmesa->radeon.glCtx->Const.MaxTextureUnits; i++) { - if (rmesa->state.texture.unit[i].texobj == t) { - rmesa->state.texture.unit[i].texobj = NULL; - } - } -} - -/* ------------------------------------------------------------ - * Texture image conversions - */ - -static void r300UploadGARTClientSubImage(r300ContextPtr rmesa, - r300TexObjPtr t, - struct gl_texture_image *texImage, - GLint hwlevel, - GLint x, GLint y, - GLint width, GLint height) -{ - const struct gl_texture_format *texFormat = texImage->TexFormat; - GLuint srcPitch, dstPitch; - int blit_format; - int srcOffset; - - /* - * XXX it appears that we always upload the full image, not a subimage. - * I.e. x==0, y==0, width=texWidth, height=texWidth. If this is ever - * changed, the src pitch will have to change. 
- */ - switch (texFormat->TexelBytes) { - case 1: - blit_format = R300_CP_COLOR_FORMAT_CI8; - srcPitch = t->image[0][0].width * texFormat->TexelBytes; - dstPitch = t->image[0][0].width * texFormat->TexelBytes; - break; - case 2: - blit_format = R300_CP_COLOR_FORMAT_RGB565; - srcPitch = t->image[0][0].width * texFormat->TexelBytes; - dstPitch = t->image[0][0].width * texFormat->TexelBytes; - break; - case 4: - blit_format = R300_CP_COLOR_FORMAT_ARGB8888; - srcPitch = t->image[0][0].width * texFormat->TexelBytes; - dstPitch = t->image[0][0].width * texFormat->TexelBytes; - break; - case 8: - case 16: - blit_format = R300_CP_COLOR_FORMAT_CI8; - srcPitch = t->image[0][0].width * texFormat->TexelBytes; - dstPitch = t->image[0][0].width * texFormat->TexelBytes; - break; - default: - return; - } - - t->image[0][hwlevel].data = texImage->Data; - srcOffset = r300GartOffsetFromVirtual(rmesa, texImage->Data); - - assert(srcOffset != ~0); - - /* Don't currently need to cope with small pitches? - */ - width = texImage->Width; - height = texImage->Height; - - if (texFormat->TexelBytes > 4) { - width *= texFormat->TexelBytes; - } - - r300EmitWait(rmesa, R300_WAIT_3D); - - r300EmitBlit(rmesa, blit_format, - srcPitch, - srcOffset, - dstPitch, - t->bufAddr, - x, - y, - t->image[0][hwlevel].x + x, - t->image[0][hwlevel].y + y, width, height); - - r300EmitWait(rmesa, R300_WAIT_2D); -} - -static void r300UploadRectSubImage(r300ContextPtr rmesa, - r300TexObjPtr t, - struct gl_texture_image *texImage, - GLint x, GLint y, GLint width, GLint height) -{ - const struct gl_texture_format *texFormat = texImage->TexFormat; - int blit_format, dstPitch, done; - - switch (texFormat->TexelBytes) { - case 1: - blit_format = R300_CP_COLOR_FORMAT_CI8; - break; - case 2: - blit_format = R300_CP_COLOR_FORMAT_RGB565; - break; - case 4: - blit_format = R300_CP_COLOR_FORMAT_ARGB8888; - break; - case 8: - case 16: - blit_format = R300_CP_COLOR_FORMAT_CI8; - break; - default: - return; - } - - 
t->image[0][0].data = texImage->Data; - - /* Currently don't need to cope with small pitches. - */ - width = texImage->Width; - height = texImage->Height; - dstPitch = t->pitch; - - if (texFormat->TexelBytes > 4) { - width *= texFormat->TexelBytes; - } - - if (rmesa->prefer_gart_client_texturing && texImage->IsClientData) { - /* In this case, could also use GART texturing. This is - * currently disabled, but has been tested & works. - */ - t->offset = r300GartOffsetFromVirtual(rmesa, texImage->Data); - t->pitch = texImage->RowStride * texFormat->TexelBytes - 32; - - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, - "Using GART texturing for rectangular client texture\n"); - - /* Release FB memory allocated for this image: - */ - /* FIXME This may not be correct as driSwapOutTextureObject sets - * FIXME dirty_images. It may be fine, though. - */ - if (t->base.memBlock) { - driSwapOutTextureObject((driTextureObject *) t); - } - } else if (texImage->IsClientData) { - /* Data already in GART memory, with usable pitch. - */ - GLuint srcPitch; - srcPitch = texImage->RowStride * texFormat->TexelBytes; - r300EmitBlit(rmesa, - blit_format, - srcPitch, - r300GartOffsetFromVirtual(rmesa, texImage->Data), - dstPitch, t->bufAddr, 0, 0, 0, 0, width, height); - } else { - /* Data not in GART memory, or bad pitch. 
- */ - for (done = 0; done < height;) { - struct r300_dma_region region; - int lines = - MIN2(height - done, RADEON_BUFFER_SIZE / dstPitch); - int src_pitch; - char *tex; - - src_pitch = texImage->RowStride * texFormat->TexelBytes; - - tex = (char *)texImage->Data + done * src_pitch; - - memset(®ion, 0, sizeof(region)); - r300AllocDmaRegion(rmesa, ®ion, lines * dstPitch, - 1024); - - /* Copy texdata to dma: - */ - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, - "%s: src_pitch %d dst_pitch %d\n", - __FUNCTION__, src_pitch, dstPitch); - - if (src_pitch == dstPitch) { - memcpy(region.address + region.start, tex, - lines * src_pitch); - } else { - char *buf = region.address + region.start; - int i; - for (i = 0; i < lines; i++) { - memcpy(buf, tex, src_pitch); - buf += dstPitch; - tex += src_pitch; - } - } - - r300EmitWait(rmesa, R300_WAIT_3D); - - /* Blit to framebuffer - */ - r300EmitBlit(rmesa, - blit_format, - dstPitch, GET_START(®ion), - dstPitch | (t->tile_bits >> 16), - t->bufAddr, 0, 0, 0, done, width, lines); - - r300EmitWait(rmesa, R300_WAIT_2D); -#ifdef USER_BUFFERS - r300_mem_use(rmesa, region.buf->id); -#endif - - r300ReleaseDmaRegion(rmesa, ®ion, __FUNCTION__); - done += lines; - } - } -} - -/** - * Upload the texture image associated with texture \a t at the specified - * level at the address relative to \a start. 
- */ -static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t, - GLint hwlevel, - GLint x, GLint y, GLint width, GLint height, - GLuint face) -{ - struct gl_texture_image *texImage = NULL; - GLuint offset; - GLint imageWidth, imageHeight; - GLint ret; - drm_radeon_texture_t tex; - drm_radeon_tex_image_t tmp; - const int level = hwlevel + t->base.firstLevel; - - if (RADEON_DEBUG & DEBUG_TEXTURE) { - fprintf(stderr, - "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n", - __FUNCTION__, (void *)t, (void *)t->base.tObj, level, - width, height, face); - } - - ASSERT(face < 6); - - /* Ensure we have a valid texture to upload */ - if ((hwlevel < 0) || (hwlevel >= RADEON_MAX_TEXTURE_LEVELS)) { - _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__); - return; - } - - texImage = t->base.tObj->Image[face][level]; - - if (!texImage) { - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: texImage %d is NULL!\n", - __FUNCTION__, level); - return; - } - if (!texImage->Data) { - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: image data is NULL!\n", - __FUNCTION__); - return; - } - - if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) { - assert(level == 0); - assert(hwlevel == 0); - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: image data is rectangular\n", - __FUNCTION__); - r300UploadRectSubImage(rmesa, t, texImage, x, y, width, height); - return; - } else if (texImage->IsClientData) { - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, - "%s: image data is in GART client storage\n", - __FUNCTION__); - r300UploadGARTClientSubImage(rmesa, t, texImage, hwlevel, x, y, - width, height); - return; - } else if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: image data is in normal memory\n", - __FUNCTION__); - - imageWidth = texImage->Width; - imageHeight = texImage->Height; - - offset = t->bufAddr; - - if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) { - GLint imageX = 0; - GLint imageY = 0; - GLint blitX = 
t->image[face][hwlevel].x; - GLint blitY = t->image[face][hwlevel].y; - GLint blitWidth = t->image[face][hwlevel].width; - GLint blitHeight = t->image[face][hwlevel].height; - fprintf(stderr, " upload image: %d,%d at %d,%d\n", - imageWidth, imageHeight, imageX, imageY); - fprintf(stderr, " upload blit: %d,%d at %d,%d\n", - blitWidth, blitHeight, blitX, blitY); - fprintf(stderr, " blit ofs: 0x%07x level: %d/%d\n", - (GLuint) offset, hwlevel, level); - } - - t->image[face][hwlevel].data = texImage->Data; - - /* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct. - * NOTE: we're always use a 1KB-wide blit and I8 texture format. - * We used to use 1, 2 and 4-byte texels and used to use the texture - * width to dictate the blit width - but that won't work for compressed - * textures. (Brian) - * NOTE: can't do that with texture tiling. (sroland) - */ - tex.offset = offset; - tex.image = &tmp; - /* copy (x,y,width,height,data) */ - memcpy(&tmp, &t->image[face][hwlevel], sizeof(tmp)); - - if (texImage->TexFormat->TexelBytes > 4) { - const int log2TexelBytes = - (3 + (texImage->TexFormat->TexelBytes >> 4)); - tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */ - tex.pitch = - MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / - 64, 1); - tex.height = imageHeight; - tex.width = imageWidth << log2TexelBytes; - tex.offset += (tmp.x << log2TexelBytes) & ~1023; - tmp.x = tmp.x % (1024 >> log2TexelBytes); - tmp.width = tmp.width << log2TexelBytes; - } else if (texImage->TexFormat->TexelBytes) { - /* use multi-byte upload scheme */ - tex.height = imageHeight; - tex.width = imageWidth; - switch (texImage->TexFormat->TexelBytes) { - case 1: - tex.format = RADEON_TXFORMAT_I8; - break; - case 2: - tex.format = RADEON_TXFORMAT_AI88; - break; - case 4: - tex.format = RADEON_TXFORMAT_ARGB8888; - break; - } - tex.pitch = - MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / - 64, 1); - tex.offset += tmp.x & ~1023; - tmp.x = tmp.x % 1024; - - if 
(t->tile_bits & R300_TXO_MICRO_TILE) { - /* need something like "tiled coordinates" ? */ - tmp.y = tmp.x / (tex.pitch * 128) * 2; - tmp.x = - tmp.x % (tex.pitch * 128) / 2 / - texImage->TexFormat->TexelBytes; - tex.pitch |= RADEON_DST_TILE_MICRO >> 22; - } else { - tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1); - } -#if 1 - if ((t->tile_bits & R300_TXO_MACRO_TILE) && - (texImage->Width * texImage->TexFormat->TexelBytes >= 256) - && ((!(t->tile_bits & R300_TXO_MICRO_TILE) - && (texImage->Height >= 8)) - || (texImage->Height >= 16))) { - /* weird: R200 disables macro tiling if mip width is smaller than 256 bytes, - OR if height is smaller than 8 automatically, but if micro tiling is active - the limit is height 16 instead ? */ - tex.pitch |= RADEON_DST_TILE_MACRO >> 22; - } -#endif - } else { - /* In case of for instance 8x8 texture (2x2 dxt blocks), - padding after the first two blocks is needed (only - with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */ - /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) - has 4 real pixels. Needed so the kernel module reads - the right amount of data. 
*/ - tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */ - tex.pitch = (R300_BLIT_WIDTH_BYTES / 64); - tex.height = (imageHeight + 3) / 4; - tex.width = (imageWidth + 3) / 4; - if ((t->format & R300_TX_FORMAT_DXT1) == R300_TX_FORMAT_DXT1) { - tex.width *= 8; - } else { - tex.width *= 16; - } - } - - LOCK_HARDWARE(&rmesa->radeon); - do { - ret = - drmCommandWriteRead(rmesa->radeon.dri.fd, - DRM_RADEON_TEXTURE, &tex, - sizeof(drm_radeon_texture_t)); - if (ret) { - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, - "DRM_RADEON_TEXTURE: again!\n"); - usleep(1); - } - } while (ret == -EAGAIN); - - UNLOCK_HARDWARE(&rmesa->radeon); - - if (ret) { - fprintf(stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret); - fprintf(stderr, " offset=0x%08x\n", offset); - fprintf(stderr, " image width=%d height=%d\n", - imageWidth, imageHeight); - fprintf(stderr, " blit width=%d height=%d data=%p\n", - t->image[face][hwlevel].width, - t->image[face][hwlevel].height, - t->image[face][hwlevel].data); - _mesa_exit(-1); - } -} /** * Upload the texture images associated with texture \a t. 
This might @@ -493,69 +69,32 @@ static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t, int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t, GLuint face) { - const int numLevels = t->base.lastLevel - t->base.firstLevel + 1; - if (t->image_override) return 0; + if (!t->mt) + return 0; if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) { - fprintf(stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__, - (void *)rmesa->radeon.glCtx, (void *)t->base.tObj, - t->base.totalSize, t->base.firstLevel, - t->base.lastLevel); + fprintf(stderr, "%s( %p, %p ) lvls=%d-%d\n", __FUNCTION__, + (void *)rmesa->radeon.glCtx, t, + t->mt->firstLevel, t->mt->lastLevel); } - if (t->base.totalSize == 0) - return 0; - if (RADEON_DEBUG & DEBUG_SYNC) { fprintf(stderr, "%s: Syncing\n", __FUNCTION__); radeonFinish(rmesa->radeon.glCtx); } - LOCK_HARDWARE(&rmesa->radeon); - - if (t->base.memBlock == NULL) { - int heap; - - heap = driAllocateTexture(rmesa->texture_heaps, rmesa->nr_heaps, - (driTextureObject *) t); - if (heap == -1) { - UNLOCK_HARDWARE(&rmesa->radeon); - return -1; - } - - /* Set the base offset of the texture image */ - t->bufAddr = rmesa->radeon.radeonScreen->texOffset[heap] - + t->base.memBlock->ofs; - t->offset = t->bufAddr; - - if (!(t->base.tObj->Image[0][0]->IsClientData)) { - /* hope it's safe to add that here... */ - t->offset |= t->tile_bits; - } - } - - /* Let the world know we've used this memory recently. - */ - driUpdateTextureLRU((driTextureObject *) t); - UNLOCK_HARDWARE(&rmesa->radeon); - /* Upload any images that are new */ - if (t->base.dirty_images[face]) { - int i; + if (t->dirty_images[face]) { + int i, numLevels = t->mt->lastLevel - t->mt->firstLevel + 1; for (i = 0; i < numLevels; i++) { - if ((t->base. 
- dirty_images[face] & (1 << - (i + t->base.firstLevel))) != - 0) { - r300UploadSubImage(rmesa, t, i, 0, 0, - t->image[face][i].width, - t->image[face][i].height, - face); + if (t->dirty_images[face] & (1 << (i + t->mt->firstLevel))) { + r300_miptree_upload_image(t->mt, face, t->mt->firstLevel + i, + t->base.Image[face][t->mt->firstLevel + i]); } } - t->base.dirty_images[face] = 0; + t->dirty_images[face] = 0; } if (RADEON_DEBUG & DEBUG_SYNC) { diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c index bdd20b1..1b24738 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -48,6 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_state.h" #include "r300_ioctl.h" #include "radeon_ioctl.h" +#include "r300_mipmap_tree.h" #include "r300_tex.h" #include "r300_reg.h" @@ -148,8 +149,7 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj) if (!tObj) return; - t = (r300TexObjPtr) tObj->DriverData; - + t = r300_tex_obj(tObj); switch (tObj->Image[0][tObj->BaseLevel]->TexFormat->MesaFormat) { case MESA_FORMAT_Z16: @@ -189,118 +189,59 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj) } -/** - * Compute sizes and fill in offset and blit information for the given - * image (determined by \p face and \p level). - * - * \param curOffset points to the offset at which the image is to be stored - * and is updated by this function according to the size of the image. 
- */ -static void compute_tex_image_offset( - struct gl_texture_object *tObj, - GLuint face, - GLint level, - GLint* curOffset) +static void calculate_first_last_level(struct gl_texture_object *tObj, + GLuint *pfirstLevel, GLuint *plastLevel) { - r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; - const struct gl_texture_image* texImage; - GLuint blitWidth = R300_BLIT_WIDTH_BYTES; - GLuint texelBytes; - GLuint size; - - texImage = tObj->Image[0][level + t->base.firstLevel]; - if (!texImage) - return; - - texelBytes = texImage->TexFormat->TexelBytes; - - /* find image size in bytes */ - if (texImage->IsCompressed) { - if ((t->format & R300_TX_FORMAT_DXT1) == - R300_TX_FORMAT_DXT1) { - // fprintf(stderr,"DXT 1 %d %08X\n", texImage->Width, t->format); - if ((texImage->Width + 3) < 8) /* width one block */ - size = texImage->CompressedSize * 4; - else if ((texImage->Width + 3) < 16) - size = texImage->CompressedSize * 2; - else - size = texImage->CompressedSize; + const struct gl_texture_image * const baseImage = + tObj->Image[0][tObj->BaseLevel]; + + /* These must be signed values. MinLod and MaxLod can be negative numbers, + * and having firstLevel and lastLevel as signed prevents the need for + * extra sign checks. + */ + int firstLevel; + int lastLevel; + + /* Yes, this looks overly complicated, but it's all needed. + */ + switch (tObj->Target) { + case GL_TEXTURE_1D: + case GL_TEXTURE_2D: + case GL_TEXTURE_3D: + case GL_TEXTURE_CUBE_MAP: + if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) { + /* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL. 
+ */ + firstLevel = lastLevel = tObj->BaseLevel; } else { - /* DXT3/5, 16 bytes per block */ - WARN_ONCE - ("DXT 3/5 suffers from multitexturing problems!\n"); - // fprintf(stderr,"DXT 3/5 %d\n", texImage->Width); - if ((texImage->Width + 3) < 8) - size = texImage->CompressedSize * 2; - else - size = texImage->CompressedSize; + firstLevel = tObj->BaseLevel + (GLint)(tObj->MinLod + 0.5); + firstLevel = MAX2(firstLevel, tObj->BaseLevel); + firstLevel = MIN2(firstLevel, tObj->BaseLevel + baseImage->MaxLog2); + lastLevel = tObj->BaseLevel + (GLint)(tObj->MaxLod + 0.5); + lastLevel = MAX2(lastLevel, tObj->BaseLevel); + lastLevel = MIN2(lastLevel, tObj->BaseLevel + baseImage->MaxLog2); + lastLevel = MIN2(lastLevel, tObj->MaxLevel); + lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */ } - } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { - size = - ((texImage->Width * texelBytes + - 63) & ~63) * texImage->Height; - blitWidth = 64 / texelBytes; - } else if (t->tile_bits & R300_TXO_MICRO_TILE) { - /* tile pattern is 16 bytes x2. 
mipmaps stay 32 byte aligned, - though the actual offset may be different (if texture is less than - 32 bytes width) to the untiled case */ - int w = (texImage->Width * texelBytes * 2 + 31) & ~31; - size = - (w * ((texImage->Height + 1) / 2)) * - texImage->Depth; - blitWidth = MAX2(texImage->Width, 64 / texelBytes); - } else { - int w = (texImage->Width * texelBytes + 31) & ~31; - size = w * texImage->Height * texImage->Depth; - blitWidth = MAX2(texImage->Width, 64 / texelBytes); - } - assert(size > 0); - - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "w=%d h=%d d=%d tb=%d intFormat=%d\n", - texImage->Width, texImage->Height, - texImage->Depth, - texImage->TexFormat->TexelBytes, - texImage->InternalFormat); - - /* All images are aligned to a 32-byte offset */ - *curOffset = (*curOffset + 0x1f) & ~0x1f; - - if (texelBytes) { - /* fix x and y coords up later together with offset */ - t->image[face][level].x = *curOffset; - t->image[face][level].y = 0; - t->image[face][level].width = - MIN2(size / texelBytes, blitWidth); - t->image[face][level].height = - (size / texelBytes) / t->image[face][level].width; - } else { - t->image[face][level].x = *curOffset % R300_BLIT_WIDTH_BYTES; - t->image[face][level].y = *curOffset / R300_BLIT_WIDTH_BYTES; - t->image[face][level].width = - MIN2(size, R300_BLIT_WIDTH_BYTES); - t->image[face][level].height = size / t->image[face][level].width; + break; + case GL_TEXTURE_RECTANGLE_NV: + case GL_TEXTURE_4D_SGIS: + firstLevel = lastLevel = 0; + break; + default: + return; } - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, - "level %d, face %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n", - level, face, texImage->Width, texImage->Height, - t->image[face][level].x, t->image[face][level].y, - t->image[face][level].width, t->image[face][level].height, - size, *curOffset); - - *curOffset += size; + /* save these values */ + *pfirstLevel = firstLevel; + *plastLevel = lastLevel; } - /** - * This function computes the number of bytes 
of storage needed for - * the given texture object (all mipmap levels, all cube faces). - * The \c image[face][level].x/y/width/height parameters for upload/blitting - * are computed here. \c filter, \c format, etc. will be set here - * too. + * This function ensures a validated miptree is available. + * + * Additionally, some texture format bits are configured here. * * \param rmesa Context pointer * \param tObj GL texture object whose images are to be posted to @@ -309,13 +250,13 @@ static void compute_tex_image_offset( static void r300SetTexImages(r300ContextPtr rmesa, struct gl_texture_object *tObj) { - r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; + r300TexObjPtr t = r300_tex_obj(tObj); const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel]; - GLint curOffset; - GLint i, texelBytes; - GLint numLevels; - GLint log2Width, log2Height, log2Depth; + GLint texelBytes; + GLuint firstLevel = 0, lastLevel = 0; + + calculate_first_last_level(tObj, &firstLevel, &lastLevel); /* Set the hardware texture format */ @@ -335,107 +276,59 @@ static void r300SetTexImages(r300ContextPtr rmesa, } texelBytes = baseImage->TexFormat->TexelBytes; - - /* Compute which mipmap levels we really want to send to the hardware. - */ - driCalculateTextureFirstLastLevel((driTextureObject *) t); - log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2; - log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2; - log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2; - - numLevels = t->base.lastLevel - t->base.firstLevel + 1; - - assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS); - - /* Calculate mipmap offsets and dimensions for blitting (uploading) - * The idea is that we lay out the mipmap levels within a block of - * memory organized as a rectangle of width BLIT_WIDTH_BYTES. - */ t->tile_bits = 0; - /* figure out if this texture is suitable for tiling. 
*/ -#if 0 /* Disabled for now */ - if (texelBytes) { - if ((tObj->Target != GL_TEXTURE_RECTANGLE_NV) && - /* texrect might be able to use micro tiling too in theory? */ - (baseImage->Height > 1)) { - - /* allow 32 (bytes) x 1 mip (which will use two times the space - the non-tiled version would use) max if base texture is large enough */ - if ((numLevels == 1) || - (((baseImage->Width * texelBytes / - baseImage->Height) <= 32) - && (baseImage->Width * texelBytes > 64)) - || - ((baseImage->Width * texelBytes / - baseImage->Height) <= 16)) { - t->tile_bits |= R300_TXO_MICRO_TILE; - } - } + if (tObj->Target == GL_TEXTURE_CUBE_MAP) + t->format |= R300_TX_FORMAT_CUBIC_MAP; - if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) { - /* we can set macro tiling even for small textures, they will be untiled anyway */ - t->tile_bits |= R300_TXO_MACRO_TILE; + if (!t->image_override) { + GLuint compressed = baseImage->IsCompressed ? baseImage->TexFormat->MesaFormat : 0; + + if (t->mt) { + if (t->mt->firstLevel != firstLevel || + t->mt->lastLevel != lastLevel || + t->mt->width0 != baseImage->Width || + t->mt->height0 != baseImage->Height || + t->mt->depth0 != baseImage->Depth || + t->mt->bpp != texelBytes || + t->mt->tilebits != t->tile_bits || + t->mt->compressed != compressed) { + r300_miptree_destroy(t->mt); + t->mt = 0; + } } - } -#endif - - curOffset = 0; - if (tObj->Target == GL_TEXTURE_CUBE_MAP) { - ASSERT(log2Width == log2Height); - t->format |= R300_TX_FORMAT_CUBIC_MAP; - - for(i = 0; i < numLevels; i++) { - GLuint face; - for(face = 0; face < 6; face++) - compute_tex_image_offset(tObj, face, i, &curOffset); + if (!t->mt) { + t->mt = r300_miptree_create(rmesa, t, tObj->Target, + firstLevel, lastLevel, + baseImage->Width, baseImage->Height, baseImage->Depth, + texelBytes, t->tile_bits, compressed); + memset(t->dirty_images, 0xff, sizeof(t->dirty_images)); } - } else { - for (i = 0; i < numLevels; i++) - compute_tex_image_offset(tObj, 0, i, &curOffset); } - /* Align the total 
size of texture memory block. - */ - t->base.totalSize = - (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK; - - t->size = - (((tObj->Image[0][t->base.firstLevel]->Width - - 1) << R300_TX_WIDTHMASK_SHIFT) - | ((tObj->Image[0][t->base.firstLevel]->Height - 1) << - R300_TX_HEIGHTMASK_SHIFT)) - | ((numLevels - 1) << R300_TX_MAX_MIP_LEVEL_SHIFT); - + t->size = (((tObj->Image[0][firstLevel]->Width - 1) << R300_TX_WIDTHMASK_SHIFT) + | ((tObj->Image[0][firstLevel]->Height - 1) << R300_TX_HEIGHTMASK_SHIFT)) + | ((lastLevel - firstLevel) << R300_TX_MAX_MIP_LEVEL_SHIFT); t->pitch = 0; - /* Only need to round to nearest 32 for textures, but the blitter - * requires 64-byte aligned pitches, and we may/may not need the - * blitter. NPOT only! - */ if (baseImage->IsCompressed) { - t->pitch |= - (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63); + t->pitch |= (tObj->Image[0][firstLevel]->Width + 63) & ~(63); } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { unsigned int align = (64 / texelBytes) - 1; - t->pitch |= ((tObj->Image[0][t->base.firstLevel]->Width * + t->pitch |= ((tObj->Image[0][firstLevel]->Width * texelBytes) + 63) & ~(63); t->size |= R300_TX_SIZE_TXPITCH_EN; if (!t->image_override) - t->pitch_reg = - (((tObj->Image[0][t->base.firstLevel]->Width) + - align) & ~align) - 1; + t->pitch_reg = (((tObj->Image[0][firstLevel]->Width) + align) & ~align) - 1; } else { - t->pitch |= - ((tObj->Image[0][t->base.firstLevel]->Width * - texelBytes) + 63) & ~(63); + t->pitch |= ((tObj->Image[0][firstLevel]->Width * texelBytes) + 63) & ~(63); } if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - if (tObj->Image[0][t->base.firstLevel]->Width > 2048) + if (tObj->Image[0][firstLevel]->Width > 2048) t->pitch_reg |= R500_TXWIDTH_BIT11; - if (tObj->Image[0][t->base.firstLevel]->Height > 2048) + if (tObj->Image[0][firstLevel]->Height > 2048) t->pitch_reg |= R500_TXHEIGHT_BIT11; } } @@ -449,17 +342,15 @@ static GLboolean r300EnableTexture2D(GLcontext * ctx, 
int unit) r300ContextPtr rmesa = R300_CONTEXT(ctx); struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; struct gl_texture_object *tObj = texUnit->_Current; - r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; + r300TexObjPtr t = r300_tex_obj(tObj); ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D); - if (t->base.dirty_images[0]) { + if (!t->mt || t->dirty_images[0]) { R300_FIREVERTICES(rmesa); r300SetTexImages(rmesa, tObj); - r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0); - if (!t->base.memBlock && !t->image_override) - return GL_FALSE; + r300UploadTexImages(rmesa, t, 0); } return GL_TRUE; @@ -470,7 +361,7 @@ static GLboolean r300EnableTexture3D(GLcontext * ctx, int unit) r300ContextPtr rmesa = R300_CONTEXT(ctx); struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; struct gl_texture_object *tObj = texUnit->_Current; - r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; + r300TexObjPtr t = r300_tex_obj(tObj); ASSERT(tObj->Target == GL_TEXTURE_3D); @@ -479,12 +370,10 @@ static GLboolean r300EnableTexture3D(GLcontext * ctx, int unit) return GL_FALSE; } - if (t->base.dirty_images[0]) { + if (!t->mt || t->dirty_images[0]) { R300_FIREVERTICES(rmesa); r300SetTexImages(rmesa, tObj); - r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0); - if (!t->base.memBlock) - return GL_FALSE; + r300UploadTexImages(rmesa, t, 0); } return GL_TRUE; @@ -495,14 +384,15 @@ static GLboolean r300EnableTextureCube(GLcontext * ctx, int unit) r300ContextPtr rmesa = R300_CONTEXT(ctx); struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; struct gl_texture_object *tObj = texUnit->_Current; - r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; + r300TexObjPtr t = r300_tex_obj(tObj); GLuint face; ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP); - if (t->base.dirty_images[0] || t->base.dirty_images[1] || - t->base.dirty_images[2] || t->base.dirty_images[3] || - t->base.dirty_images[4] || t->base.dirty_images[5]) { + if (!t->mt || + 
t->dirty_images[0] || t->dirty_images[1] || + t->dirty_images[2] || t->dirty_images[3] || + t->dirty_images[4] || t->dirty_images[5]) { /* flush */ R300_FIREVERTICES(rmesa); /* layout memory space, once for all faces */ @@ -511,18 +401,11 @@ static GLboolean r300EnableTextureCube(GLcontext * ctx, int unit) /* upload (per face) */ for (face = 0; face < 6; face++) { - if (t->base.dirty_images[face]) { - r300UploadTexImages(rmesa, - (r300TexObjPtr) tObj->DriverData, - face); + if (t->dirty_images[face]) { + r300UploadTexImages(rmesa, t, face); } } - if (!t->base.memBlock) { - /* texmem alloc failed, use s/w fallback */ - return GL_FALSE; - } - return GL_TRUE; } @@ -531,18 +414,15 @@ static GLboolean r300EnableTextureRect(GLcontext * ctx, int unit) r300ContextPtr rmesa = R300_CONTEXT(ctx); struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; struct gl_texture_object *tObj = texUnit->_Current; - r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; + r300TexObjPtr t = r300_tex_obj(tObj); ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV); - if (t->base.dirty_images[0]) { + if (!t->mt || t->dirty_images[0]) { R300_FIREVERTICES(rmesa); r300SetTexImages(rmesa, tObj); - r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0); - if (!t->base.memBlock && !t->image_override && - !rmesa->prefer_gart_client_texturing) - return GL_FALSE; + r300UploadTexImages(rmesa, t, 0); } return GL_TRUE; @@ -550,34 +430,19 @@ static GLboolean r300EnableTextureRect(GLcontext * ctx, int unit) static GLboolean r300UpdateTexture(GLcontext * ctx, int unit) { - r300ContextPtr rmesa = R300_CONTEXT(ctx); struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; struct gl_texture_object *tObj = texUnit->_Current; - r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; + r300TexObjPtr t = r300_tex_obj(tObj); /* Fallback if there's a texture border */ if (tObj->Image[0][tObj->BaseLevel]->Border > 0) return GL_FALSE; - /* Update state if this is a different texture object to last - * time. 
- */ - if (rmesa->state.texture.unit[unit].texobj != t) { - if (rmesa->state.texture.unit[unit].texobj != NULL) { - /* The old texture is no longer bound to this texture unit. - * Mark it as such. - */ - - rmesa->state.texture.unit[unit].texobj->base.bound &= - ~(1 << unit); - } - - rmesa->state.texture.unit[unit].texobj = t; - t->base.bound |= (1 << unit); - driUpdateTextureLRU((driTextureObject *) t); /* XXX: should be locked! */ - } + /* Fallback if memory upload didn't work */ + if (!t->mt) + return GL_FALSE; - return !t->border_fallback; + return GL_TRUE; } void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname, @@ -586,20 +451,18 @@ void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname, r300ContextPtr rmesa = pDRICtx->driverPrivate; struct gl_texture_object *tObj = _mesa_lookup_texture(rmesa->radeon.glCtx, texname); - r300TexObjPtr t; + r300TexObjPtr t = r300_tex_obj(tObj); uint32_t pitch_val; if (!tObj) return; - t = (r300TexObjPtr) tObj->DriverData; - t->image_override = GL_TRUE; if (!offset) return; - t->offset = offset; + t->override_offset = offset; t->pitch_reg &= (1 << 13) -1; pitch_val = pitch; diff --git a/src/mesa/drivers/dri/r300/radeon_context.c b/src/mesa/drivers/dri/r300/radeon_context.c index 3fc724a..a84c8fc 100644 --- a/src/mesa/drivers/dri/r300/radeon_context.c +++ b/src/mesa/drivers/dri/r300/radeon_context.c @@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "state.h" #include "matrix.h" #include "framebuffer.h" +#include "drirenderbuffer.h" #include "drivers/common/driverfuncs.h" #include "swrast/swrast.h" @@ -258,6 +259,52 @@ void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv, } } +static void +radeon_make_renderbuffer_current(radeonContextPtr radeon, + GLframebuffer *draw) +{ + int size = radeon->radeonScreen->driScreen->fbSize; + void *map = 0; + /* if radeon->fake */ + struct radeon_renderbuffer *rb; + uint32_t offset; + if (!radeon->bufmgr) + return; + + if ((rb = (void *)draw->Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) { + + offset = radeon->radeonScreen->kernel_mm ? radeon->radeonScreen->front.offset : radeon->radeonScreen->frontOffset; + if (!rb->bo) + rb->bo = dri_bo_alloc_static(&radeon->bufmgr->base, "front buffer", + radeon->radeonScreen->frontOffset, size, map, + DRM_BO_FLAG_MEM_VRAM); + fprintf(stderr,"front is %p\n", rb->bo); + rb->cpp = radeon->radeonScreen->cpp; + rb->pitch = radeon->radeonScreen->frontPitch; + } + if ((rb = (void *)draw->Attachment[BUFFER_BACK_LEFT].Renderbuffer)) { + offset = radeon->radeonScreen->kernel_mm ? radeon->radeonScreen->back.offset : radeon->radeonScreen->backOffset; + if (!rb->bo) + rb->bo = dri_bo_alloc_static(&radeon->bufmgr->base, "back buffer", + radeon->radeonScreen->backOffset, size, map, + DRM_BO_FLAG_MEM_VRAM); + fprintf(stderr,"back is %p\n", rb->bo); + rb->cpp = radeon->radeonScreen->cpp; + rb->pitch = radeon->radeonScreen->backPitch; + } + if ((rb = (void *)draw->Attachment[BUFFER_DEPTH].Renderbuffer)) { + offset = radeon->radeonScreen->kernel_mm ? 
radeon->radeonScreen->depth.offset : radeon->radeonScreen->depthOffset; + if (!rb->bo) + rb->bo = dri_bo_alloc_static(&radeon->bufmgr->base, "depth buffer", + radeon->radeonScreen->depthOffset, size, map, + DRM_BO_FLAG_MEM_VRAM); + fprintf(stderr,"depth is %p\n", rb->bo); + rb->cpp = radeon->radeonScreen->cpp; + rb->pitch = radeon->radeonScreen->depthPitch; + } +} + + /* Force the context `c' to be the current context and associate with it * buffer `b'. */ @@ -265,51 +312,57 @@ GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv, __DRIdrawablePrivate * driDrawPriv, __DRIdrawablePrivate * driReadPriv) { - if (driContextPriv) { - radeonContextPtr radeon = - (radeonContextPtr) driContextPriv->driverPrivate; + radeonContextPtr radeon; + GLframebuffer *dfb, *rfb; + if (!driContextPriv) { if (RADEON_DEBUG & DEBUG_DRI) - fprintf(stderr, "%s ctx %p\n", __FUNCTION__, - radeon->glCtx); - - if (radeon->dri.drawable != driDrawPriv) { - if (driDrawPriv->swap_interval == (unsigned)-1) { - driDrawPriv->vblFlags = - (radeon->radeonScreen->irq != 0) - ? 
driGetDefaultVBlankFlags(&radeon-> - optionCache) - : VBLANK_FLAG_NO_IRQ; + fprintf(stderr, "%s ctx is null\n", __FUNCTION__); + _mesa_make_current(NULL, NULL, NULL); + return GL_TRUE; + } - driDrawableInitVBlank(driDrawPriv); - } - } + radeon = (radeonContextPtr) driContextPriv->driverPrivate; + dfb = driDrawPriv->driverPrivate; + rfb = driReadPriv->driverPrivate; - radeon->dri.readable = driReadPriv; + if (RADEON_DEBUG & DEBUG_DRI) + fprintf(stderr, "%s ctx %p\n", __FUNCTION__, radeon->glCtx); - if (radeon->dri.drawable != driDrawPriv || - radeon->lastStamp != driDrawPriv->lastStamp) { - radeon->dri.drawable = driDrawPriv; + driUpdateFramebufferSize(radeon->glCtx, driDrawPriv); + if (driReadPriv != driDrawPriv) + driUpdateFramebufferSize(radeon->glCtx, driReadPriv); - radeonSetCliprects(radeon); - r300UpdateViewportOffset(radeon->glCtx); - } + radeon_make_renderbuffer_current(radeon, dfb); - _mesa_make_current(radeon->glCtx, - (GLframebuffer *) driDrawPriv-> - driverPrivate, - (GLframebuffer *) driReadPriv-> - driverPrivate); + _mesa_make_current(radeon->glCtx, dfb, rfb); - _mesa_update_state(radeon->glCtx); + if (radeon->dri.drawable != driDrawPriv) { + if (driDrawPriv->swap_interval == (unsigned)-1) { + driDrawPriv->vblFlags = + (radeon->radeonScreen->irq != 0) + ? 
driGetDefaultVBlankFlags(&radeon-> + optionCache) + : VBLANK_FLAG_NO_IRQ; + + driDrawableInitVBlank(driDrawPriv); + } + } - radeonUpdatePageFlipping(radeon); - } else { - if (RADEON_DEBUG & DEBUG_DRI) - fprintf(stderr, "%s ctx is null\n", __FUNCTION__); - _mesa_make_current(0, 0, 0); + radeon->dri.readable = driReadPriv; + + if (radeon->dri.drawable != driDrawPriv || + radeon->lastStamp != driDrawPriv->lastStamp) { + radeon->dri.drawable = driDrawPriv; + + radeonSetCliprects(radeon); + r300UpdateViewportOffset(radeon->glCtx); } + _mesa_update_state(radeon->glCtx); + + radeonUpdatePageFlipping(radeon); + if (RADEON_DEBUG & DEBUG_DRI) fprintf(stderr, "End %s\n", __FUNCTION__); return GL_TRUE; diff --git a/src/mesa/drivers/dri/r300/radeon_context.h b/src/mesa/drivers/dri/r300/radeon_context.h index 7458d63..828853b 100644 --- a/src/mesa/drivers/dri/r300/radeon_context.h +++ b/src/mesa/drivers/dri/r300/radeon_context.h @@ -48,6 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "drm.h" #include "dri_util.h" #include "colormac.h" +#include "radeon_buffer.h" struct radeon_context; typedef struct radeon_context radeonContextRec; @@ -132,12 +133,13 @@ struct radeon_scissor_state { struct radeon_colorbuffer_state { GLuint clear; - GLint drawOffset, drawPitch; + struct radeon_renderbuffer *rrb; }; struct radeon_state { struct radeon_colorbuffer_state color; struct radeon_scissor_state scissor; + struct radeon_renderbuffer *depth_buffer; }; /** @@ -185,6 +187,8 @@ struct radeon_context { /* Configuration cache */ driOptionCache optionCache; + + struct radeon_bufmgr *bufmgr; }; #define RADEON_CONTEXT(glctx) ((radeonContextPtr)(ctx->DriverCtx)) diff --git a/src/mesa/drivers/dri/r300/radeon_ioctl.c b/src/mesa/drivers/dri/r300/radeon_ioctl.c index 0c1a195..486ce8e 100644 --- a/src/mesa/drivers/dri/r300/radeon_ioctl.c +++ b/src/mesa/drivers/dri/r300/radeon_ioctl.c @@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "swrast/swrast.h" #include "r300_context.h" #include "radeon_ioctl.h" +#include "radeon_buffer.h" #include "r300_ioctl.h" #include "r300_state.h" #include "radeon_reg.h" @@ -171,7 +172,7 @@ void radeonCopyBuffer(__DRIdrawablePrivate * dPriv, assert(dPriv->driContextPriv->driverPrivate); radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; - + if (RADEON_DEBUG & DEBUG_IOCTL) { fprintf(stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *)radeon->glCtx); @@ -261,6 +262,8 @@ void radeonPageFlip(__DRIdrawablePrivate * dPriv) GLint ret; GLboolean missed_target; __DRIscreenPrivate *psp = dPriv->driScreenPriv; + GLframebuffer *fb = dPriv->driverPrivate; + struct radeon_renderbuffer *rrb; assert(dPriv); assert(dPriv->driContextPriv); @@ -268,6 +271,8 @@ void radeonPageFlip(__DRIdrawablePrivate * dPriv) radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; + rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; + if (RADEON_DEBUG & DEBUG_IOCTL) { fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__, radeon->sarea->pfCurrentPage); @@ -315,32 +320,10 @@ void radeonPageFlip(__DRIdrawablePrivate * dPriv) radeon->swap_count++; (void)(*psp->systemTime->getUST) (&radeon->swap_ust); - driFlipRenderbuffers(radeon->glCtx->WinSysDrawBuffer, + driFlipRenderbuffers(radeon->glCtx->WinSysDrawBuffer, radeon->sarea->pfCurrentPage); - if (radeon->sarea->pfCurrentPage == 1) { - radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset; - radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch; - } else { - radeon->state.color.drawOffset = radeon->radeonScreen->backOffset; - radeon->state.color.drawPitch = radeon->radeonScreen->backPitch; - } - - if (IS_R300_CLASS(radeon->radeonScreen)) { - r300ContextPtr r300 = (r300ContextPtr)radeon; - R300_STATECHANGE(r300, cb); - r300->hw.cb.cmd[R300_CB_OFFSET] = r300->radeon.state.color.drawOffset + - r300->radeon.radeonScreen->fbLocation; - r300->hw.cb.cmd[R300_CB_PITCH] = 
r300->radeon.state.color.drawPitch; - - if (r300->radeon.radeonScreen->cpp == 4) - r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888; - else - r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565; - - if (r300->radeon.sarea->tiling_enabled) - r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE; - } + radeon->state.color.rrb = rrb; } void radeonWaitForIdleLocked(radeonContextPtr radeon) diff --git a/src/mesa/drivers/dri/r300/radeon_lock.c b/src/mesa/drivers/dri/r300/radeon_lock.c index d54a821..3529555 100644 --- a/src/mesa/drivers/dri/r300/radeon_lock.c +++ b/src/mesa/drivers/dri/r300/radeon_lock.c @@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_state.h" #include "r300_context.h" #include "r300_state.h" +#include "r300_mem.h" #include "framebuffer.h" @@ -59,6 +60,8 @@ int prevLockLine = 0; void radeonUpdatePageFlipping(radeonContextPtr rmesa) { int use_back; + __DRIdrawablePrivate *const drawable = rmesa->dri.drawable; + GLframebuffer *fb = drawable->driverPrivate; rmesa->doPageFlip = rmesa->sarea->pfState; if (rmesa->glCtx->WinSysDrawBuffer) { @@ -72,16 +75,12 @@ void radeonUpdatePageFlipping(radeonContextPtr rmesa) BUFFER_BACK_LEFT) : 1; use_back ^= (rmesa->sarea->pfCurrentPage == 1); - if (use_back) { - rmesa->state.color.drawOffset = - rmesa->radeonScreen->backOffset; - rmesa->state.color.drawPitch = rmesa->radeonScreen->backPitch; - } else { - rmesa->state.color.drawOffset = - rmesa->radeonScreen->frontOffset; - rmesa->state.color.drawPitch = - rmesa->radeonScreen->frontPitch; - } + if (use_back) + rmesa->state.color.rrb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; + else + rmesa->state.color.rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; + + rmesa->state.depth_buffer = (void *)fb->Attachment[BUFFER_DEPTH].Renderbuffer; } /* Update the hardware state. 
This is called if another context has @@ -125,12 +124,8 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags) } if (sarea->ctx_owner != rmesa->dri.hwContext) { - int i; - sarea->ctx_owner = rmesa->dri.hwContext; - for (i = 0; i < r300->nr_heaps; i++) { - DRI_AGE_TEXTURES(r300->texture_heaps[i]); - } + radeonBufmgrContendedLockTake(r300->radeon.bufmgr); } rmesa->lost_context = GL_TRUE; diff --git a/src/mesa/drivers/dri/r300/radeon_span.c b/src/mesa/drivers/dri/r300/radeon_span.c index f1bc56e..7ea0842 100644 --- a/src/mesa/drivers/dri/r300/radeon_span.c +++ b/src/mesa/drivers/dri/r300/radeon_span.c @@ -48,7 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_ioctl.h" #include "radeon_span.h" -#include "drirenderbuffer.h" +#include "radeon_buffer.h" #define DBG 0 @@ -58,21 +58,21 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * information. */ #define LOCAL_VARS \ - driRenderbuffer *drb = (driRenderbuffer *) rb; \ - const __DRIdrawablePrivate *dPriv = drb->dPriv; \ + struct radeon_renderbuffer *rrb = (void *) rb; \ + const __DRIdrawablePrivate *dPriv = rrb->dPriv; \ const GLuint bottom = dPriv->h - 1; \ - GLubyte *buf = (GLubyte *) drb->flippedData \ - + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp; \ + GLubyte *buf = (GLubyte *) rrb->bo->virtual \ + + (dPriv->y * rrb->pitch + dPriv->x) * rrb->cpp; \ GLuint p; \ (void) p; #define LOCAL_DEPTH_VARS \ - driRenderbuffer *drb = (driRenderbuffer *) rb; \ - const __DRIdrawablePrivate *dPriv = drb->dPriv; \ + struct radeon_renderbuffer *rrb = (void *) rb; \ + const __DRIdrawablePrivate *dPriv = rrb->dPriv; \ const GLuint bottom = dPriv->h - 1; \ GLuint xo = dPriv->x; \ GLuint yo = dPriv->y; \ - GLubyte *buf = (GLubyte *) drb->Base.Data; + GLubyte *buf = (GLubyte *) rrb->base.Data; #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS @@ -93,7 +93,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define TAG(x) radeon##x##_RGB565 #define TAG2(x,y) radeon##x##_RGB565##y -#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2) +#define GET_PTR(X,Y) (buf + ((Y) * rrb->pitch + (X)) * 2) #include "spantmp2.h" /* 32 bit, ARGB8888 color spanline and pixel functions @@ -103,7 +103,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define TAG(x) radeon##x##_ARGB8888 #define TAG2(x,y) radeon##x##_ARGB8888##y -#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4) +#define GET_PTR(X,Y) (buf + ((Y) * rrb->pitch + (X)) * 4) #include "spantmp2.h" /* ================================================================ @@ -120,10 +120,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * too... */ -static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y) +static GLuint radeon_mba_z32(const struct radeon_renderbuffer * rrb, + GLint x, GLint y) { - GLuint pitch = drb->pitch; - if (drb->depthHasSurface) { + GLuint pitch = rrb->pitch; + if (rrb->depthHasSurface) { return 4 * (x + y * pitch); } else { GLuint ba, address = 0; /* a[0..1] = 0 */ @@ -148,10 +149,10 @@ static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y) } static INLINE GLuint -radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y) +radeon_mba_z16(const struct radeon_renderbuffer *rrb, GLint x, GLint y) { - GLuint pitch = drb->pitch; - if (drb->depthHasSurface) { + GLuint pitch = rrb->pitch; + if (rrb->depthHasSurface) { return 2 * (x + y * pitch); } else { GLuint ba, address = 0; /* a[0] = 0 */ @@ -173,10 +174,10 @@ radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y) /* 16-bit depth buffer functions */ #define WRITE_DEPTH( _x, _y, d ) \ - *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d; + *(GLushort *)(buf + radeon_mba_z16( rrb, _x + xo, _y + yo )) = d; #define READ_DEPTH( d, _x, _y ) \ - d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )); + d = *(GLushort *)(buf + 
radeon_mba_z16( rrb, _x + xo, _y + yo )); #define TAG(x) radeon##x##_z16 #include "depthtmp.h" @@ -189,7 +190,7 @@ radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y) #ifdef COMPILE_R300 #define WRITE_DEPTH( _x, _y, d ) \ do { \ - GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ + GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo ); \ GLuint tmp = *(GLuint *)(buf + offset); \ tmp &= 0x000000ff; \ tmp |= ((d << 8) & 0xffffff00); \ @@ -198,7 +199,7 @@ do { \ #else #define WRITE_DEPTH( _x, _y, d ) \ do { \ - GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ + GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo ); \ GLuint tmp = *(GLuint *)(buf + offset); \ tmp &= 0xff000000; \ tmp |= ((d) & 0x00ffffff); \ @@ -209,12 +210,12 @@ do { \ #ifdef COMPILE_R300 #define READ_DEPTH( d, _x, _y ) \ do { \ - d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \ + d = (*(GLuint *)(buf + radeon_mba_z32( rrb, _x + xo, \ _y + yo )) & 0xffffff00) >> 8; \ }while(0) #else #define READ_DEPTH( d, _x, _y ) \ - d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \ + d = *(GLuint *)(buf + radeon_mba_z32( rrb, _x + xo, \ _y + yo )) & 0x00ffffff; #endif @@ -230,7 +231,7 @@ do { \ #ifdef COMPILE_R300 #define WRITE_STENCIL( _x, _y, d ) \ do { \ - GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ + GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo ); \ GLuint tmp = *(GLuint *)(buf + offset); \ tmp &= 0xffffff00; \ tmp |= (d) & 0xff; \ @@ -239,7 +240,7 @@ do { \ #else #define WRITE_STENCIL( _x, _y, d ) \ do { \ - GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ + GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo ); \ GLuint tmp = *(GLuint *)(buf + offset); \ tmp &= 0x00ffffff; \ tmp |= (((d) & 0xff) << 24); \ @@ -250,14 +251,14 @@ do { \ #ifdef COMPILE_R300 #define READ_STENCIL( d, _x, _y ) \ do { \ - GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ + GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo ); \ GLuint tmp = 
*(GLuint *)(buf + offset); \ d = tmp & 0x000000ff; \ } while (0) #else #define READ_STENCIL( d, _x, _y ) \ do { \ - GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ + GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo ); \ GLuint tmp = *(GLuint *)(buf + offset); \ d = (tmp & 0xff000000) >> 24; \ } while (0) @@ -300,10 +301,10 @@ static void radeonSpanRenderStart(GLcontext * ctx) */ { int p; - driRenderbuffer *drb = - (driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0]; + struct radeon_renderbuffer *rrb = + (void *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0]; volatile int *buf = - (volatile int *)(rmesa->dri.screen->pFB + drb->offset); + (volatile int *)(rmesa->dri.screen->pFB + rrb->bo->offset); p = *buf; } } @@ -326,20 +327,17 @@ void radeonInitSpanFuncs(GLcontext * ctx) /** * Plug in the Get/Put routines for the given driRenderbuffer. */ -void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis) +void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb) { - if (drb->Base.InternalFormat == GL_RGBA) { - if (vis->redBits == 5 && vis->greenBits == 6 - && vis->blueBits == 5) { - radeonInitPointers_RGB565(&drb->Base); - } else { - radeonInitPointers_ARGB8888(&drb->Base); - } - } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) { - radeonInitDepthPointers_z16(&drb->Base); - } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) { - radeonInitDepthPointers_z24_s8(&drb->Base); - } else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) { - radeonInitStencilPointers_z24_s8(&drb->Base); - } + if (rrb->base.InternalFormat == GL_RGB5) { + radeonInitPointers_RGB565(&rrb->base); + } else if (rrb->base.InternalFormat == GL_RGBA8) { + radeonInitPointers_ARGB8888(&rrb->base); + } else if (rrb->base.InternalFormat == GL_DEPTH_COMPONENT16) { + radeonInitDepthPointers_z16(&rrb->base); + } else if (rrb->base.InternalFormat == GL_DEPTH_COMPONENT24) { + radeonInitDepthPointers_z24_s8(&rrb->base); + } else if 
(rrb->base.InternalFormat == GL_STENCIL_INDEX8_EXT) { + radeonInitStencilPointers_z24_s8(&rrb->base); + } } diff --git a/src/mesa/drivers/dri/r300/radeon_state.c b/src/mesa/drivers/dri/r300/radeon_state.c index d81318c..a7720da 100644 --- a/src/mesa/drivers/dri/r300/radeon_state.c +++ b/src/mesa/drivers/dri/r300/radeon_state.c @@ -222,14 +222,6 @@ void radeonEnable(GLcontext* ctx, GLenum cap, GLboolean state) void radeonInitState(radeonContextPtr radeon) { radeon->Fallback = 0; - - if (radeon->glCtx->Visual.doubleBufferMode && radeon->sarea->pfCurrentPage == 0) { - radeon->state.color.drawOffset = radeon->radeonScreen->backOffset; - radeon->state.color.drawPitch = radeon->radeonScreen->backPitch; - } else { - radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset; - radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch; - } } diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer.h b/src/mesa/drivers/dri/radeon/radeon_buffer.h new file mode 100644 index 0000000..730c40b --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_buffer.h @@ -0,0 +1,50 @@ +/* + * Copyright 2008 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software") + * to deal in the software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * them Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTIBILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT, OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Adam Jackson + */ + +#ifndef RADEON_BUFFER_H +#define RADEON_BUFFER_H + +#include "dri_bufmgr.h" + +struct radeon_renderbuffer +{ + struct gl_renderbuffer base; + dri_bo *bo; + unsigned int cpp; + /* unsigned int offset; */ + unsigned int pitch; + unsigned int height; + + /* boo Xorg 6.8.2 compat */ + int depthHasSurface; + + __DRIdrawablePrivate *dPriv; +}; + +struct radeon_bufmgr { + dri_bufmgr base; +}; + +#endif diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 84b5c46..10a49d2 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -46,6 +46,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_chipset.h" #include "radeon_macros.h" #include "radeon_screen.h" +#include "radeon_buffer.h" #if !RADEON_COMMON #include "radeon_context.h" #include "radeon_span.h" @@ -69,6 +70,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "GL/internal/dri_interface.h" +#include +#include + /* Radeon configuration */ #include "xmlpool.h" @@ -350,6 +354,79 @@ static const __DRItexOffsetExtension r300texOffsetExtension = { }; #endif + +static void +radeon_gem_update_handle(radeonScreenPtr screen, __DRIscreenPrivate *sPriv, + struct radeon_gem_object *gem_obj) +{ + struct drm_gem_close close_args; + struct drm_gem_open args; + struct drm_radeon_gem_mmap mmap_args; + struct drm_radeon_gem_pin pin_args; + int ret; + + if (gem_obj->gem_handle) { + close_args.handle = gem_obj->gem_handle; + + ioctl(sPriv->fd, DRM_IOCTL_GEM_CLOSE, &close_args); + gem_obj->gem_handle = 0; + } + + /* do open */ + args.name = gem_obj->gem_name; + ret = ioctl(sPriv->fd, DRM_IOCTL_GEM_OPEN, &args); + if (ret) + return; + + gem_obj->gem_handle = args.handle; + gem_obj->size = args.size; + + mmap_args.handle = gem_obj->gem_handle; + mmap_args.size = gem_obj->size; + mmap_args.offset = 0; + + ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_GEM_MMAP, &mmap_args, + sizeof(mmap_args)); + + if (ret) + return; + + gem_obj->map = (void *)(unsigned long)(mmap_args.addr_ptr); + + pin_args.handle = gem_obj->gem_handle; + pin_args.alignment = 0; + + ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_GEM_PIN, &pin_args, + sizeof(pin_args)); + + if (ret) + return; + + gem_obj->offset = pin_args.offset; + + fprintf(stderr,"handle %d, size %llx, ptr %p, offset %llx\n", gem_obj->gem_handle, + gem_obj->size, gem_obj->map, gem_obj->offset); +} + +static int +radeon_init_mm_buffers(radeonScreenPtr screen, __DRIscreenPrivate *sPriv, + RADEONDRIPtr dri_priv) +{ + /* STOP GAP HERE */ + + screen->front.gem_name = dri_priv->frontOffset; + radeon_gem_update_handle(screen, sPriv, &screen->front); + screen->back.gem_name = dri_priv->backOffset; + radeon_gem_update_handle(screen, sPriv, &screen->back); + screen->depth.gem_name = dri_priv->depthOffset; + radeon_gem_update_handle(screen, sPriv, &screen->depth); + + screen->vram_texture.gem_name = 
dri_priv->textureOffset; + radeon_gem_update_handle(screen, sPriv, &screen->vram_texture); + screen->gart_texture.gem_name = dri_priv->gartTexHandle; /* name the GART object itself; vram_texture was already set up above */ + radeon_gem_update_handle(screen, sPriv, &screen->gart_texture); +} + /* Create the device specific screen private data struct. */ static radeonScreenPtr @@ -389,6 +466,21 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) screen->card_type = (dri_priv->IsPCI ? RADEON_CARD_PCI : RADEON_CARD_AGP); { int ret; + +#ifdef RADEON_PARAM_KERNEL_MM + ret = radeonGetParam( sPriv->fd, RADEON_PARAM_KERNEL_MM, + &screen->kernel_mm); + + if (ret && ret != -EINVAL) { + FREE( screen ); + fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_KERNEL_MM): %d\n", ret); + return NULL; + } + + if (ret == -EINVAL) + screen->kernel_mm = 0; +#endif + ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BUFFER_OFFSET, &screen->gart_buffer_offset); @@ -422,32 +514,34 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25); } - screen->mmio.handle = dri_priv->registerHandle; - screen->mmio.size = dri_priv->registerSize; - if ( drmMap( sPriv->fd, - screen->mmio.handle, - screen->mmio.size, - &screen->mmio.map ) ) { - FREE( screen ); - __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ ); - return NULL; - } + if (!screen->kernel_mm) { + screen->mmio.handle = dri_priv->registerHandle; + screen->mmio.size = dri_priv->registerSize; + if ( drmMap( sPriv->fd, + screen->mmio.handle, + screen->mmio.size, + &screen->mmio.map ) ) { + FREE( screen ); + __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ ); + return NULL; + } - RADEONMMIO = screen->mmio.map; + RADEONMMIO = screen->mmio.map; - screen->status.handle = dri_priv->statusHandle; - screen->status.size = dri_priv->statusSize; - if ( drmMap( sPriv->fd, - screen->status.handle, - screen->status.size, - &screen->status.map ) ) { - drmUnmap( screen->mmio.map, screen->mmio.size ); - FREE( screen ); - __driUtilMessage("%s: drmMap (2) failed\n", 
__FUNCTION__ ); - return NULL; + screen->status.handle = dri_priv->statusHandle; + screen->status.size = dri_priv->statusSize; + if ( drmMap( sPriv->fd, + screen->status.handle, + screen->status.size, + &screen->status.map ) ) { + drmUnmap( screen->mmio.map, screen->mmio.size ); + FREE( screen ); + __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ ); + return NULL; + } + screen->scratch = (__volatile__ u_int32_t *) + ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET); } - screen->scratch = (__volatile__ u_int32_t *) - ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET); screen->buffers = drmMapBufs( sPriv->fd ); if ( !screen->buffers ) { @@ -458,22 +552,24 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) return NULL; } - if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) { - screen->gartTextures.handle = dri_priv->gartTexHandle; - screen->gartTextures.size = dri_priv->gartTexMapSize; - if ( drmMap( sPriv->fd, - screen->gartTextures.handle, - screen->gartTextures.size, - (drmAddressPtr)&screen->gartTextures.map ) ) { - drmUnmapBufs( screen->buffers ); - drmUnmap( screen->status.map, screen->status.size ); - drmUnmap( screen->mmio.map, screen->mmio.size ); - FREE( screen ); - __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__); - return NULL; + if (!screen->kernel_mm) { + if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) { + screen->gartTextures.handle = dri_priv->gartTexHandle; + screen->gartTextures.size = dri_priv->gartTexMapSize; + if ( drmMap( sPriv->fd, + screen->gartTextures.handle, + screen->gartTextures.size, + (drmAddressPtr)&screen->gartTextures.map ) ) { + drmUnmapBufs( screen->buffers ); + drmUnmap( screen->status.map, screen->status.size ); + drmUnmap( screen->mmio.map, screen->mmio.size ); + FREE( screen ); + __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__); + return NULL; + } + + screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base; } - - 
screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base; } screen->chip_flags = 0; @@ -840,7 +936,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) ret = radeonGetParam( sPriv->fd, RADEON_PARAM_FB_LOCATION, &temp); if (ret) { - if (screen->chip_family < CHIP_FAMILY_RS690) + if (screen->chip_family < CHIP_FAMILY_RS690 && !screen->kernel_mm) screen->fbLocation = ( INREG( RADEON_MC_FB_LOCATION ) & 0xffff) << 16; else { FREE( screen ); @@ -881,55 +977,58 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) } } - if ( sPriv->drm_version.minor >= 10 ) { - drm_radeon_setparam_t sp; + if (!screen->kernel_mm) { + if ( sPriv->drm_version.minor >= 10 ) { + drm_radeon_setparam_t sp; - sp.param = RADEON_SETPARAM_FB_LOCATION; - sp.value = screen->fbLocation; + sp.param = RADEON_SETPARAM_FB_LOCATION; + sp.value = screen->fbLocation; - drmCommandWrite( sPriv->fd, DRM_RADEON_SETPARAM, - &sp, sizeof( sp ) ); - } - - screen->frontOffset = dri_priv->frontOffset; - screen->frontPitch = dri_priv->frontPitch; - screen->backOffset = dri_priv->backOffset; - screen->backPitch = dri_priv->backPitch; - screen->depthOffset = dri_priv->depthOffset; - screen->depthPitch = dri_priv->depthPitch; - - /* Check if ddx has set up a surface reg to cover depth buffer */ - screen->depthHasSurface = (sPriv->ddx_version.major > 4) || - /* these chips don't use tiled z without hyperz. 
So always pretend - we have set up a surface which will cause linear reads/writes */ - ((screen->chip_family & RADEON_CLASS_R100) && - !(screen->chip_flags & RADEON_CHIPSET_TCL)); - - if ( dri_priv->textureSize == 0 ) { - screen->texOffset[RADEON_LOCAL_TEX_HEAP] = screen->gart_texture_offset; - screen->texSize[RADEON_LOCAL_TEX_HEAP] = dri_priv->gartTexMapSize; - screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] = - dri_priv->log2GARTTexGran; - } else { - screen->texOffset[RADEON_LOCAL_TEX_HEAP] = dri_priv->textureOffset - + screen->fbLocation; - screen->texSize[RADEON_LOCAL_TEX_HEAP] = dri_priv->textureSize; - screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] = - dri_priv->log2TexGran; - } + drmCommandWrite( sPriv->fd, DRM_RADEON_SETPARAM, + &sp, sizeof( sp ) ); + } - if ( !screen->gartTextures.map || dri_priv->textureSize == 0 - || getenv( "RADEON_GARTTEXTURING_FORCE_DISABLE" ) ) { - screen->numTexHeaps = RADEON_NR_TEX_HEAPS - 1; - screen->texOffset[RADEON_GART_TEX_HEAP] = 0; - screen->texSize[RADEON_GART_TEX_HEAP] = 0; - screen->logTexGranularity[RADEON_GART_TEX_HEAP] = 0; + screen->frontOffset = dri_priv->frontOffset; + screen->frontPitch = dri_priv->frontPitch; + screen->backOffset = dri_priv->backOffset; + screen->backPitch = dri_priv->backPitch; + screen->depthOffset = dri_priv->depthOffset; + screen->depthPitch = dri_priv->depthPitch; + + /* Check if ddx has set up a surface reg to cover depth buffer */ + screen->depthHasSurface = (sPriv->ddx_version.major > 4) || + /* these chips don't use tiled z without hyperz. 
So always pretend + we have set up a surface which will cause linear reads/writes */ + ((screen->chip_family & RADEON_CLASS_R100) && + !(screen->chip_flags & RADEON_CHIPSET_TCL)); + + if ( dri_priv->textureSize == 0 ) { + screen->texOffset[RADEON_LOCAL_TEX_HEAP] = screen->gart_texture_offset; + screen->texSize[RADEON_LOCAL_TEX_HEAP] = dri_priv->gartTexMapSize; + screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] = + dri_priv->log2GARTTexGran; + } else { + screen->texOffset[RADEON_LOCAL_TEX_HEAP] = dri_priv->textureOffset + + screen->fbLocation; + screen->texSize[RADEON_LOCAL_TEX_HEAP] = dri_priv->textureSize; + screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] = + dri_priv->log2TexGran; + } + + if ( !screen->gartTextures.map || dri_priv->textureSize == 0 + || getenv( "RADEON_GARTTEXTURING_FORCE_DISABLE" ) ) { + screen->numTexHeaps = RADEON_NR_TEX_HEAPS - 1; + screen->texOffset[RADEON_GART_TEX_HEAP] = 0; + screen->texSize[RADEON_GART_TEX_HEAP] = 0; + screen->logTexGranularity[RADEON_GART_TEX_HEAP] = 0; + } else { + screen->numTexHeaps = RADEON_NR_TEX_HEAPS; + screen->texOffset[RADEON_GART_TEX_HEAP] = screen->gart_texture_offset; + screen->texSize[RADEON_GART_TEX_HEAP] = dri_priv->gartTexMapSize; + screen->logTexGranularity[RADEON_GART_TEX_HEAP] = dri_priv->log2GARTTexGran; + } } else { - screen->numTexHeaps = RADEON_NR_TEX_HEAPS; - screen->texOffset[RADEON_GART_TEX_HEAP] = screen->gart_texture_offset; - screen->texSize[RADEON_GART_TEX_HEAP] = dri_priv->gartTexMapSize; - screen->logTexGranularity[RADEON_GART_TEX_HEAP] = - dri_priv->log2GARTTexGran; + radeon_init_mm_buffers(screen, sPriv, dri_priv); } i = 0; @@ -975,12 +1074,14 @@ radeonDestroyScreen( __DRIscreenPrivate *sPriv ) if (!screen) return; - if ( screen->gartTextures.map ) { - drmUnmap( screen->gartTextures.map, screen->gartTextures.size ); - } drmUnmapBufs( screen->buffers ); - drmUnmap( screen->status.map, screen->status.size ); - drmUnmap( screen->mmio.map, screen->mmio.size ); + if (!screen->kernel_mm) { + 
if ( screen->gartTextures.map ) { + drmUnmap( screen->gartTextures.map, screen->gartTextures.size ); + } + drmUnmap( screen->status.map, screen->status.size ); + drmUnmap( screen->mmio.map, screen->mmio.size ); + } /* free all option information */ driDestroyOptionInfo (&screen->optionCache); @@ -1004,6 +1105,158 @@ radeonInitDriver( __DRIscreenPrivate *sPriv ) return GL_TRUE; } +#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) +static GLboolean +radeon_alloc_window_storage(GLcontext *ctx, struct gl_renderbuffer *rb, + GLenum intFormat, GLuint w, GLuint h) +{ + rb->Width = w; + rb->Height = h; + rb->_ActualFormat = intFormat; + + return GL_TRUE; +} + + +static struct radeon_renderbuffer * +radeon_create_renderbuffer(GLenum format) +{ + struct radeon_renderbuffer *ret; + + ret = CALLOC_STRUCT(radeon_renderbuffer); + if (!ret) + return NULL; + + _mesa_init_renderbuffer(&ret->base, 0); + + /* XXX format junk */ + switch (format) { + case GL_RGB5: + ret->base._ActualFormat = GL_RGB5; + ret->base._BaseFormat = GL_RGBA; + ret->base.RedBits = 5; + ret->base.GreenBits = 6; + ret->base.BlueBits = 5; + ret->base.DataType = GL_UNSIGNED_BYTE; + break; + case GL_RGBA8: + ret->base._ActualFormat = GL_RGBA8; + ret->base._BaseFormat = GL_RGBA; + ret->base.RedBits = 8; + ret->base.GreenBits = 8; + ret->base.BlueBits = 8; + ret->base.AlphaBits = 8; + ret->base.DataType = GL_UNSIGNED_BYTE; + break; + case GL_STENCIL_INDEX8_EXT: + ret->base._ActualFormat = GL_STENCIL_INDEX8_EXT; + ret->base._BaseFormat = GL_STENCIL_INDEX; + ret->base.StencilBits = 8; + ret->base.DataType = GL_UNSIGNED_BYTE; + break; + case GL_DEPTH_COMPONENT16: + ret->base._ActualFormat = GL_DEPTH_COMPONENT16; + ret->base._BaseFormat = GL_DEPTH_COMPONENT; + ret->base.DepthBits = 16; + ret->base.DataType = GL_UNSIGNED_SHORT; + break; + case GL_DEPTH_COMPONENT24: + ret->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT; + ret->base._BaseFormat = GL_DEPTH_COMPONENT; + ret->base.DepthBits = 24; + ret->base.DataType = 
GL_UNSIGNED_INT; + break; + case GL_DEPTH24_STENCIL8_EXT: + ret->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT; + ret->base._BaseFormat = GL_DEPTH_STENCIL_EXT; + ret->base.DepthBits = 24; + ret->base.StencilBits = 8; + ret->base.DataType = GL_UNSIGNED_INT_24_8_EXT; + break; + default: + /* whoops */ + break; + } + + ret->base.InternalFormat = format; + + ret->base.AllocStorage = radeon_alloc_window_storage; + + radeonSetSpanFunctions(ret); + + return ret; +} + +/** + * Create the Mesa framebuffer and renderbuffers for a given window/drawable. + * + * \todo This function (and its interface) will need to be updated to support + * pbuffers. + */ +static GLboolean +radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv, + __DRIdrawablePrivate *driDrawPriv, + const __GLcontextModes *mesaVis, + GLboolean isPixmap ) +{ + radeonScreenPtr screen = (radeonScreenPtr) driScrnPriv->private; + + const GLboolean swDepth = GL_FALSE; + const GLboolean swAlpha = GL_FALSE; + const GLboolean swAccum = mesaVis->accumRedBits > 0; + const GLboolean swStencil = mesaVis->stencilBits > 0 && + mesaVis->depthBits != 24; + GLenum rgbFormat = (mesaVis->redBits == 5 ? 
GL_RGB5 : GL_RGBA8); + GLenum depthFormat = GL_NONE; + struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis); + + if (mesaVis->depthBits == 16) + depthFormat = GL_DEPTH_COMPONENT16; + else if (mesaVis->depthBits == 24) + depthFormat = GL_DEPTH_COMPONENT24; + + /* front color renderbuffer */ + { + struct radeon_renderbuffer *front = + radeon_create_renderbuffer(rgbFormat); + _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &front->base); + } + + /* back color renderbuffer */ + if (mesaVis->doubleBufferMode) { + struct radeon_renderbuffer *back = + radeon_create_renderbuffer(rgbFormat); /* same sized format as front: only GL_RGB5/GL_RGBA8 get format bits and span functions */ + _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &back->base); + } + + /* depth renderbuffer */ + if (depthFormat != GL_NONE) { + struct radeon_renderbuffer *depth = + radeon_create_renderbuffer(depthFormat); + _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depth->base); + depth->depthHasSurface = screen->depthHasSurface; + } + + /* stencil renderbuffer */ + if (mesaVis->stencilBits > 0 && !swStencil) { + struct radeon_renderbuffer *stencil = + radeon_create_renderbuffer(GL_STENCIL_INDEX8_EXT); + _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencil->base); + stencil->depthHasSurface = screen->depthHasSurface; + } + + _mesa_add_soft_renderbuffers(fb, + GL_FALSE, /* color */ + swDepth, + swStencil, + swAccum, + swAlpha, + GL_FALSE /* aux */); + driDrawPriv->driverPrivate = (void *) fb; + + return (driDrawPriv->driverPrivate != NULL); +} +#else /** * Create the Mesa framebuffer and renderbuffers for a given window/drawable. @@ -1105,6 +1358,11 @@ radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv, } + + +#endif + + static void radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv) { @@ -1199,11 +1457,11 @@ radeonInitScreen(__DRIscreenPrivate *psp) if (!radeonInitDriver(psp)) return NULL; + /* for now fill in all modes */ return radeonFillInModes( psp, dri_priv->bpp, (dri_priv->bpp == 16) ? 16 : 24, - (dri_priv->bpp == 16) ? 
0 : 8, - (dri_priv->backOffset != dri_priv->depthOffset) ); + (dri_priv->bpp == 16) ? 0 : 8, 1); } diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h index ab859d5..82eb7d8 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.h +++ b/src/mesa/drivers/dri/radeon/radeon_screen.h @@ -55,6 +55,14 @@ typedef struct { drmAddress map; /* Mapping of the DRM region */ } radeonRegionRec, *radeonRegionPtr; +struct radeon_gem_object { + uint32_t gem_name; + uint32_t gem_handle; + uint64_t size; + void *map; + uint64_t offset; +}; + typedef struct { int chip_family; int chip_flags; @@ -107,6 +115,13 @@ typedef struct { const __DRIextension *extensions[8]; int num_gb_pipes; + + int kernel_mm; + struct radeon_gem_object front; + struct radeon_gem_object back; + struct radeon_gem_object depth; + struct radeon_gem_object vram_texture; + struct radeon_gem_object gart_texture; } radeonScreenRec, *radeonScreenPtr; #define IS_R100_CLASS(screen) \ diff --git a/src/mesa/drivers/dri/radeon/radeon_span.h b/src/mesa/drivers/dri/radeon/radeon_span.h index 9abe086..1650a9b 100644 --- a/src/mesa/drivers/dri/radeon/radeon_span.h +++ b/src/mesa/drivers/dri/radeon/radeon_span.h @@ -44,7 +44,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "drirenderbuffer.h" +#include "radeon_buffer.h" + extern void radeonInitSpanFuncs(GLcontext * ctx); -extern void radeonSetSpanFunctions(driRenderbuffer * rb, const GLvisual * vis); +#if COMPILE_R300 +extern void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb); +#else +extern void radeonSetSpanFunctions(driRenderbuffer * rb, const GLvisual * vis); +#endif #endif