mesa/r300-cmdbuf.patch

diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile
index e9144ac..b6ed58b 100644
--- a/src/mesa/drivers/dri/r200/Makefile
+++ b/src/mesa/drivers/dri/r200/Makefile
@@ -48,7 +48,8 @@ SYMLINKS = \
 COMMON_SYMLINKS = \
 	radeon_chipset.h \
 	radeon_screen.c \
-	radeon_screen.h
+	radeon_screen.h \
+	radeon_buffer.h

 ##### TARGETS #####

diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile
index 6ca9342..3bb1ff4 100644
--- a/src/mesa/drivers/dri/r300/Makefile
+++ b/src/mesa/drivers/dri/r300/Makefile
@@ -11,15 +11,6 @@ ifeq ($(USING_EGL), 1)
 EGL_SOURCES = server/radeon_egl.c
 endif

-COMMON_SOURCES = \
-	../../common/driverfuncs.c \
-	../common/mm.c \
-	../common/utils.c \
-	../common/texmem.c \
-	../common/vblank.c \
-	../common/xmlconfig.c \
-	../common/dri_util.c
-
 DRIVER_SOURCES = \
 		 radeon_screen.c \
 		 radeon_context.c \
@@ -36,6 +27,7 @@ DRIVER_SOURCES = \
 		 r300_texmem.c \
 		 r300_tex.c \
 		 r300_texstate.c \
+		 r300_mipmap_tree.c \
 		 radeon_program.c \
 		 radeon_program_alu.c \
 		 radeon_program_pair.c \
@@ -51,7 +43,7 @@ DRIVER_SOURCES = \
 		 r300_swtcl.c \
 		 $(EGL_SOURCES)

-C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES)
+C_SOURCES = $(COMMON_SOURCES) $(COMMON_BM_SOURCES) $(DRIVER_SOURCES)

 DRIVER_DEFINES = -DCOMPILE_R300 -DR200_MERGED=0 \
 	-DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R300
@@ -68,7 +60,8 @@ COMMON_SYMLINKS = \
 	radeon_chipset.h \
 	radeon_screen.c \
 	radeon_screen.h \
-	radeon_span.h
+	radeon_span.h \
+	radeon_buffer.h

 ##### TARGETS #####

diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
index c069660..493b0ac 100644
--- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c
+++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
@@ -51,11 +51,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "r300_reg.h"
 #include "r300_cmdbuf.h"
 #include "r300_emit.h"
+#include "r300_mem.h"
+#include "r300_mipmap_tree.h"
 #include "r300_state.h"

 // Set this to 1 for extremely verbose debugging of command buffers
 #define DEBUG_CMDBUF		0

+/** # of dwords reserved for additional instructions that may need to be written
+ * during flushing.
+ */
+#define SPACE_FOR_FLUSHING	4
+
 /**
  * Send the current command buffer via ioctl to the hardware.
  */
@@ -66,24 +73,42 @@ int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller)
 	drm_radeon_cmd_buffer_t cmd;
 	int start;

+	if (r300->cmdbuf.flushing) {
+		fprintf(stderr, "Recursive call into r300FlushCmdBufLocked!\n");
+		exit(-1);
+	}
+	r300->cmdbuf.flushing = 1;
+
 	if (r300->radeon.lost_context) {
 		start = 0;
 		r300->radeon.lost_context = GL_FALSE;
 	} else
-		start = r300->cmdbuf.count_reemit;
+		start = r300->cmdbuf.reemit;

 	if (RADEON_DEBUG & DEBUG_IOCTL) {
 		fprintf(stderr, "%s from %s - %i cliprects\n",
 			__FUNCTION__, caller, r300->radeon.numClipRects);

-		if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_VERBOSE)
-			for (i = start; i < r300->cmdbuf.count_used; ++i)
+		if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_VERBOSE) {
+			fprintf(stderr, "written: %d  committed: %d\n", r300->cmdbuf.written, r300->cmdbuf.committed);
+			for (i = start; i < r300->cmdbuf.written; ++i)
 				fprintf(stderr, "%d: %08x\n", i,
-					r300->cmdbuf.cmd_buf[i]);
+					((uint32_t*)r300->cmdbuf.buf->virtual)[i]);
+		}
 	}

-	cmd.buf = (char *)(r300->cmdbuf.cmd_buf + start);
-	cmd.bufsz = (r300->cmdbuf.count_used - start) * 4;
+	if (r300->cmdbuf.written != r300->cmdbuf.committed) {
+		_mesa_problem(r300->radeon.glCtx,
+			"Command buffer contains %d uncommitted dwords\n"
+			"in r300FlushCmdBufLocked called from %s.\n",
+			r300->cmdbuf.written - r300->cmdbuf.committed, caller);
+	}
+
+	dri_bo_unmap(r300->cmdbuf.buf);
+	dri_process_relocs(r300->cmdbuf.buf, 0);
+
+	cmd.buf = (char *)r300->cmdbuf.buf->virtual + 4*start;
+	cmd.bufsz = (r300->cmdbuf.committed - start) * 4;

 	if (r300->radeon.state.scissor.enabled) {
 		cmd.nbox = r300->radeon.state.scissor.numClipRects;
@@ -103,9 +128,19 @@ int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller)
 		radeonWaitForIdleLocked(&r300->radeon);
 	}

+	dri_post_submit(r300->cmdbuf.buf, 0);
+	dri_bo_unreference(r300->cmdbuf.buf);
+
 	r300->dma.nr_released_bufs = 0;
-	r300->cmdbuf.count_used = 0;
-	r300->cmdbuf.count_reemit = 0;
+	r300->cmdbuf.buf = dri_bo_alloc(&r300->radeon.bufmgr->base, "cmdbuf",
+		r300->cmdbuf.size*4, 16, DRM_BO_MEM_CMDBUF);
+	r300->cmdbuf.written = 0;
+	r300->cmdbuf.reserved = 0;
+	r300->cmdbuf.committed = 0;
+	r300->cmdbuf.reemit = 0;
+	dri_bo_map(r300->cmdbuf.buf, GL_TRUE);
+
+	r300->cmdbuf.flushing = 0;

 	return ret;
 }
@@ -115,9 +150,7 @@ int r300FlushCmdBuf(r300ContextPtr r300, const char *caller)
 	int ret;

 	LOCK_HARDWARE(&r300->radeon);
-
 	ret = r300FlushCmdBufLocked(r300, caller);
-
 	UNLOCK_HARDWARE(&r300->radeon);

 	if (ret) {
@@ -128,6 +161,44 @@ int r300FlushCmdBuf(r300ContextPtr r300, const char *caller)
 	return ret;
 }

+/**
+ * Make sure that enough space is available in the command buffer
+ * by flushing if necessary.
+ *
+ * \param dwords The number of dwords we need to be free on the command buffer
+ */
+void r300EnsureCmdBufSpace(r300ContextPtr r300, int dwords, const char *caller)
+{
+	assert(dwords < r300->cmdbuf.size);
+
+	if (!r300->cmdbuf.flushing)
+		dwords += SPACE_FOR_FLUSHING;
+
+	if (r300->cmdbuf.written + dwords > r300->cmdbuf.size)
+		r300FlushCmdBuf(r300, caller);
+}
+
+void r300BeginBatch(r300ContextPtr r300, int n, GLboolean autostate, const char* function, int line)
+{
+	assert(r300->cmdbuf.written == r300->cmdbuf.reserved);
+
+	r300EnsureCmdBufSpace(r300, n, function);
+
+	if (autostate && !r300->cmdbuf.written) {
+		if (RADEON_DEBUG & DEBUG_IOCTL)
+			fprintf(stderr,
+				"Reemit state after flush (from %s)\n", function);
+		r300EmitState(r300);
+	}
+
+	r300->cmdbuf.reserved += n;
+	assert(r300->cmdbuf.reserved < r300->cmdbuf.size);
+
+	if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_IOCTL)
+		fprintf(stderr, "BEGIN_BATCH(%d) at %d, from %s:%i\n",
+			n, r300->cmdbuf.written, function, line);
+}
+
 static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *state)
 {
 	int i;
@@ -152,33 +223,18 @@ static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *stat
  */
 static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty)
 {
+	BATCH_LOCALS(r300);
 	struct r300_state_atom *atom;
-	uint32_t *dest;
 	int dwords;

-	dest = r300->cmdbuf.cmd_buf + r300->cmdbuf.count_used;
-
-	/* Emit WAIT */
-	*dest = cmdwait(R300_WAIT_3D | R300_WAIT_3D_CLEAN);
-	dest++;
-	r300->cmdbuf.count_used++;
-
-	/* Emit cache flush */
-	*dest = cmdpacket0(R300_TX_INVALTAGS, 1);
-	dest++;
-	r300->cmdbuf.count_used++;
-
-	*dest = R300_TX_FLUSH;
-	dest++;
-	r300->cmdbuf.count_used++;
-
-	/* Emit END3D */
-	*dest = cmdpacify();
-	dest++;
-	r300->cmdbuf.count_used++;
+	BEGIN_BATCH_NO_AUTOSTATE(4);
+	OUT_BATCH(cmdwait(R300_WAIT_3D | R300_WAIT_3D_CLEAN));
+	OUT_BATCH(cmdpacket0(R300_TX_INVALTAGS, 1));
+	OUT_BATCH(R300_TX_FLUSH);
+	OUT_BATCH(cmdpacify());
+	END_BATCH();

 	/* Emit actual atoms */
-
 	foreach(atom, &r300->hw.atomlist) {
 		if ((atom->dirty || r300->hw.all_dirty) == dirty) {
 			dwords = (*atom->check) (r300, atom);
@@ -186,9 +242,13 @@ static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty)
 				if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
 					r300PrintStateAtom(r300, atom);
 				}
-				memcpy(dest, atom->cmd, dwords * 4);
-				dest += dwords;
-				r300->cmdbuf.count_used += dwords;
+				if (atom->emit) {
+					(*atom->emit)(r300);
+				} else {
+					BEGIN_BATCH_NO_AUTOSTATE(dwords);
+					OUT_BATCH_TABLE(atom->cmd, dwords);
+					END_BATCH();
+				}
 				atom->dirty = GL_FALSE;
 			} else {
 				if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
@@ -198,6 +258,8 @@ static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty)
 			}
 		}
 	}
+
+	COMMIT_BATCH();
 }

 /**
@@ -211,22 +273,21 @@ void r300EmitState(r300ContextPtr r300)
 	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_PRIMS))
 		fprintf(stderr, "%s\n", __FUNCTION__);

-	if (r300->cmdbuf.count_used && !r300->hw.is_dirty
+	if (r300->cmdbuf.written && !r300->hw.is_dirty
 	    && !r300->hw.all_dirty)
 		return;

 	/* To avoid going across the entire set of states multiple times, just check
-	 * for enough space for the case of emitting all state, and inline the
-	 * r300AllocCmdBuf code here without all the checks.
+	 * for enough space for the case of emitting all state.
 	 */
 	r300EnsureCmdBufSpace(r300, r300->hw.max_state_size, __FUNCTION__);

-	if (!r300->cmdbuf.count_used) {
+	if (!r300->cmdbuf.written) {
 		if (RADEON_DEBUG & DEBUG_STATE)
 			fprintf(stderr, "Begin reemit state\n");

 		r300EmitAtoms(r300, GL_FALSE);
-		r300->cmdbuf.count_reemit = r300->cmdbuf.count_used;
+		r300->cmdbuf.reemit = r300->cmdbuf.committed;
 	}

 	if (RADEON_DEBUG & DEBUG_STATE)
@@ -234,7 +295,7 @@ void r300EmitState(r300ContextPtr r300)

 	r300EmitAtoms(r300, GL_TRUE);

-	assert(r300->cmdbuf.count_used < r300->cmdbuf.size);
+	assert(r300->cmdbuf.written < r300->cmdbuf.size);

 	r300->hw.is_dirty = GL_FALSE;
 	r300->hw.all_dirty = GL_FALSE;
@@ -244,6 +305,79 @@ void r300EmitState(r300ContextPtr r300)
 #define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count)
 #define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count)

+static void emit_tex_offsets(r300ContextPtr r300)
+{
+	BATCH_LOCALS(r300);
+	int numtmus = packet0_count(r300->hw.tex.offset.cmd);
+
+	if (numtmus) {
+		int i;
+
+		BEGIN_BATCH(numtmus + 1);
+		OUT_BATCH_REGSEQ(R300_TX_OFFSET_0, numtmus);
+		for(i = 0; i < numtmus; ++i) {
+			r300TexObj *t = r300->hw.textures[i];
+			if (t && !t->image_override) {
+				OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, DRM_RELOC_TXOFFSET);
+			} else if (!t) {
+				OUT_BATCH(r300->radeon.radeonScreen->texOffset[0]);
+			} else {
+				OUT_BATCH(t->override_offset);
+			}
+		}
+		END_BATCH();
+	}
+}
+
+static void emit_cb_offset(r300ContextPtr r300)
+{
+	BATCH_LOCALS(r300);
+	struct radeon_renderbuffer *rrb;
+	uint32_t cbpitch;
+
+	rrb = r300->radeon.state.color.rrb;
+	if (!rrb) {
+		fprintf(stderr, "no rrb\n");
+		return;
+	}
+
+	cbpitch = rrb->pitch;
+	if (rrb->cpp == 4)
+		cbpitch |= R300_COLOR_FORMAT_ARGB8888;
+	else
+		cbpitch |= R300_COLOR_FORMAT_RGB565;
+
+	if (r300->radeon.sarea->tiling_enabled)
+		cbpitch |= R300_COLOR_TILE_ENABLE;
+
+	BEGIN_BATCH(4);
+	OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1);
+	OUT_BATCH_RELOC(0, rrb->bo, 0, DRM_RELOC_TXOFFSET);
+	OUT_BATCH_REGSEQ(R300_RB3D_COLORPITCH0, 1);
+	OUT_BATCH(cbpitch);
+	END_BATCH();
+}
+
+static void emit_zb_offset(r300ContextPtr r300)
+{
+	BATCH_LOCALS(r300);
+	struct radeon_renderbuffer *rrb;
+	uint32_t zbpitch;
+
+	rrb = r300->radeon.state.depth_buffer;
+	if (!rrb)
+		return;
+
+	zbpitch = rrb->pitch;
+
+	BEGIN_BATCH(3);
+	OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 2);
+	OUT_BATCH_RELOC(0, rrb->bo, 0, DRM_RELOC_TXOFFSET);
+	OUT_BATCH(zbpitch);
+	END_BATCH();
+
+}
+
 static int check_always(r300ContextPtr r300, struct r300_state_atom *atom)
 {
 	return atom->cmd_size;
@@ -480,8 +614,7 @@ void r300InitCmdBuf(r300ContextPtr r300)
 	ALLOC_STATE(rop, always, 2, 0);
 	r300->hw.rop.cmd[0] = cmdpacket0(R300_RB3D_ROPCNTL, 1);
 	ALLOC_STATE(cb, always, R300_CB_CMDSIZE, 0);
-	r300->hw.cb.cmd[R300_CB_CMD_0] = cmdpacket0(R300_RB3D_COLOROFFSET0, 1);
-	r300->hw.cb.cmd[R300_CB_CMD_1] = cmdpacket0(R300_RB3D_COLORPITCH0, 1);
+	r300->hw.cb.emit = &emit_cb_offset;
 	ALLOC_STATE(rb3d_dither_ctl, always, 10, 0);
 	r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(R300_RB3D_DITHER_CTL, 9);
 	ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0);
@@ -495,7 +628,7 @@ void r300InitCmdBuf(r300ContextPtr r300)
 	r300->hw.zstencil_format.cmd[0] =
 	    cmdpacket0(R300_ZB_FORMAT, 4);
 	ALLOC_STATE(zb, always, R300_ZB_CMDSIZE, 0);
-	r300->hw.zb.cmd[R300_ZB_CMD_0] = cmdpacket0(R300_ZB_DEPTHOFFSET, 2);
+	r300->hw.zb.emit = emit_zb_offset;
 	ALLOC_STATE(zb_depthclearvalue, always, 2, 0);
 	r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(R300_ZB_DEPTHCLEARVALUE, 1);
 	ALLOC_STATE(unk4F30, always, 3, 0);
@@ -562,9 +695,10 @@ void r300InitCmdBuf(r300ContextPtr r300)
 	ALLOC_STATE(tex.pitch, variable, mtu + 1, 0);
 	r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FORMAT2_0, 0);

-	ALLOC_STATE(tex.offset, variable, mtu + 1, 0);
+	ALLOC_STATE(tex.offset, variable, 1, 0);
 	r300->hw.tex.offset.cmd[R300_TEX_CMD_0] =
 	    cmdpacket0(R300_TX_OFFSET_0, 0);
+	r300->hw.tex.offset.emit = &emit_tex_offsets;

 	ALLOC_STATE(tex.chroma_key, variable, mtu + 1, 0);
 	r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] =
@@ -597,10 +731,14 @@ void r300InitCmdBuf(r300ContextPtr r300)
 			size * 4, r300->hw.max_state_size * 4);
 	}

+	r300->cmdbuf.buf = dri_bo_alloc(&r300->radeon.bufmgr->base, "cmdbuf",
+		size*4, 16, DRM_BO_MEM_CMDBUF);
 	r300->cmdbuf.size = size;
-	r300->cmdbuf.cmd_buf = (uint32_t *) CALLOC(size * 4);
-	r300->cmdbuf.count_used = 0;
-	r300->cmdbuf.count_reemit = 0;
+	r300->cmdbuf.written = 0;
+	r300->cmdbuf.reserved = 0;
+	r300->cmdbuf.committed = 0;
+	r300->cmdbuf.reemit = 0;
+	dri_bo_map(r300->cmdbuf.buf, GL_TRUE);
 }

 /**
@@ -610,66 +748,10 @@ void r300DestroyCmdBuf(r300ContextPtr r300)
 {
 	struct r300_state_atom *atom;

-	FREE(r300->cmdbuf.cmd_buf);
+	dri_bo_unmap(r300->cmdbuf.buf);
+	dri_bo_unreference(r300->cmdbuf.buf);

 	foreach(atom, &r300->hw.atomlist) {
 		FREE(atom->cmd);
 	}
 }
-
-void r300EmitBlit(r300ContextPtr rmesa,
-		  GLuint color_fmt,
-		  GLuint src_pitch,
-		  GLuint src_offset,
-		  GLuint dst_pitch,
-		  GLuint dst_offset,
-		  GLint srcx, GLint srcy,
-		  GLint dstx, GLint dsty, GLuint w, GLuint h)
-{
-	drm_r300_cmd_header_t *cmd;
-
-	if (RADEON_DEBUG & DEBUG_IOCTL)
-		fprintf(stderr,
-			"%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
-			__FUNCTION__, src_pitch, src_offset, srcx, srcy,
-			dst_pitch, dst_offset, dstx, dsty, w, h);
-
-	assert((src_pitch & 63) == 0);
-	assert((dst_pitch & 63) == 0);
-	assert((src_offset & 1023) == 0);
-	assert((dst_offset & 1023) == 0);
-	assert(w < (1 << 16));
-	assert(h < (1 << 16));
-
-	cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 8, __FUNCTION__);
-
-	cmd[0].header.cmd_type = R300_CMD_PACKET3;
-	cmd[0].header.pad0 = R300_CMD_PACKET3_RAW;
-	cmd[1].u = R300_CP_CMD_BITBLT_MULTI | (5 << 16);
-	cmd[2].u = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
-		    RADEON_GMC_DST_PITCH_OFFSET_CNTL |
-		    RADEON_GMC_BRUSH_NONE |
-		    (color_fmt << 8) |
-		    RADEON_GMC_SRC_DATATYPE_COLOR |
-		    RADEON_ROP3_S |
-		    RADEON_DP_SRC_SOURCE_MEMORY |
-		    RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
-
-	cmd[3].u = ((src_pitch / 64) << 22) | (src_offset >> 10);
-	cmd[4].u = ((dst_pitch / 64) << 22) | (dst_offset >> 10);
-	cmd[5].u = (srcx << 16) | srcy;
-	cmd[6].u = (dstx << 16) | dsty;	/* dst */
-	cmd[7].u = (w << 16) | h;
-}
-
-void r300EmitWait(r300ContextPtr rmesa, GLuint flags)
-{
-	drm_r300_cmd_header_t *cmd;
-
-	assert(!(flags & ~(R300_WAIT_2D | R300_WAIT_3D)));
-
-	cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
-	cmd[0].u = 0;
-	cmd[0].wait.cmd_type = R300_CMD_WAIT;
-	cmd[0].wait.flags = flags;
-}
diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.h b/src/mesa/drivers/dri/r300/r300_cmdbuf.h
index a8eaa58..4708a4c 100644
--- a/src/mesa/drivers/dri/r300/r300_cmdbuf.h
+++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.h
@@ -45,29 +45,88 @@ extern void r300EmitState(r300ContextPtr r300);

 extern void r300InitCmdBuf(r300ContextPtr r300);
 extern void r300DestroyCmdBuf(r300ContextPtr r300);
+extern void r300EnsureCmdBufSpace(r300ContextPtr r300, int dwords, const char *caller);
+
+extern void r300BeginBatch(r300ContextPtr r300, int n, GLboolean autostate, const char* function, int line);

 /**
- * Make sure that enough space is available in the command buffer
- * by flushing if necessary.
- *
- * \param dwords The number of dwords we need to be free on the command buffer
+ * Every function writing to the command buffer needs to declare this
+ * to get the necessary local variables.
  */
-static INLINE void r300EnsureCmdBufSpace(r300ContextPtr r300,
-					     int dwords, const char *caller)
-{
-	assert(dwords < r300->cmdbuf.size);
+#define BATCH_LOCALS(r300) \
+	const r300ContextPtr b_l_r300 = r300

-	if (r300->cmdbuf.count_used + dwords > r300->cmdbuf.size)
-		r300FlushCmdBuf(r300, caller);
-}
+/**
+ * Prepare writing n dwords to the command buffer,
+ * including producing any necessary state emits on buffer wraparound.
+ */
+#define BEGIN_BATCH(n) r300BeginBatch(b_l_r300, n, GL_TRUE, __FUNCTION__, __LINE__)
+
+/**
+ * Same as BEGIN_BATCH, but do not cause automatic state emits.
+ */
+#define BEGIN_BATCH_NO_AUTOSTATE(n) r300BeginBatch(b_l_r300, n, GL_FALSE, __FUNCTION__, __LINE__)
+
+/**
+ * Write one dword to the command buffer.
+ */
+#define OUT_BATCH(data) \
+	do { \
+		if (b_l_r300->cmdbuf.written < b_l_r300->cmdbuf.reserved) { \
+			((uint32_t*)b_l_r300->cmdbuf.buf->virtual)[b_l_r300->cmdbuf.written++] = data; \
+		} else { \
+			_mesa_problem(b_l_r300->radeon.glCtx, "%s:%i: OUT_BATCH mismatch", __FUNCTION__, __LINE__); \
+		} \
+	} while(0)

 /**
- * Allocate the given number of dwords in the command buffer and return
- * a pointer to the allocated area.
- * When necessary, these functions cause a flush. r300AllocCmdBuf() also
- * causes state reemission after a flush. This is necessary to ensure
- * correct hardware state after an unlock.
+ * Write a relocated dword to the command buffer.
  */
+#define OUT_BATCH_RELOC(data, bo, offset, flags) \
+	do { \
+		if (b_l_r300->cmdbuf.written < b_l_r300->cmdbuf.reserved) { \
+			dri_emit_reloc(b_l_r300->cmdbuf.buf, flags, offset, 4*b_l_r300->cmdbuf.written, bo); \
+			((uint32_t*)b_l_r300->cmdbuf.buf->virtual)[b_l_r300->cmdbuf.written++] = data; \
+		} else { \
+			_mesa_problem(b_l_r300->radeon.glCtx, "%s:%i: OUT_BATCH mismatch", __FUNCTION__, __LINE__); \
+		} \
+	} while(0)
+
+/**
+ * Write n dwords from ptr to the command buffer.
+ */
+#define OUT_BATCH_TABLE(ptr,n) \
+	do { \
+		int _n = n; \
+		if (b_l_r300->cmdbuf.written+_n <= b_l_r300->cmdbuf.reserved) { \
+			memcpy((uint32_t*)b_l_r300->cmdbuf.buf->virtual + b_l_r300->cmdbuf.written, (ptr), 4*_n); \
+			b_l_r300->cmdbuf.written += _n; \
+		} else { \
+			_mesa_problem(b_l_r300->radeon.glCtx, "%s:%i: OUT_BATCH_TABLE mismatch", __FUNCTION__, __LINE__); \
+		} \
+	} while(0)
+
+/**
+ * Finish writing dwords to the command buffer.
+ * The number of (direct or indirect) OUT_BATCH calls between the previous
+ * BEGIN_BATCH and END_BATCH must match the number specified at BEGIN_BATCH time.
+ */
+#define END_BATCH() \
+	do { \
+		if (b_l_r300->cmdbuf.written != b_l_r300->cmdbuf.reserved) \
+			_mesa_problem(b_l_r300->radeon.glCtx, "%s:%i: END_BATCH mismatch", __FUNCTION__, __LINE__); \
+	} while(0)
+
+/**
+ * After the last END_BATCH() of rendering, this indicates that flushing
+ * the command buffer now is okay.
+ */
+#define COMMIT_BATCH() \
+	do { \
+		assert(b_l_r300->cmdbuf.written == b_l_r300->cmdbuf.reserved); \
+		b_l_r300->cmdbuf.committed = b_l_r300->cmdbuf.written; \
+	} while(0)
+
 static INLINE uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300,
 					       int dwords, const char *caller)
 {
@@ -75,8 +134,9 @@ static INLINE uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300,

 	r300EnsureCmdBufSpace(r300, dwords, caller);

-	ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used];
-	r300->cmdbuf.count_used += dwords;
+	ptr = (uint32_t*)r300->cmdbuf.buf->virtual + r300->cmdbuf.written;
+	r300->cmdbuf.written += dwords;
+	r300->cmdbuf.reserved = r300->cmdbuf.committed = r300->cmdbuf.written;
 	return ptr;
 }

@@ -87,30 +147,17 @@ static INLINE uint32_t *r300AllocCmdBuf(r300ContextPtr r300,

 	r300EnsureCmdBufSpace(r300, dwords, caller);

-	if (!r300->cmdbuf.count_used) {
+	if (!r300->cmdbuf.written) {
 		if (RADEON_DEBUG & DEBUG_IOCTL)
 			fprintf(stderr,
 				"Reemit state after flush (from %s)\n", caller);
 		r300EmitState(r300);
 	}

-	ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used];
-	r300->cmdbuf.count_used += dwords;
+	ptr = (uint32_t*)r300->cmdbuf.buf->virtual + r300->cmdbuf.written;
+	r300->cmdbuf.written += dwords;
+	r300->cmdbuf.reserved = r300->cmdbuf.committed = r300->cmdbuf.written;
 	return ptr;
 }

-extern void r300EmitBlit(r300ContextPtr rmesa,
-			 GLuint color_fmt,
-			 GLuint src_pitch,
-			 GLuint src_offset,
-			 GLuint dst_pitch,
-			 GLuint dst_offset,
-			 GLint srcx, GLint srcy,
-			 GLint dstx, GLint dsty, GLuint w, GLuint h);
-
-extern void r300EmitWait(r300ContextPtr rmesa, GLuint flags);
-extern void r300EmitLOAD_VBPNTR(r300ContextPtr rmesa, int start);
-extern void r300EmitVertexShader(r300ContextPtr rmesa);
-extern void r300EmitPixelShader(r300ContextPtr rmesa);
-
 #endif				/* __R300_CMDBUF_H__ */
diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
index fcf571d..cc9c11a 100644
--- a/src/mesa/drivers/dri/r300/r300_context.c
+++ b/src/mesa/drivers/dri/r300/r300_context.c
@@ -59,15 +59,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "radeon_span.h"
 #include "r300_context.h"
 #include "r300_cmdbuf.h"
+#include "r300_mipmap_tree.h"
 #include "r300_state.h"
 #include "r300_ioctl.h"
 #include "r300_tex.h"
 #include "r300_emit.h"
 #include "r300_swtcl.h"

-#ifdef USER_BUFFERS
 #include "r300_mem.h"
-#endif

 #include "vblank.h"
 #include "utils.h"
@@ -190,7 +189,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
 	struct dd_function_table functions;
 	r300ContextPtr r300;
 	GLcontext *ctx;
-	int tcl_mode, i;
+	int tcl_mode;

 	assert(glVisual);
 	assert(driContextPriv);
@@ -222,10 +221,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
 	r300InitTextureFuncs(&functions);
 	r300InitShaderFuncs(&functions);

-#ifdef USER_BUFFERS
-	r300_mem_init(r300);
-#endif
-
 	if (!radeonInitContext(&r300->radeon, &functions,
 			       glVisual, driContextPriv,
 			       sharedContextPrivate)) {
@@ -233,34 +228,9 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
 		return GL_FALSE;
 	}

+	r300->radeon.bufmgr = radeonBufmgrClassicInit(r300);
+
 	/* Init r300 context data */
-	r300->dma.buf0_address =
-	    r300->radeon.radeonScreen->buffers->list[0].address;
-
-	(void)memset(r300->texture_heaps, 0, sizeof(r300->texture_heaps));
-	make_empty_list(&r300->swapped);
-
-	r300->nr_heaps = 1 /* screen->numTexHeaps */ ;
-	assert(r300->nr_heaps < RADEON_NR_TEX_HEAPS);
-	for (i = 0; i < r300->nr_heaps; i++) {
-		/* *INDENT-OFF* */
-		r300->texture_heaps[i] = driCreateTextureHeap(i, r300,
-							       screen->
-							       texSize[i], 12,
-							       RADEON_NR_TEX_REGIONS,
-							       (drmTextureRegionPtr)
-							       r300->radeon.sarea->
-							       tex_list[i],
-							       &r300->radeon.sarea->
-							       tex_age[i],
-							       &r300->swapped,
-							       sizeof
-							       (r300TexObj),
-							       (destroy_texture_object_t
-								*)
-							       r300DestroyTexObj);
-		/* *INDENT-ON* */
-	}
 	r300->texture_depth = driQueryOptioni(&r300->radeon.optionCache,
 					      "texture_depth");
 	if (r300->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
@@ -299,13 +269,11 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
 	ctx->Const.MaxLineWidth = R300_LINESIZE_MAX;
 	ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX;

-#ifdef USER_BUFFERS
 	/* Needs further modifications */
 #if 0
 	ctx->Const.MaxArrayLockSize =
 	    ( /*512 */ RADEON_BUFFER_SIZE * 16 * 1024) / (4 * 4);
 #endif
-#endif

 	/* Initialize the software rasterizer and helper modules.
 	 */
@@ -407,72 +375,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
 	return GL_TRUE;
 }

-static void r300FreeGartAllocations(r300ContextPtr r300)
-{
-	int i, ret, tries = 0, done_age, in_use = 0;
-	drm_radeon_mem_free_t memfree;
-
-	memfree.region = RADEON_MEM_REGION_GART;
-
-#ifdef USER_BUFFERS
-	for (i = r300->rmm->u_last; i > 0; i--) {
-		if (r300->rmm->u_list[i].ptr == NULL) {
-			continue;
-		}
-
-		/* check whether this buffer is still in use */
-		if (r300->rmm->u_list[i].pending) {
-			in_use++;
-		}
-	}
-	/* Cannot flush/lock if no context exists. */
-	if (in_use)
-		r300FlushCmdBuf(r300, __FUNCTION__);
-
-	done_age = radeonGetAge((radeonContextPtr) r300);
-
-	for (i = r300->rmm->u_last; i > 0; i--) {
-		if (r300->rmm->u_list[i].ptr == NULL) {
-			continue;
-		}
-
-		/* check whether this buffer is still in use */
-		if (!r300->rmm->u_list[i].pending) {
-			continue;
-		}
-
-		assert(r300->rmm->u_list[i].h_pending == 0);
-
-		tries = 0;
-		while (r300->rmm->u_list[i].age > done_age && tries++ < 1000) {
-			usleep(10);
-			done_age = radeonGetAge((radeonContextPtr) r300);
-		}
-		if (tries >= 1000) {
-			WARN_ONCE("Failed to idle region!");
-		}
-
-		memfree.region_offset = (char *)r300->rmm->u_list[i].ptr -
-		    (char *)r300->radeon.radeonScreen->gartTextures.map;
-
-		ret = drmCommandWrite(r300->radeon.radeonScreen->driScreen->fd,
-				      DRM_RADEON_FREE, &memfree,
-				      sizeof(memfree));
-		if (ret) {
-			fprintf(stderr, "Failed to free at %p\nret = %s\n",
-				r300->rmm->u_list[i].ptr, strerror(-ret));
-		} else {
-			if (i == r300->rmm->u_last)
-				r300->rmm->u_last--;
-
-			r300->rmm->u_list[i].pending = 0;
-			r300->rmm->u_list[i].ptr = NULL;
-		}
-	}
-	r300->rmm->u_head = i;
-#endif				/* USER_BUFFERS */
-}
-
 /* Destroy the device specific context.
  */
 void r300DestroyContext(__DRIcontextPrivate * driContextPriv)
@@ -496,24 +398,17 @@ void r300DestroyContext(__DRIcontextPrivate * driContextPriv)
 	assert(r300);		/* should never be null */

 	if (r300) {
-		GLboolean release_texture_heaps;
-
-		release_texture_heaps =
-		    (r300->radeon.glCtx->Shared->RefCount == 1);
 		_swsetup_DestroyContext(r300->radeon.glCtx);
 		_tnl_ProgramCacheDestroy(r300->radeon.glCtx);
 		_tnl_DestroyContext(r300->radeon.glCtx);
 		_vbo_DestroyContext(r300->radeon.glCtx);
 		_swrast_DestroyContext(r300->radeon.glCtx);

-		if (r300->dma.current.buf) {
-			r300ReleaseDmaRegion(r300, &r300->dma.current,
-					     __FUNCTION__);
-#ifndef USER_BUFFERS
-			r300FlushCmdBuf(r300, __FUNCTION__);
-#endif
+		if (r300->dma.current) {
+			dri_bo_unreference(r300->dma.current);
+			r300->dma.current = 0;
 		}
-		r300FreeGartAllocations(r300);
+		r300FlushCmdBuf(r300, __FUNCTION__);
 		r300DestroyCmdBuf(r300);

 		if (radeon->state.scissor.pClipRects) {
@@ -521,28 +416,13 @@ void r300DestroyContext(__DRIcontextPrivate * driContextPriv)
 			radeon->state.scissor.pClipRects = NULL;
 		}

-		if (release_texture_heaps) {
-			/* This share group is about to go away, free our private
-			 * texture object data.
-			 */
-			int i;
-
-			for (i = 0; i < r300->nr_heaps; i++) {
-				driDestroyTextureHeap(r300->texture_heaps[i]);
-				r300->texture_heaps[i] = NULL;
-			}
-
-			assert(is_empty_list(&r300->swapped));
-		}
-
 		radeonCleanupContext(&r300->radeon);

-#ifdef USER_BUFFERS
 		/* the memory manager might be accessed when Mesa frees the shared
 		 * state, so don't destroy it earlier
 		 */
-		r300_mem_destroy(r300);
-#endif
+		dri_bufmgr_destroy(&r300->radeon.bufmgr->base);
+		r300->radeon.bufmgr = 0;

 		/* free the option cache */
 		driDestroyOptionCache(&r300->radeon.optionCache);
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
index d2017f8..5c99740 100644
--- a/src/mesa/drivers/dri/r300/r300_context.h
+++ b/src/mesa/drivers/dri/r300/r300_context.h
@@ -40,6 +40,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "tnl/t_vertex.h"
 #include "drm.h"
 #include "radeon_drm.h"
+#include "dri_bufmgr.h"
 #include "dri_util.h"
 #include "texmem.h"

@@ -47,11 +48,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "mtypes.h"
 #include "colormac.h"

-#define USER_BUFFERS
-
 struct r300_context;
 typedef struct r300_context r300ContextRec;
 typedef struct r300_context *r300ContextPtr;
+typedef struct radeon_bufmgr radeon_bufmgr;

 #include "radeon_lock.h"
 #include "mm.h"
@@ -122,44 +122,22 @@ static INLINE uint32_t r300PackFloat24(float f)

 /************ DMA BUFFERS **************/

-/* Need refcounting on dma buffers:
- */
-struct r300_dma_buffer {
-	int refcount;		/**< the number of retained regions in buf */
-	drmBufPtr buf;
-	int id;
-};
-#undef GET_START
-#ifdef USER_BUFFERS
-#define GET_START(rvb) (r300GartOffsetFromVirtual(rmesa, (rvb)->address+(rvb)->start))
-#else
-#define GET_START(rvb) (rmesa->radeon.radeonScreen->gart_buffer_offset +		\
-			(rvb)->address - rmesa->dma.buf0_address +	\
-			(rvb)->start)
-#endif
-/* A retained region, eg vertices for indexed vertices.
- */
-struct r300_dma_region {
-	struct r300_dma_buffer *buf;
-	char *address;		/* == buf->address */
-	int start, end, ptr;	/* offsets from start of buf */
-
-	int aos_offset;		/* address in GART memory */
-	int aos_stride;		/* distance between elements, in dwords */
-	int aos_size;		/* number of components (1-4) */
-};
-
 struct r300_dma {
 	/* Active dma region.  Allocations for vertices and retained
 	 * regions come from here.  Also used for emitting random vertices,
 	 * these may be flushed by calling flush_current();
 	 */
-	struct r300_dma_region current;
+	dri_bo *current; /** Buffer that DMA memory is allocated from */
+	int current_used; /** Number of bytes allocated and forgotten about */
+	int current_vertexptr; /** End of active vertex region */

+	/**
+	 * If current_vertexptr != current_used then flush must be non-zero.
+	 * flush must be called before non-active vertex allocations can be
+	 * performed.
+	 */
 	void (*flush) (r300ContextPtr);

-	char *buf0_address;	/* start of buf[0], for index calcs */
-
 	/* Number of "in-flight" DMA buffers, i.e. the number of buffers
 	 * for which a DISCARD command is currently queued in the command buffer.
 	 */
@@ -173,15 +151,12 @@ typedef struct r300_tex_obj r300TexObj, *r300TexObjPtr;
 /* Texture object in locally shared texture space.
  */
 struct r300_tex_obj {
-	driTextureObject base;
-
-	GLuint bufAddr;		/* Offset to start of locally
-				   shared texture block */
-
-	drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS];
-	/* Six, for the cube faces */
+	struct gl_texture_object base;
+	struct _r300_mipmap_tree *mt;
+	GLuint dirty_images[6];

 	GLboolean image_override;	/* Image overridden by GLX_EXT_tfp */
+	GLuint override_offset;

 	GLuint pitch;		/* this isn't sent to hardware just used in calculations */
 	/* hardware register values */
@@ -191,30 +166,16 @@ struct r300_tex_obj {
 	GLuint pitch_reg;
 	GLuint size;		/* npot only */
 	GLuint format;
-	GLuint offset;		/* Image location in the card's address space.
-				   All cube faces follow. */
-	GLuint unknown4;
-	GLuint unknown5;
-	/* end hardware registers */
-
-	/* registers computed by r200 code - keep them here to
-	   compare against what is actually written.
-
-	   to be removed later.. */
 	GLuint pp_border_color;
-	GLuint pp_cubic_faces;	/* cube face 1,2,3,4 log2 sizes */
-	GLuint format_x;
-
-	GLboolean border_fallback;
+	/* end hardware registers */

 	GLuint tile_bits;	/* hw texture tile bits used on this texture */
 };

-struct r300_texture_env_state {
-	r300TexObjPtr texobj;
-	GLenum format;
-	GLenum envMode;
-};
+static INLINE r300TexObj* r300_tex_obj(struct gl_texture_object *texObj)
+{
+	return (r300TexObj*)texObj;
+}

 /* The blit width for texture uploads
  */
@@ -222,7 +183,6 @@ struct r300_texture_env_state {
 #define R300_MAX_TEXTURE_UNITS 8

 struct r300_texture_state {
-	struct r300_texture_env_state unit[R300_MAX_TEXTURE_UNITS];
 	int tc_count;		/* number of incoming texture coordinates from VAP */
 };

@@ -242,6 +202,7 @@ struct r300_state_atom {
 	GLboolean dirty;

 	int (*check) (r300ContextPtr, struct r300_state_atom * atom);
+	void (*emit) (r300ContextPtr);
 };

 #define R300_VPT_CMD_0		0
@@ -549,6 +510,8 @@ struct r300_hw_state {
 		struct r300_state_atom border_color;
 	} tex;
 	struct r300_state_atom txe;	/* tex enable (4104) */
+
+	r300TexObj *textures[R300_MAX_TEXTURE_UNITS];
 };

 /**
@@ -559,10 +522,14 @@ struct r300_hw_state {
  * otherwise.
  */
 struct r300_cmdbuf {
-	int size;		/* DWORDs allocated for buffer */
-	uint32_t *cmd_buf;
-	int count_used;		/* DWORDs filled so far */
-	int count_reemit;	/* size of re-emission batch */
+	dri_bo *buf;
+	int reemit; /** # of dwords in reemit sequence (is always <= committed) */
+	int size; /** # of dwords total */
+
+	int committed; /** # of dwords that we have committed to */
+	int written; /** # of dwords written (is always >= committed) */
+	int reserved; /** # of dwords reserved up to previous BEGIN_BATCH */
+	unsigned int flushing:1; /** whether we're currently in FlushCmdBufLocked */
 };

 /**
@@ -811,18 +778,25 @@ struct r500_fragment_program {
 #define REG_COLOR0	1
 #define REG_TEX0	2

+struct r300_aos {
+	dri_bo *bo; /** Buffer object where vertex data is stored */
+	int offset; /** Offset into buffer object, in bytes */
+	int components; /** Number of components per vertex */
+	int stride; /** Stride in dwords (may be 0 for repeating) */
+	int count; /** Number of vertices */
+};
+
 struct r300_state {
 	struct r300_depthbuffer_state depth;
 	struct r300_texture_state texture;
 	int sw_tcl_inputs[VERT_ATTRIB_MAX];
 	struct r300_vertex_shader_state vertex_shader;
-	struct r300_dma_region aos[R300_MAX_AOS_ARRAYS];
+	struct r300_aos aos[R300_MAX_AOS_ARRAYS];
 	int aos_count;

-	GLuint *Elts;
-	struct r300_dma_region elt_dma;
+	dri_bo *elt_dma_bo; /** Buffer object that contains element indices */
+	int elt_dma_offset; /** Offset into this buffer object, in bytes */

-	struct r300_dma_region swtcl_dma;
 	DECLARE_RENDERINPUTS(render_inputs_bitset);	/* actual render inputs that R300 was configured for.
 							   They are the same as tnl->render_inputs for fixed pipeline */

@@ -880,13 +854,6 @@ struct r300_swtcl_info {
     * Offset of the 3UB specular color data within a hardware (swtcl) vertex.
     */
    GLuint specoffset;
-
-   /**
-    * Should Mesa project vertex data or will the hardware do it?
-    */
-   GLboolean needproj;
-
-   struct r300_dma_region indexed_verts;
 };


@@ -905,25 +872,11 @@ struct r300_context {
 	/* Vertex buffers
 	 */
 	struct r300_dma dma;
-	GLboolean save_on_next_unlock;
 	GLuint NewGLState;

-	/* Texture object bookkeeping
-	 */
-	unsigned nr_heaps;
-	driTexHeap *texture_heaps[RADEON_NR_TEX_HEAPS];
-	driTextureObject swapped;
 	int texture_depth;
 	float initialMaxAnisotropy;

-	/* Clientdata textures;
-	 */
-	GLuint prefer_gart_client_texturing;
-
-#ifdef USER_BUFFERS
-	struct r300_memory_manager *rmm;
-#endif
-
 	GLvector4f dummy_attrib[_TNL_ATTRIB_MAX];
 	GLvector4f *temp_attrib[_TNL_ATTRIB_MAX];

diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c
index 2ea17ad..5e2afd5 100644
--- a/src/mesa/drivers/dri/r300/r300_emit.c
+++ b/src/mesa/drivers/dri/r300/r300_emit.c
@@ -51,9 +51,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "r300_emit.h"
 #include "r300_ioctl.h"

-#ifdef USER_BUFFERS
 #include "r300_mem.h"
-#endif

 #if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \
     SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \
@@ -86,11 +84,9 @@ do {						\
 } while (0)
 #endif

-static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb,
-			 GLvoid * data, int stride, int count)
+static void r300EmitVec4(uint32_t *out, GLvoid * data, int stride, int count)
 {
 	int i;
-	int *out = (int *)(rvb->address + rvb->start);

 	if (RADEON_DEBUG & DEBUG_VERTS)
 		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
@@ -106,11 +102,9 @@ static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb,
 		}
 }

-static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb,
-			 GLvoid * data, int stride, int count)
+static void r300EmitVec8(uint32_t *out, GLvoid * data, int stride, int count)
 {
 	int i;
-	int *out = (int *)(rvb->address + rvb->start);

 	if (RADEON_DEBUG & DEBUG_VERTS)
 		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
@@ -127,11 +121,9 @@ static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb,
 		}
 }

-static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb,
-			  GLvoid * data, int stride, int count)
+static void r300EmitVec12(uint32_t *out, GLvoid * data, int stride, int count)
 {
 	int i;
-	int *out = (int *)(rvb->address + rvb->start);

 	if (RADEON_DEBUG & DEBUG_VERTS)
 		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
@@ -149,11 +141,9 @@ static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb,
 		}
 }

-static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb,
-			  GLvoid * data, int stride, int count)
+static void r300EmitVec16(uint32_t *out, GLvoid * data, int stride, int count)
 {
 	int i;
-	int *out = (int *)(rvb->address + rvb->start);

 	if (RADEON_DEBUG & DEBUG_VERTS)
 		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
@@ -172,35 +162,31 @@ static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb,
 		}
 }

-static void r300EmitVec(GLcontext * ctx, struct r300_dma_region *rvb,
+
+static void r300EmitVec(GLcontext * ctx, struct r300_aos *aos,
 			GLvoid * data, int size, int stride, int count)
 {
 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	uint32_t *out;

 	if (stride == 0) {
-		r300AllocDmaRegion(rmesa, rvb, size * 4, 4);
+		r300AllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
 		count = 1;
-		rvb->aos_offset = GET_START(rvb);
-		rvb->aos_stride = 0;
+		aos->stride = 0;
 	} else {
-		r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4);
-		rvb->aos_offset = GET_START(rvb);
-		rvb->aos_stride = size;
+		r300AllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
+		aos->stride = size;
 	}

+	aos->components = size;
+	aos->count = count;
+
+	out = (uint32_t*)((char*)aos->bo->virtual + aos->offset);
 	switch (size) {
-	case 1:
-		r300EmitVec4(ctx, rvb, data, stride, count);
-		break;
-	case 2:
-		r300EmitVec8(ctx, rvb, data, stride, count);
-		break;
-	case 3:
-		r300EmitVec12(ctx, rvb, data, stride, count);
-		break;
-	case 4:
-		r300EmitVec16(ctx, rvb, data, stride, count);
-		break;
+	case 1: r300EmitVec4(out, data, stride, count); break;
+	case 2: r300EmitVec8(out, data, stride, count); break;
+	case 3: r300EmitVec12(out, data, stride, count); break;
+	case 4: r300EmitVec16(out, data, stride, count); break;
 	default:
 		assert(0);
 		break;
@@ -433,7 +419,7 @@ int r300EmitArrays(GLcontext * ctx)
 	}

 	for (i = 0; i < nr; i++) {
-		int ci, fix, found = 0;
+		int ci;

 		swizzle[i][0] = SWIZZLE_ZERO;
 		swizzle[i][1] = SWIZZLE_ZERO;
@@ -444,48 +430,10 @@ int r300EmitArrays(GLcontext * ctx)
 			swizzle[i][ci] = ci;
 		}

-		if (r300IsGartMemory(rmesa, vb->AttribPtr[tab[i]]->data, 4)) {
-			if (vb->AttribPtr[tab[i]]->stride % 4) {
-				return R300_FALLBACK_TCL;
-			}
-			rmesa->state.aos[i].address = (void *)(vb->AttribPtr[tab[i]]->data);
-			rmesa->state.aos[i].start = 0;
-			rmesa->state.aos[i].aos_offset = r300GartOffsetFromVirtual(rmesa, vb->AttribPtr[tab[i]]->data);
-			rmesa->state.aos[i].aos_stride = vb->AttribPtr[tab[i]]->stride / 4;
-			rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
-		} else {
-			r300EmitVec(ctx, &rmesa->state.aos[i],
-				    vb->AttribPtr[tab[i]]->data,
-				    vb->AttribPtr[tab[i]]->size,
-				    vb->AttribPtr[tab[i]]->stride, count);
-		}
-
-		rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
-
-		for (fix = 0; fix <= 4 - vb->AttribPtr[tab[i]]->size; fix++) {
-			if ((rmesa->state.aos[i].aos_offset - _mesa_sizeof_type(GL_FLOAT) * fix) % 4) {
-				continue;
-			}
-			found = 1;
-			break;
-		}
-
-		if (found) {
-			if (fix > 0) {
-				WARN_ONCE("Feeling lucky?\n");
-			}
-			rmesa->state.aos[i].aos_offset -= _mesa_sizeof_type(GL_FLOAT) * fix;
-			for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
-				swizzle[i][ci] += fix;
-			}
-		} else {
-			WARN_ONCE
-			    ("Cannot handle offset %x with stride %d, comp %d\n",
-			     rmesa->state.aos[i].aos_offset,
-			     rmesa->state.aos[i].aos_stride,
-			     vb->AttribPtr[tab[i]]->size);
-			return R300_FALLBACK_TCL;
-		}
+		r300EmitVec(ctx, &rmesa->state.aos[i],
+				vb->AttribPtr[tab[i]]->data,
+				vb->AttribPtr[tab[i]]->size,
+				vb->AttribPtr[tab[i]]->stride, count);
 	}

 	/* Setup INPUT_ROUTE. */
@@ -515,45 +463,76 @@ int r300EmitArrays(GLcontext * ctx)
 	return R300_FALLBACK_NONE;
 }

-#ifdef USER_BUFFERS
-void r300UseArrays(GLcontext * ctx)
-{
-	r300ContextPtr rmesa = R300_CONTEXT(ctx);
-	int i;
-
-	if (rmesa->state.elt_dma.buf)
-		r300_mem_use(rmesa, rmesa->state.elt_dma.buf->id);
-
-	for (i = 0; i < rmesa->state.aos_count; i++) {
-		if (rmesa->state.aos[i].buf)
-			r300_mem_use(rmesa, rmesa->state.aos[i].buf->id);
-	}
-}
-#endif
-
 void r300ReleaseArrays(GLcontext * ctx)
 {
 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
 	int i;

-	r300ReleaseDmaRegion(rmesa, &rmesa->state.elt_dma, __FUNCTION__);
+	if (rmesa->state.elt_dma_bo) {
+		dri_bo_unreference(rmesa->state.elt_dma_bo);
+		rmesa->state.elt_dma_bo = 0;
+	}
 	for (i = 0; i < rmesa->state.aos_count; i++) {
-		r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__);
+		if (rmesa->state.aos[i].bo) {
+			dri_bo_unreference(rmesa->state.aos[i].bo);
+			rmesa->state.aos[i].bo = 0;
+		}
 	}
 }

 void r300EmitCacheFlush(r300ContextPtr rmesa)
 {
-	int cmd_reserved = 0;
-	int cmd_written = 0;
-
-	drm_radeon_cmd_header_t *cmd = NULL;
-
-	reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0);
-	e32(R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
-	    R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
+	BATCH_LOCALS(rmesa);
+
+	BEGIN_BATCH(4);
+	OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT,
+		R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
+		R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
+	OUT_BATCH_REGVAL(R300_ZB_ZCACHE_CTLSTAT,
+		R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
+		R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
+	END_BATCH();
+	COMMIT_BATCH();
+}

-	reg_start(R300_ZB_ZCACHE_CTLSTAT, 0);
-	e32(R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
-	    R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
+void r300EmitBlit(r300ContextPtr rmesa,
+		  GLuint color_fmt,
+		  GLuint src_pitch,
+		  dri_bo *src_bo, int src_offset,
+		  GLuint dst_pitch,
+		  GLuint dst_offset,
+		  GLint srcx, GLint srcy,
+		  GLint dstx, GLint dsty, GLuint w, GLuint h)
+{
+	BATCH_LOCALS(rmesa);
+
+	if (RADEON_DEBUG & DEBUG_IOCTL)
+		fprintf(stderr,
+			"%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
+			__FUNCTION__, src_pitch, src_offset, srcx, srcy,
+			dst_pitch, dst_offset, dstx, dsty, w, h);
+
+	assert((src_pitch & 63) == 0);
+	assert((dst_pitch & 63) == 0);
+	assert((src_offset & 1023) == 0);
+	assert((dst_offset & 1023) == 0);
+	assert(w < (1 << 16));
+	assert(h < (1 << 16));
+
+	BEGIN_BATCH(8);
+	OUT_BATCH_PACKET3(R300_CP_CMD_BITBLT_MULTI, 5);
+	OUT_BATCH(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
+		  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
+		  RADEON_GMC_BRUSH_NONE |
+		  (color_fmt << 8) |
+		  RADEON_GMC_SRC_DATATYPE_COLOR |
+		  RADEON_ROP3_S |
+		  RADEON_DP_SRC_SOURCE_MEMORY |
+		  RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
+	OUT_BATCH_RELOC((src_pitch / 64) << 22, src_bo, src_offset, DRM_RELOC_BLITTER);
+	OUT_BATCH(((dst_pitch / 64) << 22) | (dst_offset >> 10));
+	OUT_BATCH((srcx << 16) | srcy);
+	OUT_BATCH((dstx << 16) | dsty);
+	OUT_BATCH((w << 16) | h);
+	END_BATCH();
 }
diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h
index 5950539..179983d 100644
--- a/src/mesa/drivers/dri/r300/r300_emit.h
+++ b/src/mesa/drivers/dri/r300/r300_emit.h
@@ -127,130 +127,62 @@ static INLINE uint32_t cmdpacify(void)
 	return cmd.u;
 }

-/**
- * Prepare to write a register value to register at address reg.
- * If num_extra > 0 then the following extra values are written
- * to registers with address +4, +8 and so on..
- */
-#define reg_start(reg, num_extra)					\
-	do {								\
-		int _n;							\
-		_n=(num_extra);						\
-		cmd = (drm_radeon_cmd_header_t*)			\
-			r300AllocCmdBuf(rmesa,				\
-					(_n+2),				\
-					__FUNCTION__);			\
-		cmd_reserved=_n+2;					\
-		cmd_written=1;						\
-		cmd[0].i=cmdpacket0((reg), _n+1);			\
-	} while (0);
+
+/** Single register write to command buffer; requires 2 dwords. */
+#define OUT_BATCH_REGVAL(reg, val) \
+	OUT_BATCH(cmdpacket0((reg), 1)); \
+	OUT_BATCH((val))
+
+/** Continuous register range write to command buffer; requires 1 dword,
+ * expects count dwords afterwards for register contents. */
+#define OUT_BATCH_REGSEQ(reg, count) \
+	OUT_BATCH(cmdpacket0((reg), (count)));
+
+/** Write a 32 bit float to the ring; requires 1 dword. */
+#define OUT_BATCH_FLOAT32(f) \
+	OUT_BATCH(r300PackFloat32((f)));

 /**
- * Emit GLuint freestyle
+ * Write the header of a packet3 to the command buffer.
+ * Outputs 2 dwords and expects (num_extra+1) additional dwords afterwards.
  */
-#define e32(dword)							\
-	do {								\
-		if(cmd_written<cmd_reserved) {				\
-			cmd[cmd_written].i=(dword);			\
-			cmd_written++;					\
-		} else {						\
-			fprintf(stderr,					\
-				"e32 but no previous packet "		\
-				"declaration.\n"			\
-				"Aborting! in %s::%s at line %d, "	\
-				"cmd_written=%d cmd_reserved=%d\n",	\
-				__FILE__, __FUNCTION__, __LINE__,	\
-				cmd_written, cmd_reserved);		\
-			_mesa_exit(-1);					\
-		}							\
+#define OUT_BATCH_PACKET3(packet, num_extra) do {\
+	OUT_BATCH(cmdpacket3(R300_CMD_PACKET3_RAW)); \
+	OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
 	} while(0)

-#define	efloat(f) e32(r300PackFloat32(f))
-
-#define vsf_start_fragment(dest, length)				\
-	do {								\
-		int _n;							\
-		_n = (length);						\
-		cmd = (drm_radeon_cmd_header_t*)			\
-			r300AllocCmdBuf(rmesa,				\
-					(_n+1),				\
-					__FUNCTION__);			\
-		cmd_reserved = _n+2;					\
-		cmd_written =1;						\
-		cmd[0].i = cmdvpu((dest), _n/4);			\
-	} while (0);
-
-#define r500fp_start_fragment(dest, length)				\
-	do {								\
-		int _n;							\
-		_n = (length);						\
-		cmd = (drm_radeon_cmd_header_t*)			\
-			r300AllocCmdBuf(rmesa,				\
-					(_n+1),				\
-					__FUNCTION__);			\
-		cmd_reserved = _n+1;					\
-		cmd_written =1;						\
-		cmd[0].i = cmdr500fp((dest), _n/6, 0, 0);		\
-	} while (0);
-
-#define start_packet3(packet, count)					\
-	{								\
-		int _n;							\
-		GLuint _p;						\
-		_n = (count);						\
-		_p = (packet);						\
-		cmd = (drm_radeon_cmd_header_t*)			\
-			r300AllocCmdBuf(rmesa,				\
-					(_n+3),				\
-					__FUNCTION__);			\
-		cmd_reserved = _n+3;					\
-		cmd_written = 2;					\
-		if(_n > 0x3fff) {					\
-			fprintf(stderr,"Too big packet3 %08x: cannot "	\
-				"store %d dwords\n",			\
-				_p, _n);				\
-			_mesa_exit(-1);					\
-		}							\
-		cmd[0].i = cmdpacket3(R300_CMD_PACKET3_RAW);		\
-		cmd[1].i = _p | ((_n & 0x3fff)<<16);			\
-	}
-
 /**
  * Must be sent to switch to 2d commands
  */
 void static INLINE end_3d(r300ContextPtr rmesa)
 {
-	drm_radeon_cmd_header_t *cmd = NULL;
+	BATCH_LOCALS(rmesa);

-	cmd =
-	    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
-	cmd[0].header.cmd_type = R300_CMD_END3D;
+	BEGIN_BATCH(1);
+	OUT_BATCH(cmdpacify());
+	END_BATCH();
 }

 void static INLINE cp_delay(r300ContextPtr rmesa, unsigned short count)
 {
-	drm_radeon_cmd_header_t *cmd = NULL;
+	BATCH_LOCALS(rmesa);

-	cmd =
-	    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
-	cmd[0].i = cmdcpdelay(count);
+	BEGIN_BATCH(1);
+	OUT_BATCH(cmdcpdelay(count));
+	END_BATCH();
 }

 void static INLINE cp_wait(r300ContextPtr rmesa, unsigned char flags)
 {
-	drm_radeon_cmd_header_t *cmd = NULL;
+	BATCH_LOCALS(rmesa);

-	cmd =
-	    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
-	cmd[0].i = cmdwait(flags);
+	BEGIN_BATCH(1);
+	OUT_BATCH(cmdwait(flags));
+	END_BATCH();
 }

 extern int r300EmitArrays(GLcontext * ctx);

-#ifdef USER_BUFFERS
-void r300UseArrays(GLcontext * ctx);
-#endif
-
 extern void r300ReleaseArrays(GLcontext * ctx);
 extern int r300PrimitiveType(r300ContextPtr rmesa, int prim);
 extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim);
@@ -265,4 +197,13 @@ extern GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead);
 extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten);
 extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten);

+extern void r300EmitBlit(r300ContextPtr rmesa,
+			 GLuint color_fmt,
+			 GLuint src_pitch,
+			 dri_bo *src_bo, int src_offset,
+			 GLuint dst_pitch,
+			 GLuint dst_offset,
+			 GLint srcx, GLint srcy,
+			 GLint dstx, GLint dsty, GLuint w, GLuint h);
+
 #endif
diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c
index bd7f060..046f9a2 100644
--- a/src/mesa/drivers/dri/r300/r300_ioctl.c
+++ b/src/mesa/drivers/dri/r300/r300_ioctl.c
@@ -55,6 +55,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "radeon_reg.h"
 #include "r300_emit.h"
 #include "r300_fragprog.h"
+#include "r300_mem.h"

 #include "vblank.h"

@@ -62,64 +63,51 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define CLEARBUFFER_DEPTH	0x2
 #define CLEARBUFFER_STENCIL	0x4

-static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer)
+static void r300ClearBuffer(r300ContextPtr r300, int flags,
+			    struct radeon_renderbuffer *rrb)
 {
+	BATCH_LOCALS(r300);
 	GLcontext *ctx = r300->radeon.glCtx;
 	__DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
-	GLuint cboffset, cbpitch;
-	drm_r300_cmd_header_t *cmd2;
-	int cmd_reserved = 0;
-	int cmd_written = 0;
-	drm_radeon_cmd_header_t *cmd = NULL;
+	GLuint cbpitch = 0;
 	r300ContextPtr rmesa = r300;

 	if (RADEON_DEBUG & DEBUG_IOCTL)
-		fprintf(stderr, "%s: %s buffer (%i,%i %ix%i)\n",
-			__FUNCTION__, buffer ? "back" : "front",
-			dPriv->x, dPriv->y, dPriv->w, dPriv->h);
-
-	if (buffer) {
-		cboffset = r300->radeon.radeonScreen->backOffset;
-		cbpitch = r300->radeon.radeonScreen->backPitch;
-	} else {
-		cboffset = r300->radeon.radeonScreen->frontOffset;
-		cbpitch = r300->radeon.radeonScreen->frontPitch;
+		fprintf(stderr, "%s: buffer %p (%i,%i %ix%i)\n",
+			__FUNCTION__, rrb, dPriv->x, dPriv->y,
+			dPriv->w, dPriv->h);
+
+	if (rrb) {
+		cbpitch = rrb->pitch;
+		if (rrb->cpp == 4)
+			cbpitch |= R300_COLOR_FORMAT_ARGB8888;
+		else
+			cbpitch |= R300_COLOR_FORMAT_RGB565;
+
+		if (r300->radeon.sarea->tiling_enabled)
+			cbpitch |= R300_COLOR_TILE_ENABLE;
 	}

-	cboffset += r300->radeon.radeonScreen->fbLocation;
-
+	/* TODO in bufmgr */
 	cp_wait(r300, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
 	end_3d(rmesa);

-	R300_STATECHANGE(r300, cb);
-	reg_start(R300_RB3D_COLOROFFSET0, 0);
-	e32(cboffset);
-
-	if (r300->radeon.radeonScreen->cpp == 4)
-		cbpitch |= R300_COLOR_FORMAT_ARGB8888;
-	else
-		cbpitch |= R300_COLOR_FORMAT_RGB565;
-
-	if (r300->radeon.sarea->tiling_enabled)
-		cbpitch |= R300_COLOR_TILE_ENABLE;
-
-	reg_start(R300_RB3D_COLORPITCH0, 0);
-	e32(cbpitch);
-
-	R300_STATECHANGE(r300, cmk);
-	reg_start(RB3D_COLOR_CHANNEL_MASK, 0);
+	BEGIN_BATCH(19);
+	OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1);
+	OUT_BATCH_RELOC(0, rrb->bo, 0, DRM_RELOC_TXOFFSET);
+	OUT_BATCH_REGVAL(R300_RB3D_COLORPITCH0, cbpitch);

+	OUT_BATCH_REGSEQ(RB3D_COLOR_CHANNEL_MASK, 1);
 	if (flags & CLEARBUFFER_COLOR) {
-		e32((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) |
-		    (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) |
-		    (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) |
-		    (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0));
+		OUT_BATCH((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) |
+			  (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) |
+			  (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) |
+			  (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0));
 	} else {
-		e32(0x0);
+		OUT_BATCH(0);
 	}

-	R300_STATECHANGE(r300, zs);
-	reg_start(R300_ZB_CNTL, 2);
+	OUT_BATCH_REGSEQ(R300_ZB_CNTL, 3);

 	{
 		uint32_t t1, t2;
@@ -146,37 +134,37 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer)
 			     R300_S_FRONT_ZFAIL_OP_SHIFT);
 		}

-		e32(t1);
-		e32(t2);
-		e32(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << R300_STENCILWRITEMASK_SHIFT) |
-		    (ctx->Stencil.Clear & R300_STENCILREF_MASK));
+		OUT_BATCH(t1);
+		OUT_BATCH(t2);
+		OUT_BATCH(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << R300_STENCILWRITEMASK_SHIFT) |
+			  (ctx->Stencil.Clear & R300_STENCILREF_MASK));
 	}

-	cmd2 = (drm_r300_cmd_header_t *) r300AllocCmdBuf(r300, 9, __FUNCTION__);
-	cmd2[0].packet3.cmd_type = R300_CMD_PACKET3;
-	cmd2[0].packet3.packet = R300_CMD_PACKET3_CLEAR;
-	cmd2[1].u = r300PackFloat32(dPriv->w / 2.0);
-	cmd2[2].u = r300PackFloat32(dPriv->h / 2.0);
-	cmd2[3].u = r300PackFloat32(ctx->Depth.Clear);
-	cmd2[4].u = r300PackFloat32(1.0);
-	cmd2[5].u = r300PackFloat32(ctx->Color.ClearColor[0]);
-	cmd2[6].u = r300PackFloat32(ctx->Color.ClearColor[1]);
-	cmd2[7].u = r300PackFloat32(ctx->Color.ClearColor[2]);
-	cmd2[8].u = r300PackFloat32(ctx->Color.ClearColor[3]);
+	OUT_BATCH(cmdpacket3(R300_CMD_PACKET3_CLEAR));
+	OUT_BATCH_FLOAT32(dPriv->w / 2.0);
+	OUT_BATCH_FLOAT32(dPriv->h / 2.0);
+	OUT_BATCH_FLOAT32(ctx->Depth.Clear);
+	OUT_BATCH_FLOAT32(1.0);
+	OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]);
+	OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]);
+	OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]);
+	OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]);
+	END_BATCH();

 	r300EmitCacheFlush(rmesa);
 	cp_wait(rmesa, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
+
+	R300_STATECHANGE(r300, cb);
+	R300_STATECHANGE(r300, cmk);
+	R300_STATECHANGE(r300, zs);
 }

 static void r300EmitClearState(GLcontext * ctx)
 {
 	r300ContextPtr r300 = R300_CONTEXT(ctx);
-	r300ContextPtr rmesa = r300;
+	BATCH_LOCALS(r300);
 	__DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
 	int i;
-	int cmd_reserved = 0;
-	int cmd_written = 0;
-	drm_radeon_cmd_header_t *cmd = NULL;
 	int has_tcl = 1;
 	int is_r500 = 0;
 	GLuint vap_cntl;
@@ -184,35 +172,37 @@ static void r300EmitClearState(GLcontext * ctx)
 	if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
 		has_tcl = 0;

-        if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
-                is_r500 = 1;
-
+	if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+		is_r500 = 1;

-	/* FIXME: the values written to R300_VAP_INPUT_ROUTE_0_0 and
-	 * R300_VAP_INPUT_ROUTE_0_1 are in fact known, however, the values are
-	 * quite complex; see the functions in r300_emit.c.
+	/* State atom dirty tracking is a little subtle here.
+	 *
+	 * On the one hand, we need to make sure base state is emitted
+	 * here if we start with an empty batch buffer, otherwise clear
+	 * works incorrectly with multiple processes. Therefore, the first
+	 * BEGIN_BATCH cannot be a BEGIN_BATCH_NO_AUTOSTATE.
 	 *
-	 * I believe it would be a good idea to extend the functions in
-	 * r300_emit.c so that they can be used to setup the default values for
-	 * these registers, as well as the actual values used for rendering.
+	 * On the other hand, implicit state emission clears the state atom
+	 * dirty bits, so we have to call R300_STATECHANGE later than the
+	 * first BEGIN_BATCH.
+	 *
+	 * The final trickiness is that, because we change state, we need
+	 * to ensure that any stored swtcl primitives are flushed properly
+	 * before we start changing state. See the R300_NEWPRIM in r300Clear
+	 * for this.
 	 */
-	R300_STATECHANGE(r300, vir[0]);
-	reg_start(R300_VAP_PROG_STREAM_CNTL_0, 0);
+	BEGIN_BATCH(31);
+	OUT_BATCH_REGSEQ(R300_VAP_PROG_STREAM_CNTL_0, 1);
 	if (!has_tcl)
-	    e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
+		OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
 		 ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)));
 	else
-	    e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
+		OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
 		 ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)));

-	/* disable fog */
-	R300_STATECHANGE(r300, fogs);
-	reg_start(R300_FG_FOG_BLEND, 0);
-	e32(0x0);
-
-	R300_STATECHANGE(r300, vir[1]);
-	reg_start(R300_VAP_PROG_STREAM_CNTL_EXT_0, 0);
-	e32(((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
+	OUT_BATCH_REGVAL(R300_FG_FOG_BLEND, 0);
+	OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_EXT_0,
+	   ((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
 	       (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) |
 	       (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) |
 	       (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) |
@@ -226,238 +216,246 @@ static void r300EmitClearState(GLcontext * ctx)
 	      << R300_SWIZZLE1_SHIFT)));

 	/* R300_VAP_INPUT_CNTL_0, R300_VAP_INPUT_CNTL_1 */
-	R300_STATECHANGE(r300, vic);
-	reg_start(R300_VAP_VTX_STATE_CNTL, 1);
-	e32((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT));
-	e32(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0);
+	OUT_BATCH_REGSEQ(R300_VAP_VTX_STATE_CNTL, 2);
+	OUT_BATCH((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT));
+	OUT_BATCH(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0);

-	R300_STATECHANGE(r300, vte);
 	/* comes from fglrx startup of clear */
-	reg_start(R300_SE_VTE_CNTL, 1);
-	e32(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA |
-	    R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
-	    R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
-	    R300_VPORT_Z_OFFSET_ENA);
-	e32(0x8);
+	OUT_BATCH_REGSEQ(R300_SE_VTE_CNTL, 2);
+	OUT_BATCH(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA |
+		  R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
+		  R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
+		  R300_VPORT_Z_OFFSET_ENA);
+	OUT_BATCH(0x8);

-	reg_start(R300_VAP_PSC_SGN_NORM_CNTL, 0);
-	e32(0xaaaaaaaa);
+	OUT_BATCH_REGVAL(R300_VAP_PSC_SGN_NORM_CNTL, 0xaaaaaaaa);

-	R300_STATECHANGE(r300, vof);
-	reg_start(R300_VAP_OUTPUT_VTX_FMT_0, 1);
-	e32(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT |
-	    R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT);
-	e32(0x0);		/* no textures */
+	OUT_BATCH_REGSEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);
+	OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT |
+		  R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT);
+	OUT_BATCH(0); /* no textures */

-	R300_STATECHANGE(r300, txe);
-	reg_start(R300_TX_ENABLE, 0);
-	e32(0x0);
+	OUT_BATCH_REGVAL(R300_TX_ENABLE, 0);

-	R300_STATECHANGE(r300, vpt);
-	reg_start(R300_SE_VPORT_XSCALE, 5);
-	efloat(1.0);
-	efloat(dPriv->x);
-	efloat(1.0);
-	efloat(dPriv->y);
-	efloat(1.0);
-	efloat(0.0);
+	OUT_BATCH_REGSEQ(R300_SE_VPORT_XSCALE, 6);
+	OUT_BATCH_FLOAT32(1.0);
+	OUT_BATCH_FLOAT32(dPriv->x);
+	OUT_BATCH_FLOAT32(1.0);
+	OUT_BATCH_FLOAT32(dPriv->y);
+	OUT_BATCH_FLOAT32(1.0);
+	OUT_BATCH_FLOAT32(0.0);

-	R300_STATECHANGE(r300, at);
-	reg_start(R300_FG_ALPHA_FUNC, 0);
-	e32(0x0);
+	OUT_BATCH_REGVAL(R300_FG_ALPHA_FUNC, 0);
+
+	OUT_BATCH_REGSEQ(R300_RB3D_CBLEND, 2);
+	OUT_BATCH(0x0);
+	OUT_BATCH(0x0);
+	END_BATCH();

+	R300_STATECHANGE(r300, vir[0]);
+	R300_STATECHANGE(r300, fogs);
+	R300_STATECHANGE(r300, vir[1]);
+	R300_STATECHANGE(r300, vic);
+	R300_STATECHANGE(r300, vte);
+	R300_STATECHANGE(r300, vof);
+	R300_STATECHANGE(r300, txe);
+	R300_STATECHANGE(r300, vpt);
+	R300_STATECHANGE(r300, at);
 	R300_STATECHANGE(r300, bld);
-	reg_start(R300_RB3D_CBLEND, 1);
-	e32(0x0);
-	e32(0x0);
+	R300_STATECHANGE(r300, ps);

 	if (has_tcl) {
-	    R300_STATECHANGE(r300, vap_clip_cntl);
-	    reg_start(R300_VAP_CLIP_CNTL, 0);
-	    e32(R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE);
+		R300_STATECHANGE(r300, vap_clip_cntl);
+
+		BEGIN_BATCH_NO_AUTOSTATE(2);
+		OUT_BATCH_REGVAL(R300_VAP_CLIP_CNTL, R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE);
+		END_BATCH();
         }

-	R300_STATECHANGE(r300, ps);
-	reg_start(R300_GA_POINT_SIZE, 0);
-	e32(((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) |
-	    ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT));
+	BEGIN_BATCH_NO_AUTOSTATE(2);
+	OUT_BATCH_REGVAL(R300_GA_POINT_SIZE,
+		((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) |
+		((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT));
+	END_BATCH();

 	if (!is_r500) {
 		R300_STATECHANGE(r300, ri);
-		reg_start(R300_RS_IP_0, 7);
-		for (i = 0; i < 8; ++i) {
-			e32(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3));
-		}
-
 		R300_STATECHANGE(r300, rc);
-		/* The second constant is needed to get glxgears display anything .. */
-		reg_start(R300_RS_COUNT, 1);
-		e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
-		e32(0x0);
-
 		R300_STATECHANGE(r300, rr);
-		reg_start(R300_RS_INST_0, 0);
-		e32(R300_RS_INST_COL_CN_WRITE);
+
+		BEGIN_BATCH(14);
+		OUT_BATCH_REGSEQ(R300_RS_IP_0, 8);
+		for (i = 0; i < 8; ++i)
+			OUT_BATCH(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3));
+
+		OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
+		OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
+		OUT_BATCH(0x0);
+
+		OUT_BATCH_REGVAL(R300_RS_INST_0, R300_RS_INST_COL_CN_WRITE);
+		END_BATCH();
 	} else {
 		R300_STATECHANGE(r300, ri);
-		reg_start(R500_RS_IP_0, 7);
+		R300_STATECHANGE(r300, rc);
+		R300_STATECHANGE(r300, rr);
+
+		BEGIN_BATCH(14);
+		OUT_BATCH_REGSEQ(R500_RS_IP_0, 8);
 		for (i = 0; i < 8; ++i) {
-			e32((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
-			    (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
-			    (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
-			    (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT));
+			OUT_BATCH((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
+				  (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
+				  (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
+				  (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT));
 		}

-		R300_STATECHANGE(r300, rc);
-		/* The second constant is needed to get glxgears display anything .. */
-		reg_start(R300_RS_COUNT, 1);
-		e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
-		e32(0x0);
-
-		R300_STATECHANGE(r300, rr);
-		reg_start(R500_RS_INST_0, 0);
-		e32(R500_RS_INST_COL_CN_WRITE);
+		OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
+		OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
+		OUT_BATCH(0x0);

+		OUT_BATCH_REGVAL(R500_RS_INST_0, R500_RS_INST_COL_CN_WRITE);
+		END_BATCH();
 	}

 	if (!is_r500) {
 		R300_STATECHANGE(r300, fp);
-		reg_start(R300_US_CONFIG, 2);
-		e32(0x0);
-		e32(0x0);
-		e32(0x0);
-		reg_start(R300_US_CODE_ADDR_0, 3);
-		e32(0x0);
-		e32(0x0);
-		e32(0x0);
-		e32(R300_RGBA_OUT);
-
 		R300_STATECHANGE(r300, fpi[0]);
 		R300_STATECHANGE(r300, fpi[1]);
 		R300_STATECHANGE(r300, fpi[2]);
 		R300_STATECHANGE(r300, fpi[3]);

-		reg_start(R300_US_ALU_RGB_INST_0, 0);
-		e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO)));
-
-		reg_start(R300_US_ALU_RGB_ADDR_0, 0);
-		e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0));
-
-		reg_start(R300_US_ALU_ALPHA_INST_0, 0);
-		e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO)));
-
-		reg_start(R300_US_ALU_ALPHA_ADDR_0, 0);
-		e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0));
+		BEGIN_BATCH(17);
+		OUT_BATCH_REGSEQ(R300_US_CONFIG, 3);
+		OUT_BATCH(0x0);
+		OUT_BATCH(0x0);
+		OUT_BATCH(0x0);
+		OUT_BATCH_REGSEQ(R300_US_CODE_ADDR_0, 4);
+		OUT_BATCH(0x0);
+		OUT_BATCH(0x0);
+		OUT_BATCH(0x0);
+		OUT_BATCH(R300_RGBA_OUT);
+
+		OUT_BATCH_REGVAL(R300_US_ALU_RGB_INST_0,
+			FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO)));
+		OUT_BATCH_REGVAL(R300_US_ALU_RGB_ADDR_0,
+			FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0));
+		OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_INST_0,
+			FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO)));
+		OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_ADDR_0,
+			FP_SELA(0, NO, W, FP_TMP(0), 0, 0));
+		END_BATCH();
 	} else {
- 		R300_STATECHANGE(r300, fp);
- 		reg_start(R500_US_CONFIG, 1);
- 		e32(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
- 		e32(0x0);
- 		reg_start(R500_US_CODE_ADDR, 2);
- 		e32(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1));
- 		e32(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1));
- 		e32(R500_US_CODE_OFFSET_ADDR(0));
-
+		R300_STATECHANGE(r300, fp);
 		R300_STATECHANGE(r300, r500fp);
-		r500fp_start_fragment(0, 6);
-
-		e32(R500_INST_TYPE_OUT |
-		    R500_INST_TEX_SEM_WAIT |
-		    R500_INST_LAST |
-		    R500_INST_RGB_OMASK_R |
-		    R500_INST_RGB_OMASK_G |
-		    R500_INST_RGB_OMASK_B |
-		    R500_INST_ALPHA_OMASK |
-		    R500_INST_RGB_CLAMP |
-		    R500_INST_ALPHA_CLAMP);
-
-		e32(R500_RGB_ADDR0(0) |
-		    R500_RGB_ADDR1(0) |
-		    R500_RGB_ADDR1_CONST |
-		    R500_RGB_ADDR2(0) |
-		    R500_RGB_ADDR2_CONST);
-
-		e32(R500_ALPHA_ADDR0(0) |
-		    R500_ALPHA_ADDR1(0) |
-		    R500_ALPHA_ADDR1_CONST |
-		    R500_ALPHA_ADDR2(0) |
-		    R500_ALPHA_ADDR2_CONST);
-
-		e32(R500_ALU_RGB_SEL_A_SRC0 |
-		    R500_ALU_RGB_R_SWIZ_A_R |
-		    R500_ALU_RGB_G_SWIZ_A_G |
-		    R500_ALU_RGB_B_SWIZ_A_B |
-		    R500_ALU_RGB_SEL_B_SRC0 |
-		    R500_ALU_RGB_R_SWIZ_B_R |
-		    R500_ALU_RGB_B_SWIZ_B_G |
-		    R500_ALU_RGB_G_SWIZ_B_B);
-
-		e32(R500_ALPHA_OP_CMP |
-		    R500_ALPHA_SWIZ_A_A |
-		    R500_ALPHA_SWIZ_B_A);
-
-		e32(R500_ALU_RGBA_OP_CMP |
-		    R500_ALU_RGBA_R_SWIZ_0 |
-		    R500_ALU_RGBA_G_SWIZ_0 |
-		    R500_ALU_RGBA_B_SWIZ_0 |
-		    R500_ALU_RGBA_A_SWIZ_0);
+
+		BEGIN_BATCH(14);
+		OUT_BATCH_REGSEQ(R500_US_CONFIG, 2);
+		OUT_BATCH(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
+		OUT_BATCH(0x0);
+		OUT_BATCH_REGSEQ(R500_US_CODE_ADDR, 3);
+		OUT_BATCH(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1));
+		OUT_BATCH(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1));
+		OUT_BATCH(R500_US_CODE_OFFSET_ADDR(0));
+
+		OUT_BATCH(cmdr500fp(0, 1, 0, 0));
+		OUT_BATCH(R500_INST_TYPE_OUT |
+			  R500_INST_TEX_SEM_WAIT |
+			  R500_INST_LAST |
+			  R500_INST_RGB_OMASK_R |
+			  R500_INST_RGB_OMASK_G |
+			  R500_INST_RGB_OMASK_B |
+			  R500_INST_ALPHA_OMASK |
+			  R500_INST_RGB_CLAMP |
+			  R500_INST_ALPHA_CLAMP);
+		OUT_BATCH(R500_RGB_ADDR0(0) |
+			  R500_RGB_ADDR1(0) |
+			  R500_RGB_ADDR1_CONST |
+			  R500_RGB_ADDR2(0) |
+			  R500_RGB_ADDR2_CONST);
+		OUT_BATCH(R500_ALPHA_ADDR0(0) |
+			  R500_ALPHA_ADDR1(0) |
+			  R500_ALPHA_ADDR1_CONST |
+			  R500_ALPHA_ADDR2(0) |
+			  R500_ALPHA_ADDR2_CONST);
+		OUT_BATCH(R500_ALU_RGB_SEL_A_SRC0 |
+			  R500_ALU_RGB_R_SWIZ_A_R |
+			  R500_ALU_RGB_G_SWIZ_A_G |
+			  R500_ALU_RGB_B_SWIZ_A_B |
+			  R500_ALU_RGB_SEL_B_SRC0 |
+			  R500_ALU_RGB_R_SWIZ_B_R |
+			  R500_ALU_RGB_B_SWIZ_B_G |
+			  R500_ALU_RGB_G_SWIZ_B_B);
+		OUT_BATCH(R500_ALPHA_OP_CMP |
+			  R500_ALPHA_SWIZ_A_A |
+			  R500_ALPHA_SWIZ_B_A);
+		OUT_BATCH(R500_ALU_RGBA_OP_CMP |
+			  R500_ALU_RGBA_R_SWIZ_0 |
+			  R500_ALU_RGBA_G_SWIZ_0 |
+			  R500_ALU_RGBA_B_SWIZ_0 |
+			  R500_ALU_RGBA_A_SWIZ_0);
+		END_BATCH();
 	}

-	reg_start(R300_VAP_PVS_STATE_FLUSH_REG, 0);
-	e32(0x00000000);
+	BEGIN_BATCH(2);
+	OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0);
+	END_BATCH();
+
 	if (has_tcl) {
-	    vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
+		vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
 			(5 << R300_PVS_NUM_CNTLRS_SHIFT) |
 			(12 << R300_VF_MAX_VTX_NUM_SHIFT));
-	    if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
-		vap_cntl |= R500_TCL_STATE_OPTIMIZATION;
-	} else
-	    vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
+		if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+			vap_cntl |= R500_TCL_STATE_OPTIMIZATION;
+	} else {
+		vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
 			(5 << R300_PVS_NUM_CNTLRS_SHIFT) |
 			(5 << R300_VF_MAX_VTX_NUM_SHIFT));
+	}

 	if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515)
-	    vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT);
+		vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT);
 	else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) ||
 		 (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) ||
 		 (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570))
-	    vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT);
+		vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT);
 	else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) ||
 		 (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420))
-	    vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT);
+		vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT);
 	else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) ||
 		 (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580))
-	    vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT);
+		vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT);
 	else
-	    vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT);
+		vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT);
+
+	R300_STATECHANGE(r300, vap_cntl);

-	R300_STATECHANGE(rmesa, vap_cntl);
-	reg_start(R300_VAP_CNTL, 0);
-	e32(vap_cntl);
+	BEGIN_BATCH(2);
+	OUT_BATCH_REGVAL(R300_VAP_CNTL, vap_cntl);
+	END_BATCH();

 	if (has_tcl) {
 		R300_STATECHANGE(r300, pvs);
-		reg_start(R300_VAP_PVS_CODE_CNTL_0, 2);
-
-		e32((0 << R300_PVS_FIRST_INST_SHIFT) |
-		    (0 << R300_PVS_XYZW_VALID_INST_SHIFT) |
-		    (1 << R300_PVS_LAST_INST_SHIFT));
-		e32((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
-		    (0 << R300_PVS_MAX_CONST_ADDR_SHIFT));
-		e32(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
-
 		R300_STATECHANGE(r300, vpi);
-		vsf_start_fragment(0x0, 8);
-
-		e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 0, 0xf, PVS_DST_REG_OUT));
-		e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
-		e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
-		e32(0x0);

-		e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, PVS_DST_REG_OUT));
-		e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
-		e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
-		e32(0x0);
+		BEGIN_BATCH(13);
+		OUT_BATCH_REGSEQ(R300_VAP_PVS_CODE_CNTL_0, 3);
+		OUT_BATCH((0 << R300_PVS_FIRST_INST_SHIFT) |
+			  (0 << R300_PVS_XYZW_VALID_INST_SHIFT) |
+			  (1 << R300_PVS_LAST_INST_SHIFT));
+		OUT_BATCH((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
+			  (0 << R300_PVS_MAX_CONST_ADDR_SHIFT));
+		OUT_BATCH(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
+
+		OUT_BATCH(cmdvpu(0, 2));
+		OUT_BATCH(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 0, 0xf, PVS_DST_REG_OUT));
+		OUT_BATCH(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
+		OUT_BATCH(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
+		OUT_BATCH(0x0);
+
+		OUT_BATCH(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, PVS_DST_REG_OUT));
+		OUT_BATCH(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
+		OUT_BATCH(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
+		OUT_BATCH(0x0);
+		END_BATCH();
 	}
 }

@@ -467,7 +465,10 @@ static void r300EmitClearState(GLcontext * ctx)
 static void r300Clear(GLcontext * ctx, GLbitfield mask)
 {
 	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	BATCH_LOCALS(r300);
 	__DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
+	GLframebuffer *fb = dPriv->driverPrivate;
+	struct radeon_renderbuffer *rrb;
 	int flags = 0;
 	int bits = 0;
 	int swapped;
@@ -482,6 +483,12 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask)
 			return;
 	}

+	/* Flush swtcl vertices if necessary, because we will change hardware
+	 * state during clear. See also the state-related comment in
+	 * r300EmitClearState.
+	 */
+	R300_NEWPRIM(r300);
+
 	if (mask & BUFFER_BIT_FRONT_LEFT) {
 		flags |= BUFFER_BIT_FRONT_LEFT;
 		mask &= ~BUFFER_BIT_FRONT_LEFT;
@@ -509,26 +516,27 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask)
 		_swrast_Clear(ctx, mask);
 	}

-	swapped = r300->radeon.sarea->pfCurrentPage == 1;
-
 	/* Make sure it fits there. */
 	r300EnsureCmdBufSpace(r300, 421 * 3, __FUNCTION__);
 	if (flags || bits)
 		r300EmitClearState(ctx);

 	if (flags & BUFFER_BIT_FRONT_LEFT) {
-		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped);
+		rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
+		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb);
 		bits = 0;
 	}

 	if (flags & BUFFER_BIT_BACK_LEFT) {
-		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped ^ 1);
+		rrb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
+		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb);
 		bits = 0;
 	}

 	if (bits)
-		r300ClearBuffer(r300, bits, 0);
+		r300ClearBuffer(r300, bits, NULL);

+	COMMIT_BATCH();
 }

 void r300Flush(GLcontext * ctx)
@@ -541,16 +549,12 @@ void r300Flush(GLcontext * ctx)
 	if (rmesa->dma.flush)
 		rmesa->dma.flush( rmesa );

-	if (rmesa->cmdbuf.count_used > rmesa->cmdbuf.count_reemit)
+	if (rmesa->cmdbuf.committed > rmesa->cmdbuf.reemit)
 		r300FlushCmdBuf(rmesa, __FUNCTION__);
 }

-#ifdef USER_BUFFERS
-#include "r300_mem.h"
-
 void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size)
 {
-	struct r300_dma_buffer *dmabuf;
 	size = MAX2(size, RADEON_BUFFER_SIZE * 16);

 	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
@@ -560,71 +564,24 @@ void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size)
 		rmesa->dma.flush(rmesa);
 	}

-	if (rmesa->dma.current.buf) {
-#ifdef USER_BUFFERS
-		r300_mem_use(rmesa, rmesa->dma.current.buf->id);
-#endif
-		r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__);
+	if (rmesa->dma.current) {
+		dri_bo_unreference(rmesa->dma.current);
+		rmesa->dma.current = 0;
 	}
 	if (rmesa->dma.nr_released_bufs > 4)
 		r300FlushCmdBuf(rmesa, __FUNCTION__);

-	dmabuf = CALLOC_STRUCT(r300_dma_buffer);
-	dmabuf->buf = (void *)1;	/* hack */
-	dmabuf->refcount = 1;
-
-	dmabuf->id = r300_mem_alloc(rmesa, 4, size);
-	if (dmabuf->id == 0) {
-		LOCK_HARDWARE(&rmesa->radeon);	/* no need to validate */
-
-		r300FlushCmdBufLocked(rmesa, __FUNCTION__);
-		radeonWaitForIdleLocked(&rmesa->radeon);
-
-		dmabuf->id = r300_mem_alloc(rmesa, 4, size);
-
-		UNLOCK_HARDWARE(&rmesa->radeon);
-
-		if (dmabuf->id == 0) {
-			fprintf(stderr,
-				"Error: Could not get dma buffer... exiting\n");
-			_mesa_exit(-1);
-		}
-	}
-
-	rmesa->dma.current.buf = dmabuf;
-	rmesa->dma.current.address = r300_mem_ptr(rmesa, dmabuf->id);
-	rmesa->dma.current.end = size;
-	rmesa->dma.current.start = 0;
-	rmesa->dma.current.ptr = 0;
-}
-
-void r300ReleaseDmaRegion(r300ContextPtr rmesa,
-			  struct r300_dma_region *region, const char *caller)
-{
-	if (RADEON_DEBUG & DEBUG_IOCTL)
-		fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
-
-	if (!region->buf)
-		return;
-
-	if (rmesa->dma.flush)
-		rmesa->dma.flush(rmesa);
-
-	if (--region->buf->refcount == 0) {
-		r300_mem_free(rmesa, region->buf->id);
-		FREE(region->buf);
-		rmesa->dma.nr_released_bufs++;
-	}
-
-	region->buf = 0;
-	region->start = 0;
+	rmesa->dma.current = dri_bo_alloc(&rmesa->radeon.bufmgr->base, "DMA regions",
+		size, 4, DRM_BO_MEM_DMA);
+	rmesa->dma.current_used = 0;
+	rmesa->dma.current_vertexptr = 0;
 }

 /* Allocates a region from rmesa->dma.current.  If there isn't enough
  * space in current, grab a new buffer (and discard what was left of current)
  */
 void r300AllocDmaRegion(r300ContextPtr rmesa,
-			struct r300_dma_region *region,
+			dri_bo **pbo, int *poffset,
 			int bytes, int alignment)
 {
 	if (RADEON_DEBUG & DEBUG_IOCTL)
@@ -633,207 +590,23 @@ void r300AllocDmaRegion(r300ContextPtr rmesa,
 	if (rmesa->dma.flush)
 		rmesa->dma.flush(rmesa);

-	if (region->buf)
-		r300ReleaseDmaRegion(rmesa, region, __FUNCTION__);
+	assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

 	alignment--;
-	rmesa->dma.current.start = rmesa->dma.current.ptr =
-	    (rmesa->dma.current.ptr + alignment) & ~alignment;
-
-	if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end)
-		r300RefillCurrentDmaRegion(rmesa, (bytes + 0x7) & ~0x7);
-
-	region->start = rmesa->dma.current.start;
-	region->ptr = rmesa->dma.current.start;
-	region->end = rmesa->dma.current.start + bytes;
-	region->address = rmesa->dma.current.address;
-	region->buf = rmesa->dma.current.buf;
-	region->buf->refcount++;
+	rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

-	rmesa->dma.current.ptr += bytes;	/* bug - if alignment > 7 */
-	rmesa->dma.current.start =
-	    rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;
-
-	assert(rmesa->dma.current.ptr <= rmesa->dma.current.end);
-}
+	if (!rmesa->dma.current || rmesa->dma.current_used + bytes > rmesa->dma.current->size)
+		r300RefillCurrentDmaRegion(rmesa, (bytes + 15) & ~15);

-#else
-static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa)
-{
-	struct r300_dma_buffer *dmabuf;
-	int fd = rmesa->radeon.dri.fd;
-	int index = 0;
-	int size = 0;
-	drmDMAReq dma;
-	int ret;
+	*poffset = rmesa->dma.current_used;
+	*pbo = rmesa->dma.current;
+	dri_bo_reference(*pbo);

-	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
-		fprintf(stderr, "%s\n", __FUNCTION__);
+	/* Always align to at least 16 bytes */
+	rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
+	rmesa->dma.current_vertexptr = rmesa->dma.current_used;

-	if (rmesa->dma.flush) {
-		rmesa->dma.flush(rmesa);
-	}
-
-	if (rmesa->dma.current.buf)
-		r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__);
-
-	if (rmesa->dma.nr_released_bufs > 4)
-		r300FlushCmdBuf(rmesa, __FUNCTION__);
-
-	dma.context = rmesa->radeon.dri.hwContext;
-	dma.send_count = 0;
-	dma.send_list = NULL;
-	dma.send_sizes = NULL;
-	dma.flags = 0;
-	dma.request_count = 1;
-	dma.request_size = RADEON_BUFFER_SIZE;
-	dma.request_list = &index;
-	dma.request_sizes = &size;
-	dma.granted_count = 0;
-
-	LOCK_HARDWARE(&rmesa->radeon);	/* no need to validate */
-
-	ret = drmDMA(fd, &dma);
-
-	if (ret != 0) {
-		/* Try to release some buffers and wait until we can't get any more */
-		if (rmesa->dma.nr_released_bufs) {
-			r300FlushCmdBufLocked(rmesa, __FUNCTION__);
-		}
-
-		if (RADEON_DEBUG & DEBUG_DMA)
-			fprintf(stderr, "Waiting for buffers\n");
-
-		radeonWaitForIdleLocked(&rmesa->radeon);
-		ret = drmDMA(fd, &dma);
-
-		if (ret != 0) {
-			UNLOCK_HARDWARE(&rmesa->radeon);
-			fprintf(stderr,
-				"Error: Could not get dma buffer... exiting\n");
-			_mesa_exit(-1);
-		}
-	}
-
-	UNLOCK_HARDWARE(&rmesa->radeon);
-
-	if (RADEON_DEBUG & DEBUG_DMA)
-		fprintf(stderr, "Allocated buffer %d\n", index);
-
-	dmabuf = CALLOC_STRUCT(r300_dma_buffer);
-	dmabuf->buf = &rmesa->radeon.radeonScreen->buffers->list[index];
-	dmabuf->refcount = 1;
-
-	rmesa->dma.current.buf = dmabuf;
-	rmesa->dma.current.address = dmabuf->buf->address;
-	rmesa->dma.current.end = dmabuf->buf->total;
-	rmesa->dma.current.start = 0;
-	rmesa->dma.current.ptr = 0;
-}
-
-void r300ReleaseDmaRegion(r300ContextPtr rmesa,
-			  struct r300_dma_region *region, const char *caller)
-{
-	if (RADEON_DEBUG & DEBUG_IOCTL)
-		fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
-
-	if (!region->buf)
-		return;
-
-	if (rmesa->dma.flush)
-		rmesa->dma.flush(rmesa);
-
-	if (--region->buf->refcount == 0) {
-		drm_radeon_cmd_header_t *cmd;
-
-		if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
-			fprintf(stderr, "%s -- DISCARD BUF %d\n",
-				__FUNCTION__, region->buf->buf->idx);
-		cmd =
-		    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa,
-								sizeof
-								(*cmd) / 4,
-								__FUNCTION__);
-		cmd->dma.cmd_type = R300_CMD_DMA_DISCARD;
-		cmd->dma.buf_idx = region->buf->buf->idx;
-
-		FREE(region->buf);
-		rmesa->dma.nr_released_bufs++;
-	}
-
-	region->buf = 0;
-	region->start = 0;
-}
-
-/* Allocates a region from rmesa->dma.current.  If there isn't enough
- * space in current, grab a new buffer (and discard what was left of current)
- */
-void r300AllocDmaRegion(r300ContextPtr rmesa,
-			struct r300_dma_region *region,
-			int bytes, int alignment)
-{
-	if (RADEON_DEBUG & DEBUG_IOCTL)
-		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
-
-	if (rmesa->dma.flush)
-		rmesa->dma.flush(rmesa);
-
-	if (region->buf)
-		r300ReleaseDmaRegion(rmesa, region, __FUNCTION__);
-
-	alignment--;
-	rmesa->dma.current.start = rmesa->dma.current.ptr =
-	    (rmesa->dma.current.ptr + alignment) & ~alignment;
-
-	if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end)
-		r300RefillCurrentDmaRegion(rmesa);
-
-	region->start = rmesa->dma.current.start;
-	region->ptr = rmesa->dma.current.start;
-	region->end = rmesa->dma.current.start + bytes;
-	region->address = rmesa->dma.current.address;
-	region->buf = rmesa->dma.current.buf;
-	region->buf->refcount++;
-
-	rmesa->dma.current.ptr += bytes;	/* bug - if alignment > 7 */
-	rmesa->dma.current.start =
-	    rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;
-
-	assert(rmesa->dma.current.ptr <= rmesa->dma.current.end);
-}
-
-#endif
-
-GLboolean r300IsGartMemory(r300ContextPtr rmesa, const GLvoid * pointer,
-			   GLint size)
-{
-	int offset =
-	    (char *)pointer -
-	    (char *)rmesa->radeon.radeonScreen->gartTextures.map;
-	int valid = (size >= 0 && offset >= 0
-		     && offset + size <
-		     rmesa->radeon.radeonScreen->gartTextures.size);
-
-	if (RADEON_DEBUG & DEBUG_IOCTL)
-		fprintf(stderr, "r300IsGartMemory( %p ) : %d\n", pointer,
-			valid);
-
-	return valid;
-}
-
-GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa, const GLvoid * pointer)
-{
-	int offset =
-	    (char *)pointer -
-	    (char *)rmesa->radeon.radeonScreen->gartTextures.map;
-
-	//fprintf(stderr, "offset=%08x\n", offset);
-
-	if (offset < 0
-	    || offset > rmesa->radeon.radeonScreen->gartTextures.size)
-		return ~0;
-	else
-		return rmesa->radeon.radeonScreen->gart_texture_offset + offset;
+	assert(rmesa->dma.current_used <= rmesa->dma.current->size);
 }

 void r300InitIoctlFuncs(struct dd_function_table *functions)
diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.h b/src/mesa/drivers/dri/r300/r300_ioctl.h
index e1143fb..c743478 100644
--- a/src/mesa/drivers/dri/r300/r300_ioctl.h
+++ b/src/mesa/drivers/dri/r300/r300_ioctl.h
@@ -39,20 +39,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "r300_context.h"
 #include "radeon_drm.h"

-extern GLboolean r300IsGartMemory(r300ContextPtr rmesa,
-				  const GLvoid * pointer, GLint size);
-
-extern GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa,
-					const GLvoid * pointer);
-
 extern void r300Flush(GLcontext * ctx);

-extern void r300ReleaseDmaRegion(r300ContextPtr rmesa,
-				 struct r300_dma_region *region,
-				 const char *caller);
 extern void r300AllocDmaRegion(r300ContextPtr rmesa,
-			       struct r300_dma_region *region, int bytes,
-			       int alignment);
+			       dri_bo **pbo, int *poffset,
+			       int bytes, int alignment);

 extern void r300InitIoctlFuncs(struct dd_function_table *functions);

diff --git a/src/mesa/drivers/dri/r300/r300_mem.c b/src/mesa/drivers/dri/r300/r300_mem.c
index f8f9d4f..b045393 100644
--- a/src/mesa/drivers/dri/r300/r300_mem.c
+++ b/src/mesa/drivers/dri/r300/r300_mem.c
@@ -27,359 +27,843 @@

 /**
  * \file
+ * Simulate a real memory manager for R300 in the old-style scheme.
+ *
+ * NOTE: Right now, this is DMA-only and really only a skeleton of a true bufmgr.
  *
  * \author Aapo Tahkola <aet@rasterburn.org>
  */

+#include "r300_mem.h"
+
+#include <errno.h>
 #include <unistd.h>

-#include "r300_context.h"
-#include "r300_cmdbuf.h"
-#include "r300_ioctl.h"
-#include "r300_mem.h"
+#include "simple_list.h"
+
 #include "radeon_ioctl.h"
+#include "r300_cmdbuf.h"

-#ifdef USER_BUFFERS
+typedef struct _radeon_bufmgr_classic radeon_bufmgr_classic;
+typedef struct _radeon_bo_classic radeon_bo_classic;
+typedef struct _radeon_bo_functions radeon_bo_functions;
+typedef struct _radeon_reloc radeon_reloc;
+typedef struct _radeon_bo_vram radeon_bo_vram;
+
+struct _radeon_bufmgr_classic {
+	radeon_bufmgr base;
+	r300ContextPtr rmesa;
+
+	radeon_bo_classic *buffers; /** Unsorted linked list of all buffer objects */
+
+	radeon_bo_classic *pending; /** Age-sorted linked list of pending buffer objects */
+	radeon_bo_classic **pending_tail;
+
+	/* Texture heap bookkeeping */
+	driTexHeap *texture_heap;
+	GLuint texture_offset;
+	driTextureObject texture_swapped;
+};
+
+struct _radeon_reloc {
+	uint64_t flags;
+	GLuint offset; /**< Offset (in bytes) into command buffer to relocated dword */
+	radeon_bo_classic *target;
+	GLuint delta;
+};
+
+struct _radeon_bo_functions {
+	/**
+	 * Free a buffer object. Caller has verified that the object is not
+	 * referenced or pending.
+	 */
+	void (*free)(radeon_bo_classic*);
+
+	/**
+	 * Validate the given buffer. Must set the validated flag to 1.
+	 *
+	 * May be null for buffer objects that are always valid.
+	 * Always called with lock held.
+	 */
+	void (*validate)(radeon_bo_classic*);
+
+	/**
+	 * Called when a writing map of the buffer is taken, to note that
+	 * the buffer will have to be re-validated.
+	 *
+	 * May be null for buffer objects that don't need it.
+	 */
+	void (*dirty)(radeon_bo_classic*);
+
+	/**
+	 * Indicate that the buffer object is now used by the hardware.
+	 *
+	 * May be null.
+	 */
+	void (*bind)(radeon_bo_classic*);
+
+	/**
+	 * Indicate that the buffer object is no longer used by the hardware.
+	 *
+	 * May be null.
+	 */
+	void (*unbind)(radeon_bo_classic*);
+};

-static void resize_u_list(r300ContextPtr rmesa)
-{
-	void *temp;
-	int nsize;
+/**
+ * A buffer object. There are three types of buffer objects:
+ *  1. cmdbuf: Ordinary malloc()ed memory, used for command buffers
+ *  2. dma: GART memory allocated via the DRM_RADEON_ALLOC ioctl.
+ *  3. vram: Objects with malloc()ed backing store that will be uploaded
+ *     into VRAM on demand; used for textures.
+ * There is a @ref functions table for operations that depend on the
+ * buffer object type.
+ *
+ * Fencing is handled the same way all buffer objects. During command buffer
+ * submission, the pending flag and corresponding variables are set accordingly.
+ */
+struct _radeon_bo_classic {
+	dri_bo base;

-	temp = rmesa->rmm->u_list;
-	nsize = rmesa->rmm->u_size * 2;
+	const radeon_bo_functions *functions;

-	rmesa->rmm->u_list = _mesa_malloc(nsize * sizeof(*rmesa->rmm->u_list));
-	_mesa_memset(rmesa->rmm->u_list, 0,
-		     nsize * sizeof(*rmesa->rmm->u_list));
+	radeon_bo_classic *next; /** Unsorted linked list of all buffer objects */
+	radeon_bo_classic **pprev;

-	if (temp) {
-		r300FlushCmdBuf(rmesa, __FUNCTION__);
+	/**
+	 * Number of software references to this buffer.
+	 * A buffer is freed automatically as soon as its reference count reaches 0
+	 * *and* it is no longer pending.
+	 */
+	unsigned int refcount;
+	unsigned int mapcount; /** mmap count; mutually exclusive to being pending */

-		_mesa_memcpy(rmesa->rmm->u_list, temp,
-			     rmesa->rmm->u_size * sizeof(*rmesa->rmm->u_list));
-		_mesa_free(temp);
-	}
+	unsigned int validated:1; /** whether the buffer is validated for hardware use right now */
+	unsigned int used:1; /* only for communication between process_relocs and post_submit */
+
+	unsigned int pending:1;
+	radeon_bo_classic *pending_next; /** Age-sorted linked list of pending buffer objects */
+	radeon_bo_classic **pending_pprev;

-	rmesa->rmm->u_size = nsize;
+	/* The following two variables are intricately linked to the DRM interface,
+	 * and must be in this physical memory order, or else chaos ensues.
+	 * See the DRM's implementation of R300_CMD_SCRATCH for details.
+	 */
+	uint32_t pending_age; /** Buffer object pending until this age is reached, written by the DRM */
+	uint32_t pending_count; /** Number of pending R300_CMD_SCRATCH references to this object */
+
+	radeon_reloc *relocs; /** Array of relocations in this buffer */
+	GLuint relocs_used; /** # of relocations in relocation array */
+	GLuint relocs_size; /** # of reloc records reserved in relocation array */
+};
+
+typedef struct _radeon_vram_wrapper radeon_vram_wrapper;
+
+/** Wrapper around heap object */
+struct _radeon_vram_wrapper {
+	driTextureObject base;
+	radeon_bo_vram *bo;
+};
+
+struct _radeon_bo_vram {
+	radeon_bo_classic base;
+
+	unsigned int backing_store_dirty:1; /** Backing store has changed, block must be reuploaded */
+
+	radeon_vram_wrapper *vram; /** Block in VRAM (if any) */
+};
+
+static radeon_bufmgr_classic* get_bufmgr_classic(dri_bufmgr *bufmgr_ctx)
+{
+	return (radeon_bufmgr_classic*)bufmgr_ctx;
 }

-void r300_mem_init(r300ContextPtr rmesa)
+static radeon_bo_classic* get_bo_classic(dri_bo *bo_base)
 {
-	rmesa->rmm = malloc(sizeof(struct r300_memory_manager));
-	memset(rmesa->rmm, 0, sizeof(struct r300_memory_manager));
+	return (radeon_bo_classic*)bo_base;
+}

-	rmesa->rmm->u_size = 128;
-	resize_u_list(rmesa);
+static radeon_bo_vram* get_bo_vram(radeon_bo_classic *bo_base)
+{
+	return (radeon_bo_vram*)bo_base;
 }

-void r300_mem_destroy(r300ContextPtr rmesa)
+/**
+ * Really free a given buffer object.
+ */
+static void bo_free(radeon_bo_classic *bo)
 {
-	_mesa_free(rmesa->rmm->u_list);
-	rmesa->rmm->u_list = NULL;
+	assert(!bo->refcount);
+	assert(!bo->pending);
+	assert(!bo->mapcount);
+
+	if (bo->relocs) {
+		int i;
+		for(i = 0; i < bo->relocs_used; ++i)
+			dri_bo_unreference(&bo->relocs[i].target->base);
+		free(bo->relocs);
+		bo->relocs = 0;
+	}
+
+	*bo->pprev = bo->next;
+	if (bo->next)
+		bo->next->pprev = bo->pprev;

-	_mesa_free(rmesa->rmm);
-	rmesa->rmm = NULL;
+	bo->functions->free(bo);
 }

-void *r300_mem_ptr(r300ContextPtr rmesa, int id)
+
+/**
+ * Keep track of which buffer objects are still pending, i.e. waiting for
+ * some hardware operation to complete.
+ */
+static void track_pending_buffers(radeon_bufmgr_classic *bufmgr)
 {
-	assert(id <= rmesa->rmm->u_last);
-	return rmesa->rmm->u_list[id].ptr;
+	uint32_t currentage = radeonGetAge((radeonContextPtr)bufmgr->rmesa);
+
+	while(bufmgr->pending) {
+		radeon_bo_classic *bo = bufmgr->pending;
+
+		assert(bo->pending);
+
+		if (bo->pending_count ||
+		    bo->pending_age > currentage) // TODO: Age counter wraparound!
+			break;
+
+		bo->pending = 0;
+		bufmgr->pending = bo->pending_next;
+		if (bufmgr->pending)
+			bufmgr->pending->pending_pprev = &bufmgr->pending;
+		else
+			bufmgr->pending_tail = &bufmgr->pending;
+
+		if (bo->functions->unbind)
+			(*bo->functions->unbind)(bo);
+		if (!bo->refcount)
+			bo_free(bo);
+	}
 }

-int r300_mem_find(r300ContextPtr rmesa, void *ptr)
+/**
+ * Initialize common buffer object data.
+ */
+static void init_buffer(radeon_bufmgr_classic *bufmgr, radeon_bo_classic *bo, unsigned long size)
 {
-	int i;
+	bo->base.bufmgr = &bufmgr->base.base;
+	bo->base.size = size;
+	bo->refcount = 1;
+
+	bo->pprev = &bufmgr->buffers;
+	bo->next = bufmgr->buffers;
+	if (bo->next)
+		bo->next->pprev = &bo->next;
+	bufmgr->buffers = bo;
+}

-	for (i = 1; i < rmesa->rmm->u_size + 1; i++)
-		if (rmesa->rmm->u_list[i].ptr &&
-		    ptr >= rmesa->rmm->u_list[i].ptr &&
-		    ptr <
-		    rmesa->rmm->u_list[i].ptr + rmesa->rmm->u_list[i].size)
-			break;

-	if (i < rmesa->rmm->u_size + 1)
-		return i;
+/**
+ * Free a DMA-based buffer.
+ */
+static void dma_free(radeon_bo_classic *bo)
+{
+	radeon_bufmgr_classic* bufmgr = get_bufmgr_classic(bo->base.bufmgr);
+	drm_radeon_mem_free_t memfree;
+	int ret;
+
+	memfree.region = RADEON_MEM_REGION_GART;
+	memfree.region_offset = bo->base.offset;
+	memfree.region_offset -= bufmgr->rmesa->radeon.radeonScreen->gart_texture_offset;

-	fprintf(stderr, "%p failed\n", ptr);
-	return 0;
+	ret = drmCommandWrite(bufmgr->rmesa->radeon.radeonScreen->driScreen->fd,
+		DRM_RADEON_FREE, &memfree, sizeof(memfree));
+	if (ret) {
+		fprintf(stderr, "Failed to free bo[%p] at %08x\n", bo, memfree.region_offset);
+		fprintf(stderr, "ret = %s\n", strerror(-ret));
+		exit(1);
+	}
+
+	free(bo);
 }

-//#define MM_DEBUG
-int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size)
+static const radeon_bo_functions dma_bo_functions = {
+	.free = &dma_free
+};
+
+/**
+ * Call the DRM to allocate GART memory for the given (incomplete)
+ * buffer object.
+ */
+static int try_dma_alloc(radeon_bufmgr_classic *bufmgr, radeon_bo_classic *bo,
+		unsigned long size, unsigned int alignment)
 {
 	drm_radeon_mem_alloc_t alloc;
-	int offset = 0, ret;
-	int i, free = -1;
-	int done_age;
-	drm_radeon_mem_free_t memfree;
-	int tries = 0;
-	static int bytes_wasted = 0, allocated = 0;
+	int baseoffset;
+	int ret;

-	if (size < 4096)
-		bytes_wasted += 4096 - size;
+	alloc.region = RADEON_MEM_REGION_GART;
+	alloc.alignment = alignment;
+	alloc.size = size;
+	alloc.region_offset = &baseoffset;
+
+	ret = drmCommandWriteRead(bufmgr->rmesa->radeon.dri.fd,
+			DRM_RADEON_ALLOC, &alloc, sizeof(alloc));
+	if (ret) {
+		if (RADEON_DEBUG & DEBUG_MEMORY)
+			fprintf(stderr, "DRM_RADEON_ALLOC failed: %d\n", ret);
+		return 0;
+	}

-	allocated += size;
+	bo->base.virtual = (char*)bufmgr->rmesa->radeon.radeonScreen->gartTextures.map + baseoffset;
+	bo->base.offset = bufmgr->rmesa->radeon.radeonScreen->gart_texture_offset + baseoffset;

-#if 0
-	static int t = 0;
-	if (t != time(NULL)) {
-		t = time(NULL);
-		fprintf(stderr, "slots used %d, wasted %d kb, allocated %d\n",
-			rmesa->rmm->u_last, bytes_wasted / 1024,
-			allocated / 1024);
+	return 1;
+}
+
+/**
+ * Allocate a DMA buffer.
+ */
+static dri_bo *dma_alloc(radeon_bufmgr_classic *bufmgr, const char *name,
+		unsigned long size, unsigned int alignment)
+{
+	radeon_bo_classic* bo = (radeon_bo_classic*)calloc(1, sizeof(radeon_bo_classic));
+
+	bo->functions = &dma_bo_functions;
+
+	track_pending_buffers(bufmgr);
+	if (!try_dma_alloc(bufmgr, bo, size, alignment)) {
+		if (RADEON_DEBUG & DEBUG_MEMORY)
+			fprintf(stderr, "Failed to allocate %ld bytes, finishing command buffer...\n", size);
+		radeonFinish(bufmgr->rmesa->radeon.glCtx);
+		track_pending_buffers(bufmgr);
+		if (!try_dma_alloc(bufmgr, bo, size, alignment)) {
+			WARN_ONCE(
+				"Ran out of GART memory (for %ld)!\n"
+				"Please consider adjusting GARTSize option.\n",
+				size);
+			free(bo);
+			return 0;
+		}
 	}
-#endif

-	memfree.region = RADEON_MEM_REGION_GART;
+	init_buffer(bufmgr, bo, size);
+	bo->validated = 1; /* DMA buffer offsets are always valid */

-      again:
+	return &bo->base;
+}

-	done_age = radeonGetAge((radeonContextPtr) rmesa);
+/**
+ * Free a command buffer
+ */
+static void cmdbuf_free(radeon_bo_classic *bo)
+{
+	free(bo->base.virtual);
+	free(bo);
+}

-	if (rmesa->rmm->u_last + 1 >= rmesa->rmm->u_size)
-		resize_u_list(rmesa);
+static const radeon_bo_functions cmdbuf_bo_functions = {
+	.free = cmdbuf_free
+};

-	for (i = rmesa->rmm->u_last + 1; i > 0; i--) {
-		if (rmesa->rmm->u_list[i].ptr == NULL) {
-			free = i;
-			continue;
+/**
+ * Allocate a command buffer.
+ *
+ * Command buffers are really just malloc'ed buffers. They are managed by
+ * the bufmgr to enable relocations.
+ */
+static dri_bo *cmdbuf_alloc(radeon_bufmgr_classic *bufmgr, const char *name,
+		unsigned long size)
+{
+	radeon_bo_classic* bo = (radeon_bo_classic*)calloc(1, sizeof(radeon_bo_classic));
+
+	bo->functions = &cmdbuf_bo_functions;
+	bo->base.virtual = malloc(size);
+
+	init_buffer(bufmgr, bo, size);
+	return &bo->base;
+}
+
+/**
+ * Free a VRAM-based buffer object.
+ */
+static void vram_free(radeon_bo_classic *bo_base)
+{
+	radeon_bo_vram *bo = get_bo_vram(bo_base);
+
+	if (bo->vram) {
+		driDestroyTextureObject(&bo->vram->base);
+		bo->vram = 0;
+	}
+
+	free(bo->base.base.virtual);
+	free(bo);
+}
+
+/**
+ * Allocate/update the copy in vram.
+ *
+ * Note: Assume we're called with the DRI lock held.
+ */
+static void vram_validate(radeon_bo_classic *bo_base)
+{
+	radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(bo_base->base.bufmgr);
+	radeon_bo_vram *bo = get_bo_vram(bo_base);
+
+	if (!bo->vram) {
+		bo->backing_store_dirty = 1;
+
+		bo->vram = (radeon_vram_wrapper*)calloc(1, sizeof(radeon_vram_wrapper));
+		bo->vram->bo = bo;
+		make_empty_list(&bo->vram->base);
+		bo->vram->base.totalSize = bo->base.base.size;
+		if (driAllocateTexture(&bufmgr->texture_heap, 1, &bo->vram->base) < 0) {
+			fprintf(stderr, "Ouch! vram_validate failed\n");
+			free(bo->vram);
+			bo->base.base.offset = 0;
+			bo->vram = 0;
+			return;
 		}
+	}
+
+	assert(bo->vram->base.memBlock);
+
+	bo->base.base.offset = bufmgr->texture_offset + bo->vram->base.memBlock->ofs;
+
+	if (bo->backing_store_dirty) {
+		/* Copy to VRAM using a blit.
+		 * All memory is 4K aligned. We're using 1024 pixels wide blits.
+		 */
+		drm_radeon_texture_t tex;
+		drm_radeon_tex_image_t tmp;
+		int ret;

-		if (rmesa->rmm->u_list[i].h_pending == 0 &&
-		    rmesa->rmm->u_list[i].pending
-		    && rmesa->rmm->u_list[i].age <= done_age) {
-			memfree.region_offset =
-			    (char *)rmesa->rmm->u_list[i].ptr -
-			    (char *)rmesa->radeon.radeonScreen->gartTextures.
-			    map;
+		tex.offset = bo->base.base.offset;
+		tex.image = &tmp;

-			ret =
-			    drmCommandWrite(rmesa->radeon.radeonScreen->
-					    driScreen->fd, DRM_RADEON_FREE,
-					    &memfree, sizeof(memfree));
+		assert(!(tex.offset & 1023));

+		tmp.x = 0;
+		tmp.y = 0;
+		if (bo->base.base.size < 4096) {
+			tmp.width = (bo->base.base.size + 3) / 4;
+			tmp.height = 1;
+		} else {
+			tmp.width = 1024;
+			tmp.height = (bo->base.base.size + 4095) / 4096;
+		}
+		tmp.data = bo->base.base.virtual;
+
+		tex.format = RADEON_TXFORMAT_ARGB8888;
+		tex.width = tmp.width;
+		tex.height = tmp.height;
+		tex.pitch = MAX2(tmp.width / 16, 1);
+
+		do {
+			ret = drmCommandWriteRead(bufmgr->rmesa->radeon.dri.fd,
+						DRM_RADEON_TEXTURE, &tex,
+						sizeof(drm_radeon_texture_t));
 			if (ret) {
-				fprintf(stderr, "Failed to free at %p\n",
-					rmesa->rmm->u_list[i].ptr);
-				fprintf(stderr, "ret = %s\n", strerror(-ret));
-				exit(1);
-			} else {
-#ifdef MM_DEBUG
-				fprintf(stderr, "really freed %d at age %x\n",
-					i,
-					radeonGetAge((radeonContextPtr) rmesa));
-#endif
-				if (i == rmesa->rmm->u_last)
-					rmesa->rmm->u_last--;
-
-				if (rmesa->rmm->u_list[i].size < 4096)
-					bytes_wasted -=
-					    4096 - rmesa->rmm->u_list[i].size;
-
-				allocated -= rmesa->rmm->u_list[i].size;
-				rmesa->rmm->u_list[i].pending = 0;
-				rmesa->rmm->u_list[i].ptr = NULL;
-				free = i;
+				if (RADEON_DEBUG & DEBUG_IOCTL)
+					fprintf(stderr,
+						"DRM_RADEON_TEXTURE:  again!\n");
+				usleep(1);
 			}
-		}
+		} while (ret == -EAGAIN);
+
+		bo->backing_store_dirty = 0;
 	}
-	rmesa->rmm->u_head = i;
-
-	if (free == -1) {
-		WARN_ONCE("Ran out of slots!\n");
-		//usleep(100);
-		r300FlushCmdBuf(rmesa, __FUNCTION__);
-		tries++;
-		if (tries > 100) {
-			WARN_ONCE("Ran out of slots!\n");
-			exit(1);
-		}
-		goto again;
+
+	bo->base.validated = 1;
+}
+
+static void vram_dirty(radeon_bo_classic *bo_base)
+{
+	radeon_bo_vram *bo = get_bo_vram(bo_base);
+
+	bo->base.validated = 0;
+	bo->backing_store_dirty = 1;
+}
+
+static void vram_bind(radeon_bo_classic *bo_base)
+{
+	radeon_bo_vram *bo = get_bo_vram(bo_base);
+
+	if (bo->vram) {
+		bo->vram->base.bound = 1;
+		driUpdateTextureLRU(&bo->vram->base);
 	}
+}

-	alloc.region = RADEON_MEM_REGION_GART;
-	alloc.alignment = alignment;
-	alloc.size = size;
-	alloc.region_offset = &offset;
+static void vram_unbind(radeon_bo_classic *bo_base)
+{
+	radeon_bo_vram *bo = get_bo_vram(bo_base);

-	ret =
-	    drmCommandWriteRead(rmesa->radeon.dri.fd, DRM_RADEON_ALLOC, &alloc,
-				sizeof(alloc));
-	if (ret) {
-#if 0
-		WARN_ONCE("Ran out of mem!\n");
-		r300FlushCmdBuf(rmesa, __FUNCTION__);
-		//usleep(100);
-		tries2++;
-		tries = 0;
-		if (tries2 > 100) {
-			WARN_ONCE("Ran out of GART memory!\n");
-			exit(1);
-		}
-		goto again;
-#else
-		WARN_ONCE
-		    ("Ran out of GART memory (for %d)!\nPlease consider adjusting GARTSize option.\n",
-		     size);
-		return 0;
-#endif
+	if (bo->vram)
+		bo->vram->base.bound = 0;
+}
+
+/** Callback function called by the texture heap when a texture is evicted */
+static void destroy_vram_wrapper(void *data, driTextureObject *t)
+{
+	radeon_vram_wrapper *wrapper = (radeon_vram_wrapper*)t;
+
+	if (wrapper->bo && wrapper->bo->vram == wrapper) {
+		wrapper->bo->base.validated = 0;
+		wrapper->bo->vram = 0;
 	}
+}

-	i = free;
+static const radeon_bo_functions vram_bo_functions = {
+	.free = vram_free,
+	.validate = vram_validate,
+	.dirty = vram_dirty,
+	.bind = vram_bind,
+	.unbind = vram_unbind
+};

-	if (i > rmesa->rmm->u_last)
-		rmesa->rmm->u_last = i;
+/**
+ * Free a VRAM-based buffer object.
+ */
+static void static_free(radeon_bo_classic *bo_base)
+{
+	radeon_bo_vram *bo = get_bo_vram(bo_base);

-	rmesa->rmm->u_list[i].ptr =
-	    ((GLubyte *) rmesa->radeon.radeonScreen->gartTextures.map) + offset;
-	rmesa->rmm->u_list[i].size = size;
-	rmesa->rmm->u_list[i].age = 0;
-	//fprintf(stderr, "alloc %p at id %d\n", rmesa->rmm->u_list[i].ptr, i);
+	free(bo);
+}

-#ifdef MM_DEBUG
-	fprintf(stderr, "allocated %d at age %x\n", i,
-		radeonGetAge((radeonContextPtr) rmesa));
-#endif
+static void static_bind(radeon_bo_classic *bo_base)
+{
+}

-	return i;
+static void static_unbind(radeon_bo_classic *bo_base)
+{
 }

-void r300_mem_use(r300ContextPtr rmesa, int id)
+static void static_validate(radeon_bo_classic *bo_base)
 {
-	uint64_t ull;
-#ifdef MM_DEBUG
-	fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
-		radeonGetAge((radeonContextPtr) rmesa));
-#endif
-	drm_r300_cmd_header_t *cmd;
+}

-	assert(id <= rmesa->rmm->u_last);
+static void static_dirty(radeon_bo_classic *bo_base)
+{
+}

-	if (id == 0)
-		return;
+static const radeon_bo_functions static_bo_functions = {
+	.free = static_free,
+	.validate = static_validate,
+	.dirty = static_dirty,
+	.bind = static_bind,
+	.unbind = static_unbind
+};

-	cmd =
-	    (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa,
-						      2 + sizeof(ull) / 4,
-						      __FUNCTION__);
-	cmd[0].scratch.cmd_type = R300_CMD_SCRATCH;
-	cmd[0].scratch.reg = R300_MEM_SCRATCH;
-	cmd[0].scratch.n_bufs = 1;
-	cmd[0].scratch.flags = 0;
-	cmd++;

-	ull = (uint64_t) (intptr_t) & rmesa->rmm->u_list[id].age;
-	_mesa_memcpy(cmd, &ull, sizeof(ull));
-	cmd += sizeof(ull) / 4;
+/**
+ * Allocate a backing store buffer object that is validated into VRAM.
+ */
+static dri_bo *vram_alloc(radeon_bufmgr_classic *bufmgr, const char *name,
+		unsigned long size, unsigned int alignment)
+{
+	radeon_bo_vram* bo = (radeon_bo_vram*)calloc(1, sizeof(radeon_bo_vram));
+
+	bo->base.functions = &vram_bo_functions;
+	bo->base.base.virtual = malloc(size);
+	init_buffer(bufmgr, &bo->base, size);
+	return &bo->base.base;
+}

-	cmd[0].u = /*id */ 0;

-	LOCK_HARDWARE(&rmesa->radeon);	/* Protect from DRM. */
-	rmesa->rmm->u_list[id].h_pending++;
-	UNLOCK_HARDWARE(&rmesa->radeon);
+static dri_bo *bufmgr_classic_bo_alloc(dri_bufmgr *bufmgr_ctx, const char *name,
+		unsigned long size, unsigned int alignment,
+		uint64_t location_mask)
+{
+	radeon_bufmgr_classic* bufmgr = get_bufmgr_classic(bufmgr_ctx);
+
+	if (location_mask & DRM_BO_MEM_CMDBUF) {
+		return cmdbuf_alloc(bufmgr, name, size);
+	} else if (location_mask & DRM_BO_MEM_DMA) {
+		return dma_alloc(bufmgr, name, size, alignment);
+	} else {
+		return vram_alloc(bufmgr, name, size, alignment);
+	}
 }

-unsigned long r300_mem_offset(r300ContextPtr rmesa, int id)
+static dri_bo *bufmgr_classic_bo_alloc_static(dri_bufmgr *bufmgr_ctx, const char *name,
+					      unsigned long offset, unsigned long size,
+					      void *virtual, uint64_t location_mask)
 {
-	unsigned long offset;
+  	radeon_bufmgr_classic* bufmgr = get_bufmgr_classic(bufmgr_ctx);
+	radeon_bo_vram* bo = (radeon_bo_vram*)calloc(1, sizeof(radeon_bo_vram));

-	assert(id <= rmesa->rmm->u_last);
+	bo->base.functions = &static_bo_functions;
+	bo->base.base.virtual = virtual;
+	bo->base.base.offset = offset + bufmgr->rmesa->radeon.radeonScreen->fbLocation;
+	bo->base.validated = 1; /* Static buffer offsets are always valid */

-	offset = (char *)rmesa->rmm->u_list[id].ptr -
-	    (char *)rmesa->radeon.radeonScreen->gartTextures.map;
-	offset += rmesa->radeon.radeonScreen->gart_texture_offset;
+	init_buffer(bufmgr, &bo->base, size);
+	return &bo->base.base;

-	return offset;
 }

-void *r300_mem_map(r300ContextPtr rmesa, int id, int access)
+
+
+static void bufmgr_classic_bo_reference(dri_bo *bo_base)
 {
-#ifdef MM_DEBUG
-	fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
-		radeonGetAge((radeonContextPtr) rmesa));
-#endif
-	void *ptr;
-	int tries = 0;
+	radeon_bo_classic *bo = get_bo_classic(bo_base);
+	bo->refcount++;
+	assert(bo->refcount > 0);
+}

-	assert(id <= rmesa->rmm->u_last);
+static void bufmgr_classic_bo_unreference(dri_bo *bo_base)
+{
+	radeon_bo_classic *bo = get_bo_classic(bo_base);

-	if (access == R300_MEM_R) {
+	if (!bo_base)
+		return;

-		if (rmesa->rmm->u_list[id].mapped == 1)
-			WARN_ONCE("buffer %d already mapped\n", id);
+	assert(bo->refcount > 0);
+	bo->refcount--;
+	if (!bo->refcount) {
+		// Ugly HACK - figure out whether this is really necessary
+		get_bufmgr_classic(bo_base->bufmgr)->rmesa->dma.nr_released_bufs++;

-		rmesa->rmm->u_list[id].mapped = 1;
-		ptr = r300_mem_ptr(rmesa, id);
+		assert(!bo->mapcount);
+		if (!bo->pending)
+			bo_free(bo);
+	}
+}

-		return ptr;
+static int bufmgr_classic_bo_map(dri_bo *bo_base, GLboolean write_enable)
+{
+	radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(bo_base->bufmgr);
+	radeon_bo_classic *bo = get_bo_classic(bo_base);
+	assert(bo->refcount > 0);
+
+	if (bo->pending) {
+		track_pending_buffers(bufmgr);
+		if (bo->pending) {
+			// TODO: Better fence waiting
+			if (RADEON_DEBUG & DEBUG_MEMORY)
+				fprintf(stderr, "bo_map: buffer is pending. Flushing...\n");
+			radeonFinish(bufmgr->rmesa->radeon.glCtx);
+			track_pending_buffers(bufmgr);
+			if (bo->pending) {
+				fprintf(stderr, "Internal error or hardware lockup: bo_map: buffer is still pending.\n");
+				abort();
+			}
+		}
 	}

-	if (rmesa->rmm->u_list[id].h_pending)
-		r300FlushCmdBuf(rmesa, __FUNCTION__);
+	if (write_enable && bo->functions->dirty)
+		bo->functions->dirty(bo);

-	if (rmesa->rmm->u_list[id].h_pending) {
-		return NULL;
-	}
+	bo->mapcount++;
+	assert(bo->mapcount > 0);
+	return 0;
+}

-	while (rmesa->rmm->u_list[id].age >
-	       radeonGetAge((radeonContextPtr) rmesa) && tries++ < 1000)
-		usleep(10);
+static int bufmgr_classic_bo_unmap(dri_bo *buf)
+{
+	radeon_bo_classic *bo = get_bo_classic(buf);
+	assert(bo->refcount > 0);
+	assert(bo->mapcount > 0);
+	bo->mapcount--;
+	return 0;
+}

-	if (tries >= 1000) {
-		fprintf(stderr, "Idling failed (%x vs %x)\n",
-			rmesa->rmm->u_list[id].age,
-			radeonGetAge((radeonContextPtr) rmesa));
-		return NULL;
+/**
+ * Mark the given buffer as pending and move it to the tail
+ * of the pending list.
+ * The caller is responsible for setting up pending_count and pending_age.
+ */
+static void move_to_pending_tail(radeon_bo_classic *bo)
+{
+	radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(bo->base.bufmgr);
+
+	if (bo->pending) {
+		*bo->pending_pprev = bo->pending_next;
+		if (bo->pending_next)
+			bo->pending_next->pending_pprev = bo->pending_pprev;
+		else
+			bufmgr->pending_tail = bo->pending_pprev;
 	}

-	if (rmesa->rmm->u_list[id].mapped == 1)
-		WARN_ONCE("buffer %d already mapped\n", id);
+	bo->pending = 1;
+	bo->pending_pprev = bufmgr->pending_tail;
+	bo->pending_next = 0;
+	*bufmgr->pending_tail = bo;
+	bufmgr->pending_tail = &bo->pending_next;
+}

-	rmesa->rmm->u_list[id].mapped = 1;
-	ptr = r300_mem_ptr(rmesa, id);
+/**
+ * Emit commands to the batch buffer that cause the guven buffer's
+ * pending_count and pending_age to be updated.
+ */
+static void emit_age_for_buffer(radeon_bo_classic* bo)
+{
+	radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(bo->base.bufmgr);
+	BATCH_LOCALS(bufmgr->rmesa);
+	drm_r300_cmd_header_t cmd;
+	uint64_t ull;

-	return ptr;
+	cmd.scratch.cmd_type = R300_CMD_SCRATCH;
+	cmd.scratch.reg = 2; /* Scratch register 2 corresponds to what radeonGetAge polls */
+	cmd.scratch.n_bufs = 1;
+	cmd.scratch.flags = 0;
+	ull = (uint64_t) (intptr_t) &bo->pending_age;
+
+	BEGIN_BATCH(4);
+	OUT_BATCH(cmd.u);
+	OUT_BATCH(ull & 0xffffffff);
+	OUT_BATCH(ull >> 32);
+	OUT_BATCH(0);
+	END_BATCH();
+	COMMIT_BATCH();
+
+	bo->pending_count++;
 }

-void r300_mem_unmap(r300ContextPtr rmesa, int id)
+static int bufmgr_classic_emit_reloc(dri_bo *batch_buf, uint64_t flags, GLuint delta,
+			GLuint offset, dri_bo *target)
 {
-#ifdef MM_DEBUG
-	fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
-		radeonGetAge((radeonContextPtr) rmesa));
-#endif
+	radeon_bo_classic *bo = get_bo_classic(batch_buf);
+	radeon_reloc *reloc;

-	assert(id <= rmesa->rmm->u_last);
+	if (bo->relocs_used >= bo->relocs_size) {
+		bo->relocs_size *= 2;
+		if (bo->relocs_size < 32)
+			bo->relocs_size = 32;

-	if (rmesa->rmm->u_list[id].mapped == 0)
-		WARN_ONCE("buffer %d not mapped\n", id);
+		bo->relocs = (radeon_reloc*)realloc(bo->relocs, bo->relocs_size*sizeof(radeon_reloc));
+	}

-	rmesa->rmm->u_list[id].mapped = 0;
+	reloc = &bo->relocs[bo->relocs_used++];
+	reloc->flags = flags;
+	reloc->offset = offset;
+	reloc->delta = delta;
+	reloc->target = get_bo_classic(target);
+	dri_bo_reference(target);
+	return 0;
 }

-void r300_mem_free(r300ContextPtr rmesa, int id)
+/* process_relocs is called just before the given command buffer
+ * is executed. It ensures that all referenced buffers are in
+ * the right GPU domain.
+ */
+static void *bufmgr_classic_process_relocs(dri_bo *batch_buf, GLuint *count)
 {
-#ifdef MM_DEBUG
-	fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
-		radeonGetAge((radeonContextPtr) rmesa));
-#endif
+	radeon_bo_classic *batch_bo = get_bo_classic(batch_buf);
+	int i;

-	assert(id <= rmesa->rmm->u_last);
+	// Warning: At this point, we append something to the batch buffer
+	// during flush.
+	emit_age_for_buffer(batch_bo);
+
+	dri_bo_map(batch_buf, GL_TRUE);
+	for(i = 0; i < batch_bo->relocs_used; ++i) {
+		radeon_reloc *reloc = &batch_bo->relocs[i];
+		uint32_t *dest = (uint32_t*)((char*)batch_buf->virtual + reloc->offset);
+		uint32_t offset;
+
+		if (!reloc->target->validated)
+			reloc->target->functions->validate(reloc->target);
+		reloc->target->used = 1;
+		offset = reloc->target->base.offset + reloc->delta;
+
+		if (reloc->flags & DRM_RELOC_BLITTER)
+			*dest = (*dest & 0xffc00000) | (offset >> 10);
+		else if (reloc->flags & DRM_RELOC_TXOFFSET)
+			*dest = (*dest & 31) | (offset & ~31);
+		else
+			*dest = offset;
+	}
+	dri_bo_unmap(batch_buf);
+	return 0;
+}

-	if (id == 0)
-		return;
+/* post_submit is called just after the given command buffer
+ * is executed. It ensures that buffers are properly marked as
+ * pending.
+ */
+static void bufmgr_classic_post_submit(dri_bo *batch_buf, dri_fence **fence)
+{
+	radeon_bo_classic *batch_bo = get_bo_classic(batch_buf);
+	int i;

-	if (rmesa->rmm->u_list[id].ptr == NULL) {
-		WARN_ONCE("Not allocated!\n");
-		return;
+	assert(!batch_bo->pending_count);
+
+	for(i = 0; i < batch_bo->relocs_used; ++i) {
+		radeon_reloc *reloc = &batch_bo->relocs[i];
+
+		if (reloc->target->used) {
+			reloc->target->used = 0;
+			assert(!reloc->target->pending_count);
+			reloc->target->pending_age = batch_bo->pending_age;
+			move_to_pending_tail(reloc->target);
+			if (reloc->target->functions->bind)
+				(*reloc->target->functions->bind)(reloc->target);
+		}
 	}
+}

-	if (rmesa->rmm->u_list[id].pending) {
-		WARN_ONCE("%p already pended!\n", rmesa->rmm->u_list[id].ptr);
-		return;
+static void bufmgr_classic_destroy(dri_bufmgr *bufmgr_ctx)
+{
+	radeon_bufmgr_classic* bufmgr = get_bufmgr_classic(bufmgr_ctx);
+
+	track_pending_buffers(bufmgr);
+	if (bufmgr->pending)
+		radeonFinish(bufmgr->rmesa->radeon.glCtx);
+	track_pending_buffers(bufmgr);
+
+	if (bufmgr->buffers) {
+		fprintf(stderr, "Warning: Buffer objects have leaked\n");
+		while(bufmgr->buffers) {
+			fprintf(stderr, "  Leak of size %ld\n", bufmgr->buffers->base.size);
+			bufmgr->buffers->refcount = 0;
+			bufmgr->buffers->mapcount = 0;
+			bufmgr->buffers->pending = 0;
+			bo_free(bufmgr->buffers);
+		}
 	}

-	rmesa->rmm->u_list[id].pending = 1;
+	driDestroyTextureHeap(bufmgr->texture_heap);
+	bufmgr->texture_heap = 0;
+	assert(is_empty_list(&bufmgr->texture_swapped));
+
+	free(bufmgr);
+}
+
+radeon_bufmgr* radeonBufmgrClassicInit(r300ContextPtr rmesa)
+{
+	radeon_bufmgr_classic* bufmgr = (radeon_bufmgr_classic*)calloc(1, sizeof(radeon_bufmgr_classic));
+
+	bufmgr->rmesa = rmesa;
+	bufmgr->base.base.bo_alloc = &bufmgr_classic_bo_alloc;
+	bufmgr->base.base.bo_alloc_static = bufmgr_classic_bo_alloc_static;
+	bufmgr->base.base.bo_reference = &bufmgr_classic_bo_reference;
+	bufmgr->base.base.bo_unreference = &bufmgr_classic_bo_unreference;
+	bufmgr->base.base.bo_map = &bufmgr_classic_bo_map;
+	bufmgr->base.base.bo_unmap = &bufmgr_classic_bo_unmap;
+	bufmgr->base.base.emit_reloc = &bufmgr_classic_emit_reloc;
+	bufmgr->base.base.process_relocs = &bufmgr_classic_process_relocs;
+	bufmgr->base.base.post_submit = &bufmgr_classic_post_submit;
+	bufmgr->base.base.destroy = &bufmgr_classic_destroy;
+
+	bufmgr->pending_tail = &bufmgr->pending;
+
+	/* Init texture heap */
+	make_empty_list(&bufmgr->texture_swapped);
+	bufmgr->texture_heap = driCreateTextureHeap(0, bufmgr,
+			rmesa->radeon.radeonScreen->texSize[0], 12, RADEON_NR_TEX_REGIONS,
+			(drmTextureRegionPtr)rmesa->radeon.sarea->tex_list[0],
+			&rmesa->radeon.sarea->tex_age[0],
+			&bufmgr->texture_swapped, sizeof(radeon_vram_wrapper),
+			&destroy_vram_wrapper);
+	bufmgr->texture_offset = rmesa->radeon.radeonScreen->texOffset[0];
+
+	return &bufmgr->base;
+}
+
+void radeonBufmgrContendedLockTake(radeon_bufmgr* bufmgr_ctx)
+{
+	radeon_bufmgr_classic *bufmgr = get_bufmgr_classic(&bufmgr_ctx->base);
+
+	DRI_AGE_TEXTURES(bufmgr->texture_heap);
 }
-#endif
diff --git a/src/mesa/drivers/dri/r300/r300_mem.h b/src/mesa/drivers/dri/r300/r300_mem.h
index 625a7f6..4e9be65 100644
--- a/src/mesa/drivers/dri/r300/r300_mem.h
+++ b/src/mesa/drivers/dri/r300/r300_mem.h
@@ -1,37 +1,22 @@
 #ifndef __R300_MEM_H__
 #define __R300_MEM_H__

-//#define R300_MEM_PDL 0
-#define R300_MEM_UL 1
+#include "glheader.h"
+#include "dri_bufmgr.h"

-#define R300_MEM_R 1
-#define R300_MEM_W 2
-#define R300_MEM_RW (R300_MEM_R | R300_MEM_W)
+#include "r300_context.h"

-#define R300_MEM_SCRATCH 2

-struct r300_memory_manager {
-	struct {
-		void *ptr;
-		uint32_t size;
-		uint32_t age;
-		uint32_t h_pending;
-		int pending;
-		int mapped;
-	} *u_list;
-	int u_head, u_size, u_last;
+/* Note: The following flags should probably be ultimately eliminated,
+ * or replaced by something else.
+ */
+#define DRM_BO_MEM_DMA (1 << 27) /** Use for transient buffers (texture upload, vertex buffers...) */
+#define DRM_BO_MEM_CMDBUF (1 << 28) /** Use for command buffers */

-};
+#define DRM_RELOC_BLITTER (1 << 23) /** Offset overwrites lower 22 bits (used with blit packet3) */
+#define DRM_RELOC_TXOFFSET (1 << 24) /** Offset overwrites everything but low bits (used for texture offsets) */

-extern void r300_mem_init(r300ContextPtr rmesa);
-extern void r300_mem_destroy(r300ContextPtr rmesa);
-extern void *r300_mem_ptr(r300ContextPtr rmesa, int id);
-extern int r300_mem_find(r300ContextPtr rmesa, void *ptr);
-extern int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size);
-extern void r300_mem_use(r300ContextPtr rmesa, int id);
-extern unsigned long r300_mem_offset(r300ContextPtr rmesa, int id);
-extern void *r300_mem_map(r300ContextPtr rmesa, int id, int access);
-extern void r300_mem_unmap(r300ContextPtr rmesa, int id);
-extern void r300_mem_free(r300ContextPtr rmesa, int id);
+radeon_bufmgr* radeonBufmgrClassicInit(r300ContextPtr rmesa);
+void radeonBufmgrContendedLockTake(radeon_bufmgr* bufmgr_ctx);

 #endif
diff --git a/src/mesa/drivers/dri/r300/r300_mipmap_tree.c b/src/mesa/drivers/dri/r300/r300_mipmap_tree.c
new file mode 100644
index 0000000..c3b918c
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_mipmap_tree.c
@@ -0,0 +1,248 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "r300_mipmap_tree.h"
+
+#include <errno.h>
+#include <unistd.h>
+
+#include "simple_list.h"
+#include "texcompress.h"
+#include "texformat.h"
+
+#include "r300_mem.h"
+
+static GLuint r300_compressed_texture_size(GLcontext *ctx,
+		GLsizei width, GLsizei height, GLsizei depth,
+		GLuint mesaFormat)
+{
+	GLuint size = _mesa_compressed_texture_size(ctx, width, height, depth, mesaFormat);
+
+	if (mesaFormat == MESA_FORMAT_RGB_DXT1 ||
+	    mesaFormat == MESA_FORMAT_RGBA_DXT1) {
+		if (width + 3 < 8)	/* width one block */
+			size = size * 4;
+		else if (width + 3 < 16)
+			size = size * 2;
+	} else {
+		/* DXT3/5, 16 bytes per block */
+		WARN_ONCE("DXT 3/5 suffers from multitexturing problems!\n");
+		if (width + 3 < 8)
+			size = size * 2;
+	}
+
+	return size;
+}
+
+/**
+ * Compute sizes and fill in offset and blit information for the given
+ * image (determined by \p face and \p level).
+ *
+ * \param curOffset points to the offset at which the image is to be stored
+ * and is updated by this function according to the size of the image.
+ */
+static void compute_tex_image_offset(r300_mipmap_tree *mt,
+	GLuint face, GLuint level, GLuint* curOffset)
+{
+	r300_mipmap_level *lvl = &mt->levels[level];
+
+	/* Find image size in bytes */
+	if (mt->compressed) {
+		lvl->size = r300_compressed_texture_size(mt->r300->radeon.glCtx,
+			lvl->width, lvl->height, lvl->depth, mt->compressed);
+	} else if (mt->target == GL_TEXTURE_RECTANGLE_NV) {
+		lvl->size = ((lvl->width * mt->bpp + 63) & ~63) * lvl->height;
+	} else if (mt->tilebits & R300_TXO_MICRO_TILE) {
+		/* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
+		 * though the actual offset may be different (if texture is less than
+		 * 32 bytes width) to the untiled case */
+		int w = (lvl->width * mt->bpp * 2 + 31) & ~31;
+		lvl->size = (w * ((lvl->height + 1) / 2)) * lvl->depth;
+	} else {
+		int w = (lvl->width * mt->bpp + 31) & ~31;
+		lvl->size = w * lvl->height * lvl->depth;
+	}
+	assert(lvl->size > 0);
+
+	/* All images are aligned to a 32-byte offset */
+	*curOffset = (*curOffset + 0x1f) & ~0x1f;
+	lvl->faces[face].offset = *curOffset;
+	*curOffset += lvl->size;
+}
+
+static GLuint minify(GLuint size, GLuint levels)
+{
+	size = size >> levels;
+	if (size < 1)
+		size = 1;
+	return size;
+}
+
+static void calculate_miptree_layout(r300_mipmap_tree *mt)
+{
+	GLuint curOffset;
+	GLuint numLevels;
+	GLuint i;
+
+	numLevels = mt->lastLevel - mt->firstLevel + 1;
+	assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
+
+	curOffset = 0;
+	for(i = 0; i < numLevels; i++) {
+		GLuint face;
+
+		mt->levels[i].width = minify(mt->width0, mt->firstLevel + i);
+		mt->levels[i].height = minify(mt->height0, mt->firstLevel + i);
+		mt->levels[i].depth = minify(mt->depth0, mt->firstLevel + i);
+
+		for(face = 0; face < mt->faces; face++)
+			compute_tex_image_offset(mt, face, i, &curOffset);
+	}
+
+	/* Note the required size in memory */
+	mt->totalsize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
+}
+
+
+/**
+ * Create a new mipmap tree, calculate its layout and allocate memory.
+ */
+r300_mipmap_tree* r300_miptree_create(r300ContextPtr rmesa, r300TexObj *t,
+		GLenum target, GLuint firstLevel, GLuint lastLevel,
+		GLuint width0, GLuint height0, GLuint depth0,
+		GLuint bpp, GLuint tilebits, GLuint compressed)
+{
+	r300_mipmap_tree *mt = CALLOC_STRUCT(_r300_mipmap_tree);
+
+	mt->r300 = rmesa;
+	mt->t = t;
+	mt->target = target;
+	mt->faces = (target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+	mt->firstLevel = firstLevel;
+	mt->lastLevel = lastLevel;
+	mt->width0 = width0;
+	mt->height0 = height0;
+	mt->depth0 = depth0;
+	mt->bpp = bpp;
+	mt->tilebits = tilebits;
+	mt->compressed = compressed;
+
+	calculate_miptree_layout(mt);
+
+	mt->bo = dri_bo_alloc(&rmesa->radeon.bufmgr->base, "texture", mt->totalsize, 1024, 0);
+
+	return mt;
+}
+
+/**
+ * Destroy the given mipmap tree.
+ */
+void r300_miptree_destroy(r300_mipmap_tree *mt)
+{
+	dri_bo_unreference(mt->bo);
+	free(mt);
+}
+
+/*
+ * XXX Move this into core Mesa?
+ */
+static void
+_mesa_copy_rect(GLubyte * dst,
+                GLuint cpp,
+                GLuint dst_pitch,
+                GLuint dst_x,
+                GLuint dst_y,
+                GLuint width,
+                GLuint height,
+                const GLubyte * src,
+                GLuint src_pitch, GLuint src_x, GLuint src_y)
+{
+   GLuint i;
+
+   dst_pitch *= cpp;
+   src_pitch *= cpp;
+   dst += dst_x * cpp;
+   src += src_x * cpp;
+   dst += dst_y * dst_pitch;
+   src += src_y * dst_pitch;
+   width *= cpp;
+
+   if (width == dst_pitch && width == src_pitch)
+      memcpy(dst, src, height * width);
+   else {
+      for (i = 0; i < height; i++) {
+         memcpy(dst, src, width);
+         dst += dst_pitch;
+         src += src_pitch;
+      }
+   }
+}
+
+/**
+ * Upload the given texture image to the given face/level of the mipmap tree.
+ * \param level of the texture, i.e. \c level==mt->firstLevel is the first hw level
+ */
+void r300_miptree_upload_image(r300_mipmap_tree *mt, GLuint face, GLuint level,
+			       struct gl_texture_image *texImage)
+{
+	GLuint hwlevel = level - mt->firstLevel;
+	r300_mipmap_level *lvl = &mt->levels[hwlevel];
+	void *dest;
+
+	assert(face < mt->faces);
+	assert(level >= mt->firstLevel && level <= mt->lastLevel);
+	assert(texImage && texImage->Data);
+	assert(texImage->Width == lvl->width);
+	assert(texImage->Height == lvl->height);
+	assert(texImage->Depth == lvl->depth);
+
+	dri_bo_map(mt->bo, GL_TRUE);
+
+	dest = mt->bo->virtual + lvl->faces[face].offset;
+
+	if (mt->tilebits)
+		WARN_ONCE("%s: tiling not supported yet", __FUNCTION__);
+
+	if (!mt->compressed) {
+		GLuint dst_align;
+		GLuint dst_pitch = lvl->width;
+		GLuint src_pitch = lvl->width;
+
+		if (mt->target == GL_TEXTURE_RECTANGLE_NV)
+			dst_align = 64 / mt->bpp;
+		else
+			dst_align = 32 / mt->bpp;
+		dst_pitch = (dst_pitch + dst_align - 1) & ~(dst_align - 1);
+
+		_mesa_copy_rect(dest, mt->bpp, dst_pitch, 0, 0, lvl->width, lvl->height,
+				texImage->Data, src_pitch, 0, 0);
+	} else {
+		memcpy(dest, texImage->Data, lvl->size);
+	}
+
+	dri_bo_unmap(mt->bo);
+}
diff --git a/src/mesa/drivers/dri/r300/r300_mipmap_tree.h b/src/mesa/drivers/dri/r300/r300_mipmap_tree.h
new file mode 100644
index 0000000..a888ecf
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_mipmap_tree.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __R300_MIPMAP_TREE_H_
+#define __R300_MIPMAP_TREE_H_
+
+#include "r300_context.h"
+
+typedef struct _r300_mipmap_tree r300_mipmap_tree;
+typedef struct _r300_mipmap_level r300_mipmap_level;
+typedef struct _r300_mipmap_image r300_mipmap_image;
+
+struct _r300_mipmap_image {
+	GLuint offset; /** Offset of this image from the start of mipmap tree, in bytes */
+};
+
+struct _r300_mipmap_level {
+	GLuint width;
+	GLuint height;
+	GLuint depth;
+	GLuint size; /** Size of each image, in bytes */
+	r300_mipmap_image faces[6];
+};
+
+
+/**
+ * A mipmap tree contains texture images in the layout that the hardware
+ * expects.
+ *
+ * The meta-data of mipmap trees is immutable, i.e. you cannot change the
+ * layout on-the-fly; however, the texture contents (i.e. texels) can be
+ * changed.
+ */
+struct _r300_mipmap_tree {
+	r300ContextPtr r300;
+	r300TexObj *t;
+	dri_bo *bo;
+
+	GLuint totalsize; /** total size of the miptree, in bytes */
+
+	GLenum target; /** GL_TEXTURE_xxx */
+	GLuint faces; /** # of faces: 6 for cubemaps, 1 otherwise */
+	GLuint firstLevel; /** First mip level stored in this mipmap tree */
+	GLuint lastLevel; /** Last mip level stored in this mipmap tree */
+
+	GLuint width0; /** Width of level 0 image */
+	GLuint height0; /** Height of level 0 image */
+	GLuint depth0; /** Depth of level 0 image */
+
+	GLuint bpp; /** Bytes per texel */
+	GLuint tilebits; /** R300_TXO_xxx_TILE */
+	GLuint compressed; /** MESA_FORMAT_xxx indicating a compressed format, or 0 if uncompressed */
+
+	r300_mipmap_level levels[RADEON_MAX_TEXTURE_LEVELS];
+};
+
+r300_mipmap_tree* r300_miptree_create(r300ContextPtr rmesa, r300TexObj *t,
+		GLenum target, GLuint firstLevel, GLuint lastLevel,
+		GLuint width0, GLuint height0, GLuint depth0,
+		GLuint bpp, GLuint tilebits, GLuint compressed);
+void r300_miptree_destroy(r300_mipmap_tree *mt);
+
+void r300_miptree_upload_image(r300_mipmap_tree *mt, GLuint face, GLuint level,
+			       struct gl_texture_image *texImage);
+
+
+#endif /* __R300_MIPMAP_TREE_H_ */
diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c
index 0a199e6..209fae9 100644
--- a/src/mesa/drivers/dri/r300/r300_render.c
+++ b/src/mesa/drivers/dri/r300/r300_render.c
@@ -175,89 +175,79 @@ int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
 static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts)
 {
 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
-	struct r300_dma_region *rvb = &rmesa->state.elt_dma;
 	void *out;

-	if (r300IsGartMemory(rmesa, elts, n_elts * 4)) {
-		rvb->address = rmesa->radeon.radeonScreen->gartTextures.map;
-		rvb->start = ((char *)elts) - rvb->address;
-		rvb->aos_offset =
-		    rmesa->radeon.radeonScreen->gart_texture_offset +
-		    rvb->start;
-		return;
-	} else if (r300IsGartMemory(rmesa, elts, 1)) {
-		WARN_ONCE("Pointer not within GART memory!\n");
-		_mesa_exit(-1);
-	}
-
-	r300AllocDmaRegion(rmesa, rvb, n_elts * 4, 4);
-	rvb->aos_offset = GET_START(rvb);
+	r300AllocDmaRegion(rmesa, &rmesa->state.elt_dma_bo, &rmesa->state.elt_dma_offset,
+			   n_elts * 4, 4);

-	out = rvb->address + rvb->start;
+	out = rmesa->state.elt_dma_bo->virtual + rmesa->state.elt_dma_offset;
 	memcpy(out, elts, n_elts * 4);
 }

-static void r300FireEB(r300ContextPtr rmesa, unsigned long addr,
-		       int vertex_count, int type)
+static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type)
 {
-	int cmd_reserved = 0;
-	int cmd_written = 0;
-	drm_radeon_cmd_header_t *cmd = NULL;
+	BATCH_LOCALS(rmesa);

-	start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0), 0);
-	e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count << 16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
+	BEGIN_BATCH(8);
+	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
+	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count << 16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit);

-	start_packet3(CP_PACKET3(R300_PACKET3_INDX_BUFFER, 2), 2);
-	e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
-	e32(addr);
-	e32(vertex_count);
+	OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
+	OUT_BATCH(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
+	OUT_BATCH_RELOC(0, rmesa->state.elt_dma_bo, rmesa->state.elt_dma_offset, 0);
+	OUT_BATCH(vertex_count);
+	END_BATCH();
 }

 static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
 {
+	BATCH_LOCALS(rmesa);
 	int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
 	int i;
-	int cmd_reserved = 0;
-	int cmd_written = 0;
-	drm_radeon_cmd_header_t *cmd = NULL;

 	if (RADEON_DEBUG & DEBUG_VERTS)
 		fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
 			offset);

-	start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1), sz - 1);
-	e32(nr);
+	BEGIN_BATCH(sz+2);
+	OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
+	OUT_BATCH(nr);

 	for (i = 0; i + 1 < nr; i += 2) {
-		e32((rmesa->state.aos[i].aos_size << 0) |
-		    (rmesa->state.aos[i].aos_stride << 8) |
-		    (rmesa->state.aos[i + 1].aos_size << 16) |
-		    (rmesa->state.aos[i + 1].aos_stride << 24));
-
-		e32(rmesa->state.aos[i].aos_offset + offset * 4 * rmesa->state.aos[i].aos_stride);
-		e32(rmesa->state.aos[i + 1].aos_offset + offset * 4 * rmesa->state.aos[i + 1].aos_stride);
+		OUT_BATCH((rmesa->state.aos[i].components << 0) |
+			  (rmesa->state.aos[i].stride << 8) |
+			  (rmesa->state.aos[i + 1].components << 16) |
+			  (rmesa->state.aos[i + 1].stride << 24));
+
+		OUT_BATCH_RELOC(0, rmesa->state.aos[i].bo,
+			rmesa->state.aos[i].offset + offset * 4 * rmesa->state.aos[i].stride, 0);
+		OUT_BATCH_RELOC(0, rmesa->state.aos[i+1].bo,
+			rmesa->state.aos[i+1].offset + offset * 4 * rmesa->state.aos[i + 1].stride, 0);
 	}

 	if (nr & 1) {
-		e32((rmesa->state.aos[nr - 1].aos_size << 0) |
-		    (rmesa->state.aos[nr - 1].aos_stride << 8));
-		e32(rmesa->state.aos[nr - 1].aos_offset + offset * 4 * rmesa->state.aos[nr - 1].aos_stride);
+		OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
+			  (rmesa->state.aos[nr - 1].stride << 8));
+		OUT_BATCH_RELOC(0, rmesa->state.aos[nr - 1].bo,
+			rmesa->state.aos[nr - 1].offset + offset * 4 * rmesa->state.aos[nr - 1].stride, 0);
 	}
+	END_BATCH();
 }

 static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
 {
-	int cmd_reserved = 0;
-	int cmd_written = 0;
-	drm_radeon_cmd_header_t *cmd = NULL;
+	BATCH_LOCALS(rmesa);

-	start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0);
-	e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
+	BEGIN_BATCH(3);
+	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
+	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
+	END_BATCH();
 }

 static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
 				   int start, int end, int prim)
 {
+	BATCH_LOCALS(rmesa);
 	int type, num_verts;
 	TNLcontext *tnl = TNL_CONTEXT(ctx);
 	struct vertex_buffer *vb = &tnl->vb;
@@ -268,6 +258,12 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
 	if (type < 0 || num_verts <= 0)
 		return;

+	/* Make space for at least 64 dwords.
+	 * This is supposed to ensure that we can get all rendering
+	 * commands into a single command buffer.
+	 */
+	r300EnsureCmdBufSpace(rmesa, 64, __FUNCTION__);
+
 	if (vb->Elts) {
 		if (num_verts > 65535) {
 			/* not implemented yet */
@@ -287,11 +283,12 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
 		 */
 		r300EmitElts(ctx, vb->Elts, num_verts);
 		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
-		r300FireEB(rmesa, rmesa->state.elt_dma.aos_offset, num_verts, type);
+		r300FireEB(rmesa, num_verts, type);
 	} else {
 		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
 		r300FireAOS(rmesa, num_verts, type);
 	}
+	COMMIT_BATCH();
 }

 static GLboolean r300RunRender(GLcontext * ctx,
@@ -324,10 +321,6 @@ static GLboolean r300RunRender(GLcontext * ctx,

 	r300EmitCacheFlush(rmesa);

-#ifdef USER_BUFFERS
-	r300UseArrays(ctx);
-#endif
-
 	r300ReleaseArrays(ctx);

 	return GL_FALSE;
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index cce07d3..b314764 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -55,6 +55,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 #include "radeon_ioctl.h"
 #include "radeon_state.h"
+#include "radeon_buffer.h"
 #include "r300_context.h"
 #include "r300_ioctl.h"
 #include "r300_state.h"
@@ -1148,39 +1149,25 @@ void r300UpdateDrawBuffer(GLcontext * ctx)
 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
 	r300ContextPtr r300 = rmesa;
 	struct gl_framebuffer *fb = ctx->DrawBuffer;
-	driRenderbuffer *drb;
+	struct radeon_renderbuffer *rrb;

 	if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
 		/* draw to front */
-		drb =
-		    (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].
-		    Renderbuffer;
+		rrb =
+		    (void *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
 	} else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
 		/* draw to back */
-		drb =
-		    (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].
-		    Renderbuffer;
+		rrb = (void *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
 	} else {
 		/* drawing to multiple buffers, or none */
 		return;
 	}

-	assert(drb);
-	assert(drb->flippedPitch);
+	assert(rrb);
+	assert(rrb->pitch);

 	R300_STATECHANGE(rmesa, cb);

-	r300->hw.cb.cmd[R300_CB_OFFSET] = drb->flippedOffset +	//r300->radeon.state.color.drawOffset +
-	    r300->radeon.radeonScreen->fbLocation;
-	r300->hw.cb.cmd[R300_CB_PITCH] = drb->flippedPitch;	//r300->radeon.state.color.drawPitch;
-
-	if (r300->radeon.radeonScreen->cpp == 4)
-		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
-	else
-		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
-
-	if (r300->radeon.sarea->tiling_enabled)
-		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
 #if 0
 	R200_STATECHANGE(rmesa, ctx);

@@ -1499,14 +1486,9 @@ static void r300SetupTextures(GLcontext * ctx)
 	/* We cannot let disabled tmu offsets pass DRM */
 	for (i = 0; i < mtu; i++) {
 		if (ctx->Texture.Unit[i]._ReallyEnabled) {
-
-#if 0				/* Enables old behaviour */
-			hw_tmu = i;
-#endif
 			tmu_mappings[i] = hw_tmu;

-			t = r300->state.texture.unit[i].texobj;
-			/* XXX questionable fix for bug 9170: */
+			t = r300_tex_obj(ctx->Texture.Unit[i]._Current);
 			if (!t)
 				continue;

@@ -1532,21 +1514,20 @@ static void r300SetupTextures(GLcontext * ctx)
 			 */
 			r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] =
 				t->filter_1 |
-				translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.tObj->LodBias);
+				translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.LodBias);
 			r300->hw.tex.size.cmd[R300_TEX_VALUE_0 + hw_tmu] =
 			    t->size;
 			r300->hw.tex.format.cmd[R300_TEX_VALUE_0 +
 						hw_tmu] = t->format;
 			r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0 + hw_tmu] =
 			    t->pitch_reg;
-			r300->hw.tex.offset.cmd[R300_TEX_VALUE_0 +
-						hw_tmu] = t->offset;
+			r300->hw.textures[hw_tmu] = t;

-			if (t->offset & R300_TXO_MACRO_TILE) {
+			if (t->tile_bits & R300_TXO_MACRO_TILE) {
 				WARN_ONCE("macro tiling enabled!\n");
 			}

-			if (t->offset & R300_TXO_MICRO_TILE) {
+			if (t->tile_bits & R300_TXO_MICRO_TILE) {
 				WARN_ONCE("micro tiling enabled!\n");
 			}

@@ -2373,20 +2354,6 @@ static void r300ResetHwState(r300ContextPtr r300)

 	r300BlendColor(ctx, ctx->Color.BlendColor);

-	/* Again, r300ClearBuffer uses this */
-	r300->hw.cb.cmd[R300_CB_OFFSET] =
-	    r300->radeon.state.color.drawOffset +
-	    r300->radeon.radeonScreen->fbLocation;
-	r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch;
-
-	if (r300->radeon.radeonScreen->cpp == 4)
-		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
-	else
-		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
-
-	if (r300->radeon.sarea->tiling_enabled)
-		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
-
 	r300->hw.rb3d_dither_ctl.cmd[1] = 0;
 	r300->hw.rb3d_dither_ctl.cmd[2] = 0;
 	r300->hw.rb3d_dither_ctl.cmd[3] = 0;
@@ -2402,10 +2369,6 @@ static void r300ResetHwState(r300ContextPtr r300)
 	r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[1] = 0x00000000;
 	r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[2] = 0xffffffff;

-	r300->hw.zb.cmd[R300_ZB_OFFSET] =
-	    r300->radeon.radeonScreen->depthOffset +
-	    r300->radeon.radeonScreen->fbLocation;
-	r300->hw.zb.cmd[R300_ZB_PITCH] = r300->radeon.radeonScreen->depthPitch;

 	if (r300->radeon.sarea->tiling_enabled) {
 		/* XXX: Turn off when clearing buffers ? */
diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h
index 0589ab7..96177ba 100644
--- a/src/mesa/drivers/dri/r300/r300_state.h
+++ b/src/mesa/drivers/dri/r300/r300_state.h
@@ -59,7 +59,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define R300_FIREVERTICES( r300 )			\
 do {							\
     \
-   if ( (r300)->cmdbuf.count_used || (r300)->dma.flush ) {	\
+   if ( (r300)->cmdbuf.committed || (r300)->dma.flush ) {	\
       r300Flush( (r300)->radeon.glCtx );		\
    }							\
     \
diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c
index 8aebd9b..f4a0b7f 100644
--- a/src/mesa/drivers/dri/r300/r300_swtcl.c
+++ b/src/mesa/drivers/dri/r300/r300_swtcl.c
@@ -61,7 +61,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 static void flush_last_swtcl_prim( r300ContextPtr rmesa  );


-void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset);
+void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, dri_bo *bo, GLuint offset);
 void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr);
 #define EMIT_ATTR( ATTR, STYLE )					\
 do {									\
@@ -175,7 +175,7 @@ static void r300SetVertexFormat( GLcontext *ctx )
 			inputs[i] = -1;
 		}
 	}
-
+
 	/* Fixed, apply to vir0 only */
 	if (InputsRead & (1 << VERT_ATTRIB_POS))
 		inputs[VERT_ATTRIB_POS] = 0;
@@ -186,16 +186,16 @@ static void r300SetVertexFormat( GLcontext *ctx )
 	for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
 		if (InputsRead & (1 << i))
 			inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
-
+
 	for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
 		if (InputsRead & (1 << i)) {
 			tab[nr++] = i;
 		}
 	}
-
+
 	for (i = 0; i < nr; i++) {
 		int ci;
-
+
 		swizzle[i][0] = SWIZZLE_ZERO;
 		swizzle[i][1] = SWIZZLE_ZERO;
 		swizzle[i][2] = SWIZZLE_ZERO;
@@ -215,21 +215,21 @@ static void r300SetVertexFormat( GLcontext *ctx )
 	((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
 		r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
 				   nr);
-
+
 	R300_STATECHANGE(rmesa, vic);
 	rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
 	rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
-
+
 	R300_STATECHANGE(rmesa, vof);
 	rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten);
 	rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_fmt_1;
-
+
 	rmesa->swtcl.vertex_size =
 		_tnl_install_attrs( ctx,
-				    rmesa->swtcl.vertex_attrs,
+				    rmesa->swtcl.vertex_attrs,
 				    rmesa->swtcl.vertex_attr_count,
 				    NULL, 0 );
-
+
 	rmesa->swtcl.vertex_size /= 4;

 	RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset );
@@ -245,38 +245,40 @@ static void r300SetVertexFormat( GLcontext *ctx )
  */
 static void flush_last_swtcl_prim( r300ContextPtr rmesa  )
 {
+	BATCH_LOCALS(rmesa);
+
 	if (RADEON_DEBUG & DEBUG_IOCTL)
 		fprintf(stderr, "%s\n", __FUNCTION__);
-
+
 	rmesa->dma.flush = NULL;

-	if (rmesa->dma.current.buf) {
-		struct r300_dma_region *current = &rmesa->dma.current;
-		GLuint current_offset = GET_START(current);
+	if (rmesa->dma.current) {
+		GLuint current_offset = rmesa->dma.current_used;

-		assert (current->start +
+		assert (rmesa->dma.current_used +
 			rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
-			current->ptr);
+			rmesa->dma.current_vertexptr);

-		if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
+		if (rmesa->dma.current_used != rmesa->dma.current_vertexptr) {
+			rmesa->dma.current_used = rmesa->dma.current_vertexptr;

 			r300EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size + (12*sizeof(int)), __FUNCTION__);
-
+
 			r300EmitState(rmesa);
-
+
 			r300EmitVertexAOS( rmesa,
 					   rmesa->swtcl.vertex_size,
-					   current_offset);
-
+					   rmesa->dma.current, current_offset);
+
 			r300EmitVbufPrim( rmesa,
 					  rmesa->swtcl.hw_primitive,
 					  rmesa->swtcl.numverts);
-
+
 			r300EmitCacheFlush(rmesa);
+			COMMIT_BATCH();
 		}
-
+
 		rmesa->swtcl.numverts = 0;
-		current->start = current->ptr;
 	}
 }

@@ -287,7 +289,7 @@ r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize )
 {
 	GLuint bytes = vsize * nverts;

-	if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end )
+	if (!rmesa->dma.current || rmesa->dma.current_vertexptr + bytes > rmesa->dma.current->size)
 		r300RefillCurrentDmaRegion( rmesa, bytes);

 	if (!rmesa->dma.flush) {
@@ -297,13 +299,13 @@ r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize )

 	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
 	ASSERT( rmesa->dma.flush == flush_last_swtcl_prim );
-	ASSERT( rmesa->dma.current.start +
+	ASSERT( rmesa->dma.current_used +
 		rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
-		rmesa->dma.current.ptr );
+		rmesa->dma.current_vertexptr );

 	{
-		GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr);
-		rmesa->dma.current.ptr += bytes;
+		GLubyte *head = (GLubyte *) (rmesa->dma.current->virtual + rmesa->dma.current_vertexptr);
+		rmesa->dma.current_vertexptr += bytes;
 		rmesa->swtcl.numverts += nverts;
 		return head;
 	}
@@ -352,7 +354,7 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim );
    r300ContextPtr rmesa = R300_CONTEXT(ctx);		\
    const char *r300verts = (char *)rmesa->swtcl.verts;
 #define VERT(x) (r300Vertex *)(r300verts + ((x) * vertsize * sizeof(int)))
-#define VERTEX r300Vertex
+#define VERTEX r300Vertex
 #define DO_DEBUG_VERTS (1 && (RADEON_DEBUG & DEBUG_VERTS))
 #define PRINT_VERTEX(x)
 #undef TAG
@@ -572,15 +574,15 @@ static void r300RenderStart(GLcontext *ctx)
         r300ContextPtr rmesa = R300_CONTEXT( ctx );
 	//	fprintf(stderr, "%s\n", __FUNCTION__);

-	r300ChooseRenderState(ctx);
+	r300ChooseRenderState(ctx);
 	r300SetVertexFormat(ctx);

 	r300UpdateShaders(rmesa);
 	r300UpdateShaderStates(rmesa);

 	r300EmitCacheFlush(rmesa);
-
-	if (rmesa->dma.flush != 0 &&
+
+	if (rmesa->dma.flush != 0 &&
 	    rmesa->dma.flush != flush_last_swtcl_prim)
 		rmesa->dma.flush( rmesa );

@@ -593,7 +595,7 @@ static void r300RenderFinish(GLcontext *ctx)
 static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim )
 {
 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
-
+
 	if (rmesa->swtcl.hw_primitive != hwprim) {
 	        R300_NEWPRIM( rmesa );
 		rmesa->swtcl.hw_primitive = hwprim;
@@ -611,7 +613,7 @@ static void r300RenderPrimitive(GLcontext *ctx, GLenum prim)

 	r300RasterPrimitive( ctx, reduced_prim[prim] );
 	//	fprintf(stderr, "%s\n", __FUNCTION__);
-
+
 }

 static void r300ResetLineStipple(GLcontext *ctx)
@@ -625,12 +627,12 @@ void r300InitSwtcl(GLcontext *ctx)
 	TNLcontext *tnl = TNL_CONTEXT(ctx);
 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
 	static int firsttime = 1;
-
+
 	if (firsttime) {
 		init_rast_tab();
 		firsttime = 0;
 	}
-
+
 	tnl->Driver.Render.Start = r300RenderStart;
 	tnl->Driver.Render.Finish = r300RenderFinish;
 	tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive;
@@ -638,15 +640,15 @@ void r300InitSwtcl(GLcontext *ctx)
 	tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
 	tnl->Driver.Render.CopyPV = _tnl_copy_pv;
 	tnl->Driver.Render.Interp = _tnl_interp;
-
+
 	/* FIXME: what are these numbers? */
-	_tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12,
+	_tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12,
 			    48 * sizeof(GLfloat) );
-
+
 	rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
 	rmesa->swtcl.RenderIndex = ~0;
 	rmesa->swtcl.render_primitive = GL_TRIANGLES;
-	rmesa->swtcl.hw_primitive = 0;
+	rmesa->swtcl.hw_primitive = 0;

 	_tnl_invalidate_vertex_state( ctx, ~0 );
 	_tnl_invalidate_vertices( ctx, ~0 );
@@ -655,9 +657,9 @@ void r300InitSwtcl(GLcontext *ctx)
 	_tnl_need_projected_coords( ctx, GL_FALSE );
 	r300ChooseRenderState(ctx);

-	_mesa_validate_all_lighting_tables( ctx );
+	_mesa_validate_all_lighting_tables( ctx );

-	tnl->Driver.NotifyMaterialChange =
+	tnl->Driver.NotifyMaterialChange =
 	  _mesa_validate_all_lighting_tables;
 }

@@ -665,33 +667,32 @@ void r300DestroySwtcl(GLcontext *ctx)
 {
 }

-void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset)
+void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, dri_bo *bo, GLuint offset)
 {
-	int cmd_reserved = 0;
-	int cmd_written = 0;
+	BATCH_LOCALS(rmesa);

-	drm_radeon_cmd_header_t *cmd = NULL;
 	if (RADEON_DEBUG & DEBUG_VERTS)
-	  fprintf(stderr, "%s:  vertex_size %d, offset 0x%x \n",
-		  __FUNCTION__, vertex_size, offset);
-
-	start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2), 2);
-	e32(1);
-	e32(vertex_size | (vertex_size << 8));
-	e32(offset);
+		fprintf(stderr, "%s:  vertex_size %d, offset 0x%x \n",
+			__FUNCTION__, vertex_size, offset);
+
+	BEGIN_BATCH(5);
+	OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2);
+	OUT_BATCH(1);
+	OUT_BATCH(vertex_size | (vertex_size << 8));
+	OUT_BATCH_RELOC(0, bo, offset, 0);
+	END_BATCH();
 }

 void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr)
 {
-
-	int cmd_reserved = 0;
-	int cmd_written = 0;
+	BATCH_LOCALS(rmesa);
 	int type, num_verts;
-	drm_radeon_cmd_header_t *cmd = NULL;

 	type = r300PrimitiveType(rmesa, primitive);
 	num_verts = r300NumVerts(rmesa, vertex_nr, primitive);
-
-	start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0);
-	e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type);
+
+	BEGIN_BATCH(3);
+	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
+	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type);
+	END_BATCH();
 }
diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c
index f7f4972..c6ee1b5 100644
--- a/src/mesa/drivers/dri/r300/r300_tex.c
+++ b/src/mesa/drivers/dri/r300/r300_tex.c
@@ -48,6 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "r300_context.h"
 #include "r300_state.h"
 #include "r300_ioctl.h"
+#include "r300_mipmap_tree.h"
 #include "r300_tex.h"

 #include "xmlpool.h"
@@ -78,7 +79,7 @@ static unsigned int translate_wrap_mode(GLenum wrapmode)
  */
 static void r300UpdateTexWrap(r300TexObjPtr t)
 {
-	struct gl_texture_object *tObj = t->base.tObj;
+	struct gl_texture_object *tObj = &t->base;

 	t->filter &=
 	    ~(R300_TX_WRAP_S_MASK | R300_TX_WRAP_T_MASK | R300_TX_WRAP_R_MASK);
@@ -175,39 +176,6 @@ static void r300SetTexBorderColor(r300TexObjPtr t, GLubyte c[4])
 	t->pp_border_color = PACK_COLOR_8888(c[3], c[0], c[1], c[2]);
 }

-/**
- * Allocate space for and load the mesa images into the texture memory block.
- * This will happen before drawing with a new texture, or drawing with a
- * texture after it was swapped out or teximaged again.
- */
-
-static r300TexObjPtr r300AllocTexObj(struct gl_texture_object *texObj)
-{
-	r300TexObjPtr t;
-
-	t = CALLOC_STRUCT(r300_tex_obj);
-	texObj->DriverData = t;
-	if (t != NULL) {
-		if (RADEON_DEBUG & DEBUG_TEXTURE) {
-			fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__,
-				(void *)texObj, (void *)t);
-		}
-
-		/* Initialize non-image-dependent parts of the state:
-		 */
-		t->base.tObj = texObj;
-		t->border_fallback = GL_FALSE;
-
-		make_empty_list(&t->base);
-
-		r300UpdateTexWrap(t);
-		r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy);
-		r300SetTexBorderColor(t, texObj->_BorderChan);
-	}
-
-	return t;
-}
-
 /* try to find a format which will only need a memcopy */
 static const struct gl_texture_format *r300Choose8888TexFormat(GLenum srcFormat,
 							       GLenum srcType)
@@ -433,95 +401,14 @@ static const struct gl_texture_format *r300ChooseTextureFormat(GLcontext * ctx,
 	return NULL;		/* never get here */
 }

-static GLboolean
-r300ValidateClientStorage(GLcontext * ctx, GLenum target,
-			  GLint internalFormat,
-			  GLint srcWidth, GLint srcHeight,
-			  GLenum format, GLenum type, const void *pixels,
-			  const struct gl_pixelstore_attrib *packing,
-			  struct gl_texture_object *texObj,
-			  struct gl_texture_image *texImage)
+/**
+ * Marks the given face/level pair as dirty.
+ * This will cause an appropriate texture reupload the next time this
+ * texture is validated.
+ */
+static void mark_texture_image_dirty(r300TexObj *t, int face, int level)
 {
-	r300ContextPtr rmesa = R300_CONTEXT(ctx);
-
-	if (RADEON_DEBUG & DEBUG_TEXTURE)
-		fprintf(stderr, "intformat %s format %s type %s\n",
-			_mesa_lookup_enum_by_nr(internalFormat),
-			_mesa_lookup_enum_by_nr(format),
-			_mesa_lookup_enum_by_nr(type));
-
-	if (!ctx->Unpack.ClientStorage)
-		return 0;
-
-	if (ctx->_ImageTransferState ||
-	    texImage->IsCompressed || texObj->GenerateMipmap)
-		return 0;
-
-	/* This list is incomplete, may be different on ppc???
-	 */
-	switch (internalFormat) {
-	case GL_RGBA:
-		if (format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV) {
-			texImage->TexFormat = _dri_texformat_argb8888;
-		} else
-			return 0;
-		break;
-
-	case GL_RGB:
-		if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) {
-			texImage->TexFormat = _dri_texformat_rgb565;
-		} else
-			return 0;
-		break;
-
-	case GL_YCBCR_MESA:
-		if (format == GL_YCBCR_MESA &&
-		    type == GL_UNSIGNED_SHORT_8_8_REV_APPLE) {
-			texImage->TexFormat = &_mesa_texformat_ycbcr_rev;
-		} else if (format == GL_YCBCR_MESA &&
-			   (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
-			    type == GL_UNSIGNED_BYTE)) {
-			texImage->TexFormat = &_mesa_texformat_ycbcr;
-		} else
-			return 0;
-		break;
-
-	default:
-		return 0;
-	}
-
-	/* Could deal with these packing issues, but currently don't:
-	 */
-	if (packing->SkipPixels ||
-	    packing->SkipRows || packing->SwapBytes || packing->LsbFirst) {
-		return 0;
-	}
-
-	GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth,
-						    format, type);
-
-	if (RADEON_DEBUG & DEBUG_TEXTURE)
-		fprintf(stderr, "%s: srcRowStride %d/%x\n",
-			__FUNCTION__, srcRowStride, srcRowStride);
-
-	/* Could check this later in upload, pitch restrictions could be
-	 * relaxed, but would need to store the image pitch somewhere,
-	 * as packing details might change before image is uploaded:
-	 */
-	if (!r300IsGartMemory(rmesa, pixels, srcHeight * srcRowStride)
-	    || (srcRowStride & 63))
-		return 0;
-
-	/* Have validated that _mesa_transfer_teximage would be a straight
-	 * memcpy at this point.  NOTE: future calls to TexSubImage will
-	 * overwrite the client data.  This is explicitly mentioned in the
-	 * extension spec.
-	 */
-	texImage->Data = (void *)pixels;
-	texImage->IsClientData = GL_TRUE;
-	texImage->RowStride = srcRowStride / texImage->TexFormat->TexelBytes;
-
-	return 1;
+	t->dirty_images[face] |= 1 << level;
 }

 static void r300TexImage1D(GLcontext * ctx, GLenum target, GLint level,
@@ -532,24 +419,13 @@ static void r300TexImage1D(GLcontext * ctx, GLenum target, GLint level,
 			   struct gl_texture_object *texObj,
 			   struct gl_texture_image *texImage)
 {
-	driTextureObject *t = (driTextureObject *) texObj->DriverData;
+	r300TexObj* t = r300_tex_obj(texObj);

-	if (t) {
-		driSwapOutTextureObject(t);
-	} else {
-		t = (driTextureObject *) r300AllocTexObj(texObj);
-		if (!t) {
-			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D");
-			return;
-		}
-	}
-
-	/* Note, this will call ChooseTextureFormat */
 	_mesa_store_teximage1d(ctx, target, level, internalFormat,
 			       width, border, format, type, pixels,
 			       &ctx->Unpack, texObj, texImage);

-	t->dirty_images[0] |= (1 << level);
+	mark_texture_image_dirty(t, 0, level);
 }

 static void r300TexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
@@ -561,24 +437,13 @@ static void r300TexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
 			      struct gl_texture_object *texObj,
 			      struct gl_texture_image *texImage)
 {
-	driTextureObject *t = (driTextureObject *) texObj->DriverData;
-
-	assert(t);		/* this _should_ be true */
-	if (t) {
-		driSwapOutTextureObject(t);
-	} else {
-		t = (driTextureObject *) r300AllocTexObj(texObj);
-		if (!t) {
-			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D");
-			return;
-		}
-	}
+	r300TexObj* t = r300_tex_obj(texObj);

 	_mesa_store_texsubimage1d(ctx, target, level, xoffset, width,
 				  format, type, pixels, packing, texObj,
 				  texImage);

-	t->dirty_images[0] |= (1 << level);
+	mark_texture_image_dirty(t, 0, level);
 }

 static void r300TexImage2D(GLcontext * ctx, GLenum target, GLint level,
@@ -589,7 +454,7 @@ static void r300TexImage2D(GLcontext * ctx, GLenum target, GLint level,
 			   struct gl_texture_object *texObj,
 			   struct gl_texture_image *texImage)
 {
-	driTextureObject *t = (driTextureObject *) texObj->DriverData;
+	r300TexObj* t = r300_tex_obj(texObj);
 	GLuint face;

 	/* which cube face or ordinary 2D image */
@@ -608,43 +473,23 @@ static void r300TexImage2D(GLcontext * ctx, GLenum target, GLint level,
 		face = 0;
 	}

-	if (t != NULL) {
-		driSwapOutTextureObject(t);
-	} else {
-		t = (driTextureObject *) r300AllocTexObj(texObj);
-		if (!t) {
-			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
-			return;
-		}
-	}
-
 	texImage->IsClientData = GL_FALSE;

-	if (r300ValidateClientStorage(ctx, target,
-				      internalFormat,
-				      width, height,
-				      format, type, pixels,
-				      packing, texObj, texImage)) {
-		if (RADEON_DEBUG & DEBUG_TEXTURE)
-			fprintf(stderr, "%s: Using client storage\n",
-				__FUNCTION__);
-	} else {
-		if (RADEON_DEBUG & DEBUG_TEXTURE)
-			fprintf(stderr, "%s: Using normal storage\n",
-				__FUNCTION__);
-
-		/* Normal path: copy (to cached memory) and eventually upload
-		 * via another copy to GART memory and then a blit...  Could
-		 * eliminate one copy by going straight to (permanent) GART.
-		 *
-		 * Note, this will call r300ChooseTextureFormat.
-		 */
-		_mesa_store_teximage2d(ctx, target, level, internalFormat,
-				       width, height, border, format, type,
-				       pixels, &ctx->Unpack, texObj, texImage);
+	if (RADEON_DEBUG & DEBUG_TEXTURE)
+		fprintf(stderr, "%s: Using normal storage\n",
+			__FUNCTION__);
+
+	/* Normal path: copy (to cached memory) and eventually upload
+	 * via another copy to GART memory and then a blit...  Could
+	 * eliminate one copy by going straight to (permanent) GART.
+	 *
+	 * Note, this will call r300ChooseTextureFormat.
+	 */
+	_mesa_store_teximage2d(ctx, target, level, internalFormat,
+				width, height, border, format, type,
+				pixels, &ctx->Unpack, texObj, texImage);

-		t->dirty_images[face] |= (1 << level);
-	}
+	mark_texture_image_dirty(t, face, level);
 }

 static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
@@ -656,7 +501,7 @@ static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
 			      struct gl_texture_object *texObj,
 			      struct gl_texture_image *texImage)
 {
-	driTextureObject *t = (driTextureObject *) texObj->DriverData;
+	r300TexObj* t = r300_tex_obj(texObj);
 	GLuint face;

 	/* which cube face or ordinary 2D image */
@@ -675,22 +520,11 @@ static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
 		face = 0;
 	}

-	assert(t);		/* this _should_ be true */
-	if (t) {
-		driSwapOutTextureObject(t);
-	} else {
-		t = (driTextureObject *) r300AllocTexObj(texObj);
-		if (!t) {
-			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
-			return;
-		}
-	}
-
 	_mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
 				  height, format, type, pixels, packing, texObj,
 				  texImage);

-	t->dirty_images[face] |= (1 << level);
+	mark_texture_image_dirty(t, face, level);
 }

 static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target,
@@ -700,7 +534,7 @@ static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target,
 				     struct gl_texture_object *texObj,
 				     struct gl_texture_image *texImage)
 {
-	driTextureObject *t = (driTextureObject *) texObj->DriverData;
+	r300TexObj* t = r300_tex_obj(texObj);
 	GLuint face;

 	/* which cube face or ordinary 2D image */
@@ -719,49 +553,24 @@ static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target,
 		face = 0;
 	}

-	if (t != NULL) {
-		driSwapOutTextureObject(t);
-	} else {
-		t = (driTextureObject *) r300AllocTexObj(texObj);
-		if (!t) {
-			_mesa_error(ctx, GL_OUT_OF_MEMORY,
-				    "glCompressedTexImage2D");
-			return;
-		}
-	}
-
 	texImage->IsClientData = GL_FALSE;

-	/* can't call this, different parameters. Would never evaluate to true anyway currently */
-#if 0
-	if (r300ValidateClientStorage(ctx, target,
-				      internalFormat,
-				      width, height,
-				      format, type, pixels,
-				      packing, texObj, texImage)) {
-		if (RADEON_DEBUG & DEBUG_TEXTURE)
-			fprintf(stderr, "%s: Using client storage\n",
-				__FUNCTION__);
-	} else
-#endif
-	{
-		if (RADEON_DEBUG & DEBUG_TEXTURE)
-			fprintf(stderr, "%s: Using normal storage\n",
-				__FUNCTION__);
-
-		/* Normal path: copy (to cached memory) and eventually upload
-		 * via another copy to GART memory and then a blit...  Could
-		 * eliminate one copy by going straight to (permanent) GART.
-		 *
-		 * Note, this will call r300ChooseTextureFormat.
-		 */
-		_mesa_store_compressed_teximage2d(ctx, target, level,
-						  internalFormat, width, height,
-						  border, imageSize, data,
-						  texObj, texImage);
+	if (RADEON_DEBUG & DEBUG_TEXTURE)
+		fprintf(stderr, "%s: Using normal storage\n",
+			__FUNCTION__);
+
+	/* Normal path: copy (to cached memory) and eventually upload
+	 * via another copy to GART memory and then a blit...  Could
+	 * eliminate one copy by going straight to (permanent) GART.
+	 *
+	 * Note, this will call r300ChooseTextureFormat.
+	 */
+	_mesa_store_compressed_teximage2d(ctx, target, level,
+						internalFormat, width, height,
+						border, imageSize, data,
+						texObj, texImage);

-		t->dirty_images[face] |= (1 << level);
-	}
+	mark_texture_image_dirty(t, face, level);
 }

 static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target,
@@ -772,7 +581,7 @@ static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target,
 					struct gl_texture_object *texObj,
 					struct gl_texture_image *texImage)
 {
-	driTextureObject *t = (driTextureObject *) texObj->DriverData;
+	r300TexObj* t = r300_tex_obj(texObj);
 	GLuint face;

 	/* which cube face or ordinary 2D image */
@@ -791,23 +600,11 @@ static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target,
 		face = 0;
 	}

-	assert(t);		/* this _should_ be true */
-	if (t) {
-		driSwapOutTextureObject(t);
-	} else {
-		t = (driTextureObject *) r300AllocTexObj(texObj);
-		if (!t) {
-			_mesa_error(ctx, GL_OUT_OF_MEMORY,
-				    "glCompressedTexSubImage3D");
-			return;
-		}
-	}
-
 	_mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset,
 					     yoffset, width, height, format,
 					     imageSize, data, texObj, texImage);

-	t->dirty_images[face] |= (1 << level);
+	mark_texture_image_dirty(t, face, level);
 }

 static void r300TexImage3D(GLcontext * ctx, GLenum target, GLint level,
@@ -819,49 +616,26 @@ static void r300TexImage3D(GLcontext * ctx, GLenum target, GLint level,
 			   struct gl_texture_object *texObj,
 			   struct gl_texture_image *texImage)
 {
-	driTextureObject *t = (driTextureObject *) texObj->DriverData;
-
-	if (t) {
-		driSwapOutTextureObject(t);
-	} else {
-		t = (driTextureObject *) r300AllocTexObj(texObj);
-		if (!t) {
-			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage3D");
-			return;
-		}
-	}
+	r300TexObj* t = r300_tex_obj(texObj);

 	texImage->IsClientData = GL_FALSE;

-#if 0
-	if (r300ValidateClientStorage(ctx, target,
-				      internalFormat,
-				      width, height,
-				      format, type, pixels,
-				      packing, texObj, texImage)) {
-		if (RADEON_DEBUG & DEBUG_TEXTURE)
-			fprintf(stderr, "%s: Using client storage\n",
-				__FUNCTION__);
-	} else
-#endif
-	{
-		if (RADEON_DEBUG & DEBUG_TEXTURE)
-			fprintf(stderr, "%s: Using normal storage\n",
-				__FUNCTION__);
-
-		/* Normal path: copy (to cached memory) and eventually upload
-		 * via another copy to GART memory and then a blit...  Could
-		 * eliminate one copy by going straight to (permanent) GART.
-		 *
-		 * Note, this will call r300ChooseTextureFormat.
-		 */
-		_mesa_store_teximage3d(ctx, target, level, internalFormat,
-				       width, height, depth, border,
-				       format, type, pixels,
-				       &ctx->Unpack, texObj, texImage);
+	if (RADEON_DEBUG & DEBUG_TEXTURE)
+		fprintf(stderr, "%s: Using normal storage\n",
+			__FUNCTION__);
+
+	/* Normal path: copy (to cached memory) and eventually upload
+	 * via another copy to GART memory and then a blit...  Could
+	 * eliminate one copy by going straight to (permanent) GART.
+	 *
+	 * Note, this will call r300ChooseTextureFormat.
+	 */
+	_mesa_store_teximage3d(ctx, target, level, internalFormat,
+				width, height, depth, border,
+				format, type, pixels,
+				&ctx->Unpack, texObj, texImage);

-		t->dirty_images[0] |= (1 << level);
-	}
+	mark_texture_image_dirty(t, 0, level);
 }

 static void
@@ -874,28 +648,14 @@ r300TexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
 		  struct gl_texture_object *texObj,
 		  struct gl_texture_image *texImage)
 {
-	driTextureObject *t = (driTextureObject *) texObj->DriverData;
-
-/*     fprintf(stderr, "%s\n", __FUNCTION__); */
-
-	assert(t);		/* this _should_ be true */
-	if (t) {
-		driSwapOutTextureObject(t);
-	} else {
-		t = (driTextureObject *) r300AllocTexObj(texObj);
-		if (!t) {
-			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage3D");
-			return;
-		}
-		texObj->DriverData = t;
-	}
+	r300TexObj* t = r300_tex_obj(texObj);

 	_mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset,
 				  width, height, depth,
 				  format, type, pixels, packing, texObj,
 				  texImage);

-	t->dirty_images[0] |= (1 << level);
+	mark_texture_image_dirty(t, 0, level);
 }

 /**
@@ -907,7 +667,7 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
 			     struct gl_texture_object *texObj,
 			     GLenum pname, const GLfloat * params)
 {
-	r300TexObjPtr t = (r300TexObjPtr) texObj->DriverData;
+	r300TexObj* t = r300_tex_obj(texObj);

 	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
 		fprintf(stderr, "%s( %s )\n", __FUNCTION__,
@@ -940,7 +700,10 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
 		 * we just have to rely on loading the right subset of mipmap levels
 		 * to simulate a clamped LOD.
 		 */
-		driSwapOutTextureObject((driTextureObject *) t);
+		if (t->mt) {
+			r300_miptree_destroy(t->mt);
+			t->mt = 0;
+		}
 		break;

 	case GL_DEPTH_TEXTURE_MODE:
@@ -963,27 +726,10 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
 	}
 }

-static void r300BindTexture(GLcontext * ctx, GLenum target,
-			    struct gl_texture_object *texObj)
-{
-	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
-		fprintf(stderr, "%s( %p ) unit=%d\n", __FUNCTION__,
-			(void *)texObj, ctx->Texture.CurrentUnit);
-	}
-
-	if ((target == GL_TEXTURE_1D)
-	    || (target == GL_TEXTURE_2D)
-	    || (target == GL_TEXTURE_3D)
-	    || (target == GL_TEXTURE_CUBE_MAP)
-	    || (target == GL_TEXTURE_RECTANGLE_NV)) {
-		assert(texObj->DriverData != NULL);
-	}
-}
-
 static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
 {
 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
-	driTextureObject *t = (driTextureObject *) texObj->DriverData;
+	r300TexObj* t = r300_tex_obj(texObj);

 	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
 		fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
@@ -991,14 +737,19 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
 			_mesa_lookup_enum_by_nr(texObj->Target));
 	}

-	if (t != NULL) {
-		if (rmesa) {
-			R300_FIREVERTICES(rmesa);
-		}
+	if (rmesa) {
+		int i;
+		R300_FIREVERTICES(rmesa);
+
+		for(i = 0; i < R300_MAX_TEXTURE_UNITS; ++i)
+			if (rmesa->hw.textures[i] == t)
+				rmesa->hw.textures[i] = 0;
+	}

-		driDestroyTextureObject(t);
+	if (t->mt) {
+		r300_miptree_destroy(t->mt);
+		t->mt = 0;
 	}
-	/* Free mipmap images and the texture object itself */
 	_mesa_delete_texture_object(ctx, texObj);
 }

@@ -1007,8 +758,6 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
  * Called via ctx->Driver.NewTextureObject.
  * Note: this function will be called during context creation to
  * allocate the default texture objects.
- * Note: we could use containment here to 'derive' the driver-specific
- * texture object from the core mesa gl_texture_object.  Not done at this time.
  * Fixup MaxAnisotropy according to user preference.
  */
 static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx,
@@ -1016,14 +765,23 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx,
 						      GLenum target)
 {
 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
-	struct gl_texture_object *obj;
-	obj = _mesa_new_texture_object(ctx, name, target);
-	if (!obj)
-		return NULL;
-	obj->MaxAnisotropy = rmesa->initialMaxAnisotropy;
+	r300TexObj* t = CALLOC_STRUCT(r300_tex_obj);
+
+
+	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
+		fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
+			t, _mesa_lookup_enum_by_nr(target));
+	}
+
+	_mesa_initialize_texture_object(&t->base, name, target);
+	t->base.MaxAnisotropy = rmesa->initialMaxAnisotropy;
+
+	/* Initialize hardware state */
+	r300UpdateTexWrap(t);
+	r300SetTexFilter(t, t->base.MinFilter, t->base.MagFilter, t->base.MaxAnisotropy);
+	r300SetTexBorderColor(t, t->base._BorderChan);

-	r300AllocTexObj(obj);
-	return obj;
+	return &t->base;
 }

 void r300InitTextureFuncs(struct dd_function_table *functions)
@@ -1039,7 +797,6 @@ void r300InitTextureFuncs(struct dd_function_table *functions)
 	functions->TexSubImage2D = r300TexSubImage2D;
 	functions->TexSubImage3D = r300TexSubImage3D;
 	functions->NewTextureObject = r300NewTextureObject;
-	functions->BindTexture = r300BindTexture;
 	functions->DeleteTexture = r300DeleteTexture;
 	functions->IsTextureResident = driIsTextureResident;

diff --git a/src/mesa/drivers/dri/r300/r300_tex.h b/src/mesa/drivers/dri/r300/r300_tex.h
index b86d45b..5d7f21e 100644
--- a/src/mesa/drivers/dri/r300/r300_tex.h
+++ b/src/mesa/drivers/dri/r300/r300_tex.h
@@ -46,8 +46,6 @@ extern void r300UpdateTextureState(GLcontext * ctx);
 extern int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t,
 			       GLuint face);

-extern void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t);
-
 extern void r300InitTextureFuncs(struct dd_function_table *functions);

 #endif				/* __r300_TEX_H__ */
diff --git a/src/mesa/drivers/dri/r300/r300_texmem.c b/src/mesa/drivers/dri/r300/r300_texmem.c
index 69847a4..b3b501b 100644
--- a/src/mesa/drivers/dri/r300/r300_texmem.c
+++ b/src/mesa/drivers/dri/r300/r300_texmem.c
@@ -48,439 +48,15 @@ SOFTWARE.
 #include "r300_context.h"
 #include "r300_state.h"
 #include "r300_cmdbuf.h"
+#include "r300_emit.h"
+#include "r300_mipmap_tree.h"
 #include "radeon_ioctl.h"
 #include "r300_tex.h"
 #include "r300_ioctl.h"
 #include <unistd.h>		/* for usleep() */

-#ifdef USER_BUFFERS
 #include "r300_mem.h"
-#endif

-/**
- * Destroy any device-dependent state associated with the texture.  This may
- * include NULLing out hardware state that points to the texture.
- */
-void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t)
-{
-	int i;
-
-	if (RADEON_DEBUG & DEBUG_TEXTURE) {
-		fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__,
-			(void *)t, (void *)t->base.tObj);
-	}
-
-	for (i = 0; i < rmesa->radeon.glCtx->Const.MaxTextureUnits; i++) {
-		if (rmesa->state.texture.unit[i].texobj == t) {
-			rmesa->state.texture.unit[i].texobj = NULL;
-		}
-	}
-}
-
-/* ------------------------------------------------------------
- * Texture image conversions
- */
-
-static void r300UploadGARTClientSubImage(r300ContextPtr rmesa,
-					 r300TexObjPtr t,
-					 struct gl_texture_image *texImage,
-					 GLint hwlevel,
-					 GLint x, GLint y,
-					 GLint width, GLint height)
-{
-	const struct gl_texture_format *texFormat = texImage->TexFormat;
-	GLuint srcPitch, dstPitch;
-	int blit_format;
-	int srcOffset;
-
-	/*
-	 * XXX it appears that we always upload the full image, not a subimage.
-	 * I.e. x==0, y==0, width=texWidth, height=texWidth.  If this is ever
-	 * changed, the src pitch will have to change.
-	 */
-	switch (texFormat->TexelBytes) {
-	case 1:
-		blit_format = R300_CP_COLOR_FORMAT_CI8;
-		srcPitch = t->image[0][0].width * texFormat->TexelBytes;
-		dstPitch = t->image[0][0].width * texFormat->TexelBytes;
-		break;
-	case 2:
-		blit_format = R300_CP_COLOR_FORMAT_RGB565;
-		srcPitch = t->image[0][0].width * texFormat->TexelBytes;
-		dstPitch = t->image[0][0].width * texFormat->TexelBytes;
-		break;
-	case 4:
-		blit_format = R300_CP_COLOR_FORMAT_ARGB8888;
-		srcPitch = t->image[0][0].width * texFormat->TexelBytes;
-		dstPitch = t->image[0][0].width * texFormat->TexelBytes;
-		break;
-	case 8:
-	case 16:
-		blit_format = R300_CP_COLOR_FORMAT_CI8;
-		srcPitch = t->image[0][0].width * texFormat->TexelBytes;
-		dstPitch = t->image[0][0].width * texFormat->TexelBytes;
-		break;
-	default:
-		return;
-	}
-
-	t->image[0][hwlevel].data = texImage->Data;
-	srcOffset = r300GartOffsetFromVirtual(rmesa, texImage->Data);
-
-	assert(srcOffset != ~0);
-
-	/* Don't currently need to cope with small pitches?
-	 */
-	width = texImage->Width;
-	height = texImage->Height;
-
-	if (texFormat->TexelBytes > 4) {
-		width *= texFormat->TexelBytes;
-	}
-
-	r300EmitWait(rmesa, R300_WAIT_3D);
-
-	r300EmitBlit(rmesa, blit_format,
-		     srcPitch,
-		     srcOffset,
-		     dstPitch,
-		     t->bufAddr,
-		     x,
-		     y,
-		     t->image[0][hwlevel].x + x,
-		     t->image[0][hwlevel].y + y, width, height);
-
-	r300EmitWait(rmesa, R300_WAIT_2D);
-}
-
-static void r300UploadRectSubImage(r300ContextPtr rmesa,
-				   r300TexObjPtr t,
-				   struct gl_texture_image *texImage,
-				   GLint x, GLint y, GLint width, GLint height)
-{
-	const struct gl_texture_format *texFormat = texImage->TexFormat;
-	int blit_format, dstPitch, done;
-
-	switch (texFormat->TexelBytes) {
-	case 1:
-		blit_format = R300_CP_COLOR_FORMAT_CI8;
-		break;
-	case 2:
-		blit_format = R300_CP_COLOR_FORMAT_RGB565;
-		break;
-	case 4:
-		blit_format = R300_CP_COLOR_FORMAT_ARGB8888;
-		break;
-	case 8:
-	case 16:
-		blit_format = R300_CP_COLOR_FORMAT_CI8;
-		break;
-	default:
-		return;
-	}
-
-	t->image[0][0].data = texImage->Data;
-
-	/* Currently don't need to cope with small pitches.
-	 */
-	width = texImage->Width;
-	height = texImage->Height;
-	dstPitch = t->pitch;
-
-	if (texFormat->TexelBytes > 4) {
-		width *= texFormat->TexelBytes;
-	}
-
-	if (rmesa->prefer_gart_client_texturing && texImage->IsClientData) {
-		/* In this case, could also use GART texturing.  This is
-		 * currently disabled, but has been tested & works.
-		 */
-		t->offset = r300GartOffsetFromVirtual(rmesa, texImage->Data);
-		t->pitch = texImage->RowStride * texFormat->TexelBytes - 32;
-
-		if (RADEON_DEBUG & DEBUG_TEXTURE)
-			fprintf(stderr,
-				"Using GART texturing for rectangular client texture\n");
-
-		/* Release FB memory allocated for this image:
-		 */
-		/* FIXME This may not be correct as driSwapOutTextureObject sets
-		 * FIXME dirty_images.  It may be fine, though.
-		 */
-		if (t->base.memBlock) {
-			driSwapOutTextureObject((driTextureObject *) t);
-		}
-	} else if (texImage->IsClientData) {
-		/* Data already in GART memory, with usable pitch.
-		 */
-		GLuint srcPitch;
-		srcPitch = texImage->RowStride * texFormat->TexelBytes;
-		r300EmitBlit(rmesa,
-			     blit_format,
-			     srcPitch,
-			     r300GartOffsetFromVirtual(rmesa, texImage->Data),
-			     dstPitch, t->bufAddr, 0, 0, 0, 0, width, height);
-	} else {
-		/* Data not in GART memory, or bad pitch.
-		 */
-		for (done = 0; done < height;) {
-			struct r300_dma_region region;
-			int lines =
-			    MIN2(height - done, RADEON_BUFFER_SIZE / dstPitch);
-			int src_pitch;
-			char *tex;
-
-			src_pitch = texImage->RowStride * texFormat->TexelBytes;
-
-			tex = (char *)texImage->Data + done * src_pitch;
-
-			memset(&region, 0, sizeof(region));
-			r300AllocDmaRegion(rmesa, &region, lines * dstPitch,
-					   1024);
-
-			/* Copy texdata to dma:
-			 */
-			if (RADEON_DEBUG & DEBUG_TEXTURE)
-				fprintf(stderr,
-					"%s: src_pitch %d dst_pitch %d\n",
-					__FUNCTION__, src_pitch, dstPitch);
-
-			if (src_pitch == dstPitch) {
-				memcpy(region.address + region.start, tex,
-				       lines * src_pitch);
-			} else {
-				char *buf = region.address + region.start;
-				int i;
-				for (i = 0; i < lines; i++) {
-					memcpy(buf, tex, src_pitch);
-					buf += dstPitch;
-					tex += src_pitch;
-				}
-			}
-
-			r300EmitWait(rmesa, R300_WAIT_3D);
-
-			/* Blit to framebuffer
-			 */
-			r300EmitBlit(rmesa,
-				     blit_format,
-				     dstPitch, GET_START(&region),
-				     dstPitch | (t->tile_bits >> 16),
-				     t->bufAddr, 0, 0, 0, done, width, lines);
-
-			r300EmitWait(rmesa, R300_WAIT_2D);
-#ifdef USER_BUFFERS
-			r300_mem_use(rmesa, region.buf->id);
-#endif
-
-			r300ReleaseDmaRegion(rmesa, &region, __FUNCTION__);
-			done += lines;
-		}
-	}
-}
-
-/**
- * Upload the texture image associated with texture \a t at the specified
- * level at the address relative to \a start.
- */
-static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t,
-			       GLint hwlevel,
-			       GLint x, GLint y, GLint width, GLint height,
-			       GLuint face)
-{
-	struct gl_texture_image *texImage = NULL;
-	GLuint offset;
-	GLint imageWidth, imageHeight;
-	GLint ret;
-	drm_radeon_texture_t tex;
-	drm_radeon_tex_image_t tmp;
-	const int level = hwlevel + t->base.firstLevel;
-
-	if (RADEON_DEBUG & DEBUG_TEXTURE) {
-		fprintf(stderr,
-			"%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n",
-			__FUNCTION__, (void *)t, (void *)t->base.tObj, level,
-			width, height, face);
-	}
-
-	ASSERT(face < 6);
-
-	/* Ensure we have a valid texture to upload */
-	if ((hwlevel < 0) || (hwlevel >= RADEON_MAX_TEXTURE_LEVELS)) {
-		_mesa_problem(NULL, "bad texture level in %s", __FUNCTION__);
-		return;
-	}
-
-	texImage = t->base.tObj->Image[face][level];
-
-	if (!texImage) {
-		if (RADEON_DEBUG & DEBUG_TEXTURE)
-			fprintf(stderr, "%s: texImage %d is NULL!\n",
-				__FUNCTION__, level);
-		return;
-	}
-	if (!texImage->Data) {
-		if (RADEON_DEBUG & DEBUG_TEXTURE)
-			fprintf(stderr, "%s: image data is NULL!\n",
-				__FUNCTION__);
-		return;
-	}
-
-	if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
-		assert(level == 0);
-		assert(hwlevel == 0);
-		if (RADEON_DEBUG & DEBUG_TEXTURE)
-			fprintf(stderr, "%s: image data is rectangular\n",
-				__FUNCTION__);
-		r300UploadRectSubImage(rmesa, t, texImage, x, y, width, height);
-		return;
-	} else if (texImage->IsClientData) {
-		if (RADEON_DEBUG & DEBUG_TEXTURE)
-			fprintf(stderr,
-				"%s: image data is in GART client storage\n",
-				__FUNCTION__);
-		r300UploadGARTClientSubImage(rmesa, t, texImage, hwlevel, x, y,
-					     width, height);
-		return;
-	} else if (RADEON_DEBUG & DEBUG_TEXTURE)
-		fprintf(stderr, "%s: image data is in normal memory\n",
-			__FUNCTION__);
-
-	imageWidth = texImage->Width;
-	imageHeight = texImage->Height;
-
-	offset = t->bufAddr;
-
-	if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) {
-		GLint imageX = 0;
-		GLint imageY = 0;
-		GLint blitX = t->image[face][hwlevel].x;
-		GLint blitY = t->image[face][hwlevel].y;
-		GLint blitWidth = t->image[face][hwlevel].width;
-		GLint blitHeight = t->image[face][hwlevel].height;
-		fprintf(stderr, "   upload image: %d,%d at %d,%d\n",
-			imageWidth, imageHeight, imageX, imageY);
-		fprintf(stderr, "   upload  blit: %d,%d at %d,%d\n",
-			blitWidth, blitHeight, blitX, blitY);
-		fprintf(stderr, "       blit ofs: 0x%07x level: %d/%d\n",
-			(GLuint) offset, hwlevel, level);
-	}
-
-	t->image[face][hwlevel].data = texImage->Data;
-
-	/* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct.
-	 * NOTE: we're always use a 1KB-wide blit and I8 texture format.
-	 * We used to use 1, 2 and 4-byte texels and used to use the texture
-	 * width to dictate the blit width - but that won't work for compressed
-	 * textures. (Brian)
-	 * NOTE: can't do that with texture tiling. (sroland)
-	 */
-	tex.offset = offset;
-	tex.image = &tmp;
-	/* copy (x,y,width,height,data) */
-	memcpy(&tmp, &t->image[face][hwlevel], sizeof(tmp));
-
-	if (texImage->TexFormat->TexelBytes > 4) {
-		const int log2TexelBytes =
-		    (3 + (texImage->TexFormat->TexelBytes >> 4));
-		tex.format = RADEON_TXFORMAT_I8;	/* any 1-byte texel format */
-		tex.pitch =
-		    MAX2((texImage->Width * texImage->TexFormat->TexelBytes) /
-			 64, 1);
-		tex.height = imageHeight;
-		tex.width = imageWidth << log2TexelBytes;
-		tex.offset += (tmp.x << log2TexelBytes) & ~1023;
-		tmp.x = tmp.x % (1024 >> log2TexelBytes);
-		tmp.width = tmp.width << log2TexelBytes;
-	} else if (texImage->TexFormat->TexelBytes) {
-		/* use multi-byte upload scheme */
-		tex.height = imageHeight;
-		tex.width = imageWidth;
-		switch (texImage->TexFormat->TexelBytes) {
-		case 1:
-			tex.format = RADEON_TXFORMAT_I8;
-			break;
-		case 2:
-			tex.format = RADEON_TXFORMAT_AI88;
-			break;
-		case 4:
-			tex.format = RADEON_TXFORMAT_ARGB8888;
-			break;
-		}
-		tex.pitch =
-		    MAX2((texImage->Width * texImage->TexFormat->TexelBytes) /
-			 64, 1);
-		tex.offset += tmp.x & ~1023;
-		tmp.x = tmp.x % 1024;
-
-		if (t->tile_bits & R300_TXO_MICRO_TILE) {
-			/* need something like "tiled coordinates" ? */
-			tmp.y = tmp.x / (tex.pitch * 128) * 2;
-			tmp.x =
-			    tmp.x % (tex.pitch * 128) / 2 /
-			    texImage->TexFormat->TexelBytes;
-			tex.pitch |= RADEON_DST_TILE_MICRO >> 22;
-		} else {
-			tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1);
-		}
-#if 1
-		if ((t->tile_bits & R300_TXO_MACRO_TILE) &&
-		    (texImage->Width * texImage->TexFormat->TexelBytes >= 256)
-		    && ((!(t->tile_bits & R300_TXO_MICRO_TILE)
-			 && (texImage->Height >= 8))
-			|| (texImage->Height >= 16))) {
-			/* weird: R200 disables macro tiling if mip width is smaller than 256 bytes,
-			   OR if height is smaller than 8 automatically, but if micro tiling is active
-			   the limit is height 16 instead ? */
-			tex.pitch |= RADEON_DST_TILE_MACRO >> 22;
-		}
-#endif
-	} else {
-		/* In case of for instance 8x8 texture (2x2 dxt blocks),
-		   padding after the first two blocks is needed (only
-		   with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */
-		/* set tex.height to 1/4 since 1 "macropixel" (dxt-block)
-		   has 4 real pixels. Needed so the kernel module reads
-		   the right amount of data. */
-		tex.format = RADEON_TXFORMAT_I8;	/* any 1-byte texel format */
-		tex.pitch = (R300_BLIT_WIDTH_BYTES / 64);
-		tex.height = (imageHeight + 3) / 4;
-		tex.width = (imageWidth + 3) / 4;
-		if ((t->format & R300_TX_FORMAT_DXT1) == R300_TX_FORMAT_DXT1) {
-			tex.width *= 8;
-		} else {
-			tex.width *= 16;
-		}
-	}
-
-	LOCK_HARDWARE(&rmesa->radeon);
-	do {
-		ret =
-		    drmCommandWriteRead(rmesa->radeon.dri.fd,
-					DRM_RADEON_TEXTURE, &tex,
-					sizeof(drm_radeon_texture_t));
-		if (ret) {
-			if (RADEON_DEBUG & DEBUG_IOCTL)
-				fprintf(stderr,
-					"DRM_RADEON_TEXTURE:  again!\n");
-			usleep(1);
-		}
-	} while (ret == -EAGAIN);
-
-	UNLOCK_HARDWARE(&rmesa->radeon);
-
-	if (ret) {
-		fprintf(stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret);
-		fprintf(stderr, "   offset=0x%08x\n", offset);
-		fprintf(stderr, "   image width=%d height=%d\n",
-			imageWidth, imageHeight);
-		fprintf(stderr, "    blit width=%d height=%d data=%p\n",
-			t->image[face][hwlevel].width,
-			t->image[face][hwlevel].height,
-			t->image[face][hwlevel].data);
-		_mesa_exit(-1);
-	}
-}

 /**
  * Upload the texture images associated with texture \a t.  This might
@@ -493,69 +69,32 @@ static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t,

 int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t, GLuint face)
 {
-	const int numLevels = t->base.lastLevel - t->base.firstLevel + 1;
-
 	if (t->image_override)
 		return 0;
+	if (!t->mt)
+		return 0;

 	if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) {
-		fprintf(stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__,
-			(void *)rmesa->radeon.glCtx, (void *)t->base.tObj,
-			t->base.totalSize, t->base.firstLevel,
-			t->base.lastLevel);
+		fprintf(stderr, "%s( %p, %p ) lvls=%d-%d\n", __FUNCTION__,
+			(void *)rmesa->radeon.glCtx, t,
+			t->mt->firstLevel, t->mt->lastLevel);
 	}

-	if (t->base.totalSize == 0)
-		return 0;
-
 	if (RADEON_DEBUG & DEBUG_SYNC) {
 		fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
 		radeonFinish(rmesa->radeon.glCtx);
 	}

-	LOCK_HARDWARE(&rmesa->radeon);
-
-	if (t->base.memBlock == NULL) {
-		int heap;
-
-		heap = driAllocateTexture(rmesa->texture_heaps, rmesa->nr_heaps,
-					  (driTextureObject *) t);
-		if (heap == -1) {
-			UNLOCK_HARDWARE(&rmesa->radeon);
-			return -1;
-		}
-
-		/* Set the base offset of the texture image */
-		t->bufAddr = rmesa->radeon.radeonScreen->texOffset[heap]
-		    + t->base.memBlock->ofs;
-		t->offset = t->bufAddr;
-
-		if (!(t->base.tObj->Image[0][0]->IsClientData)) {
-			/* hope it's safe to add that here... */
-			t->offset |= t->tile_bits;
-		}
-	}
-
-	/* Let the world know we've used this memory recently.
-	 */
-	driUpdateTextureLRU((driTextureObject *) t);
-	UNLOCK_HARDWARE(&rmesa->radeon);
-
 	/* Upload any images that are new */
-	if (t->base.dirty_images[face]) {
-		int i;
+	if (t->dirty_images[face]) {
+		int i, numLevels = t->mt->lastLevel - t->mt->firstLevel + 1;
 		for (i = 0; i < numLevels; i++) {
-			if ((t->base.
-			     dirty_images[face] & (1 <<
-						   (i + t->base.firstLevel))) !=
-			    0) {
-				r300UploadSubImage(rmesa, t, i, 0, 0,
-						   t->image[face][i].width,
-						   t->image[face][i].height,
-						   face);
+			if (t->dirty_images[face] & (1 << (i + t->mt->firstLevel))) {
+				r300_miptree_upload_image(t->mt, face, t->mt->firstLevel + i,
+					t->base.Image[face][t->mt->firstLevel + i]);
 			}
 		}
-		t->base.dirty_images[face] = 0;
+		t->dirty_images[face] = 0;
 	}

 	if (RADEON_DEBUG & DEBUG_SYNC) {
diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c
index bdd20b1..1b24738 100644
--- a/src/mesa/drivers/dri/r300/r300_texstate.c
+++ b/src/mesa/drivers/dri/r300/r300_texstate.c
@@ -48,6 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "r300_state.h"
 #include "r300_ioctl.h"
 #include "radeon_ioctl.h"
+#include "r300_mipmap_tree.h"
 #include "r300_tex.h"
 #include "r300_reg.h"

@@ -148,8 +149,7 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj)
 	if (!tObj)
 		return;

-	t = (r300TexObjPtr) tObj->DriverData;
-
+	t = r300_tex_obj(tObj);

 	switch (tObj->Image[0][tObj->BaseLevel]->TexFormat->MesaFormat) {
 	case MESA_FORMAT_Z16:
@@ -189,118 +189,59 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj)
 }


-/**
- * Compute sizes and fill in offset and blit information for the given
- * image (determined by \p face and \p level).
- *
- * \param curOffset points to the offset at which the image is to be stored
- * and is updated by this function according to the size of the image.
- */
-static void compute_tex_image_offset(
-	struct gl_texture_object *tObj,
-	GLuint face,
-	GLint level,
-	GLint* curOffset)
+static void calculate_first_last_level(struct gl_texture_object *tObj,
+				       GLuint *pfirstLevel, GLuint *plastLevel)
 {
-	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
-	const struct gl_texture_image* texImage;
-	GLuint blitWidth = R300_BLIT_WIDTH_BYTES;
-	GLuint texelBytes;
-	GLuint size;
-
-	texImage = tObj->Image[0][level + t->base.firstLevel];
-	if (!texImage)
-		return;
-
-	texelBytes = texImage->TexFormat->TexelBytes;
-
-	/* find image size in bytes */
-	if (texImage->IsCompressed) {
-		if ((t->format & R300_TX_FORMAT_DXT1) ==
-			R300_TX_FORMAT_DXT1) {
-			// fprintf(stderr,"DXT 1 %d %08X\n", texImage->Width, t->format);
-			if ((texImage->Width + 3) < 8)	/* width one block */
-				size = texImage->CompressedSize * 4;
-			else if ((texImage->Width + 3) < 16)
-				size = texImage->CompressedSize * 2;
-			else
-				size = texImage->CompressedSize;
+	const struct gl_texture_image * const baseImage =
+		tObj->Image[0][tObj->BaseLevel];
+
+	/* These must be signed values.  MinLod and MaxLod can be negative numbers,
+	* and having firstLevel and lastLevel as signed prevents the need for
+	* extra sign checks.
+	*/
+	int   firstLevel;
+	int   lastLevel;
+
+	/* Yes, this looks overly complicated, but it's all needed.
+	*/
+	switch (tObj->Target) {
+	case GL_TEXTURE_1D:
+	case GL_TEXTURE_2D:
+	case GL_TEXTURE_3D:
+	case GL_TEXTURE_CUBE_MAP:
+		if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) {
+			/* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL.
+			*/
+			firstLevel = lastLevel = tObj->BaseLevel;
 		} else {
-			/* DXT3/5, 16 bytes per block */
-			WARN_ONCE
-				("DXT 3/5 suffers from multitexturing problems!\n");
-			// fprintf(stderr,"DXT 3/5 %d\n", texImage->Width);
-			if ((texImage->Width + 3) < 8)
-				size = texImage->CompressedSize * 2;
-			else
-				size = texImage->CompressedSize;
+			firstLevel = tObj->BaseLevel + (GLint)(tObj->MinLod + 0.5);
+			firstLevel = MAX2(firstLevel, tObj->BaseLevel);
+			firstLevel = MIN2(firstLevel, tObj->BaseLevel + baseImage->MaxLog2);
+			lastLevel = tObj->BaseLevel + (GLint)(tObj->MaxLod + 0.5);
+			lastLevel = MAX2(lastLevel, tObj->BaseLevel);
+			lastLevel = MIN2(lastLevel, tObj->BaseLevel + baseImage->MaxLog2);
+			lastLevel = MIN2(lastLevel, tObj->MaxLevel);
+			lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */
 		}
-	} else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
-		size =
-			((texImage->Width * texelBytes +
-			63) & ~63) * texImage->Height;
-		blitWidth = 64 / texelBytes;
-	} else if (t->tile_bits & R300_TXO_MICRO_TILE) {
-		/* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
-			though the actual offset may be different (if texture is less than
-			32 bytes width) to the untiled case */
-		int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
-		size =
-			(w * ((texImage->Height + 1) / 2)) *
-			texImage->Depth;
-		blitWidth = MAX2(texImage->Width, 64 / texelBytes);
-	} else {
-		int w = (texImage->Width * texelBytes + 31) & ~31;
-		size = w * texImage->Height * texImage->Depth;
-		blitWidth = MAX2(texImage->Width, 64 / texelBytes);
-	}
-	assert(size > 0);
-
-	if (RADEON_DEBUG & DEBUG_TEXTURE)
-		fprintf(stderr, "w=%d h=%d d=%d tb=%d intFormat=%d\n",
-			texImage->Width, texImage->Height,
-			texImage->Depth,
-			texImage->TexFormat->TexelBytes,
-			texImage->InternalFormat);
-
-	/* All images are aligned to a 32-byte offset */
-	*curOffset = (*curOffset + 0x1f) & ~0x1f;
-
-	if (texelBytes) {
-		/* fix x and y coords up later together with offset */
-		t->image[face][level].x = *curOffset;
-		t->image[face][level].y = 0;
-		t->image[face][level].width =
-			MIN2(size / texelBytes, blitWidth);
-		t->image[face][level].height =
-			(size / texelBytes) / t->image[face][level].width;
-	} else {
-		t->image[face][level].x = *curOffset % R300_BLIT_WIDTH_BYTES;
-		t->image[face][level].y = *curOffset / R300_BLIT_WIDTH_BYTES;
-		t->image[face][level].width =
-			MIN2(size, R300_BLIT_WIDTH_BYTES);
-		t->image[face][level].height = size / t->image[face][level].width;
+		break;
+	case GL_TEXTURE_RECTANGLE_NV:
+	case GL_TEXTURE_4D_SGIS:
+		firstLevel = lastLevel = 0;
+		break;
+	default:
+		return;
 	}

-	if (RADEON_DEBUG & DEBUG_TEXTURE)
-		fprintf(stderr,
-			"level %d, face %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
-			level, face, texImage->Width, texImage->Height,
-			t->image[face][level].x, t->image[face][level].y,
-			t->image[face][level].width, t->image[face][level].height,
-			size, *curOffset);
-
-	*curOffset += size;
+	/* save these values */
+	*pfirstLevel = firstLevel;
+	*plastLevel = lastLevel;
 }


-
 /**
- * This function computes the number of bytes of storage needed for
- * the given texture object (all mipmap levels, all cube faces).
- * The \c image[face][level].x/y/width/height parameters for upload/blitting
- * are computed here.  \c filter, \c format, etc. will be set here
- * too.
+ * This function ensures a validated miptree is available.
+ *
+ * Additionally, some texture format bits are configured here.
  *
  * \param rmesa Context pointer
  * \param tObj GL texture object whose images are to be posted to
@@ -309,13 +250,13 @@ static void compute_tex_image_offset(
 static void r300SetTexImages(r300ContextPtr rmesa,
 			     struct gl_texture_object *tObj)
 {
-	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+	r300TexObjPtr t = r300_tex_obj(tObj);
 	const struct gl_texture_image *baseImage =
 	    tObj->Image[0][tObj->BaseLevel];
-	GLint curOffset;
-	GLint i, texelBytes;
-	GLint numLevels;
-	GLint log2Width, log2Height, log2Depth;
+	GLint texelBytes;
+	GLuint firstLevel = 0, lastLevel = 0;
+
+	calculate_first_last_level(tObj, &firstLevel, &lastLevel);

 	/* Set the hardware texture format
 	 */
@@ -335,107 +276,59 @@ static void r300SetTexImages(r300ContextPtr rmesa,
 	}

 	texelBytes = baseImage->TexFormat->TexelBytes;
-
-	/* Compute which mipmap levels we really want to send to the hardware.
-	 */
-	driCalculateTextureFirstLastLevel((driTextureObject *) t);
-	log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2;
-	log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2;
-	log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2;
-
-	numLevels = t->base.lastLevel - t->base.firstLevel + 1;
-
-	assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
-
-	/* Calculate mipmap offsets and dimensions for blitting (uploading)
-	 * The idea is that we lay out the mipmap levels within a block of
-	 * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
-	 */
 	t->tile_bits = 0;

-	/* figure out if this texture is suitable for tiling. */
-#if 0				/* Disabled for now */
-	if (texelBytes) {
-		if ((tObj->Target != GL_TEXTURE_RECTANGLE_NV) &&
-		    /* texrect might be able to use micro tiling too in theory? */
-		    (baseImage->Height > 1)) {
-
-			/* allow 32 (bytes) x 1 mip (which will use two times the space
-			   the non-tiled version would use) max if base texture is large enough */
-			if ((numLevels == 1) ||
-			    (((baseImage->Width * texelBytes /
-			       baseImage->Height) <= 32)
-			     && (baseImage->Width * texelBytes > 64))
-			    ||
-			    ((baseImage->Width * texelBytes /
-			      baseImage->Height) <= 16)) {
-				t->tile_bits |= R300_TXO_MICRO_TILE;
-			}
-		}
+	if (tObj->Target == GL_TEXTURE_CUBE_MAP)
+		t->format |= R300_TX_FORMAT_CUBIC_MAP;

-		if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) {
-			/* we can set macro tiling even for small textures, they will be untiled anyway */
-			t->tile_bits |= R300_TXO_MACRO_TILE;
+	if (!t->image_override) {
+		GLuint compressed = baseImage->IsCompressed ? baseImage->TexFormat->MesaFormat : 0;
+
+		if (t->mt) {
+			if (t->mt->firstLevel != firstLevel ||
+			    t->mt->lastLevel != lastLevel ||
+			    t->mt->width0 != baseImage->Width ||
+			    t->mt->height0 != baseImage->Height ||
+			    t->mt->depth0 != baseImage->Depth ||
+			    t->mt->bpp != texelBytes ||
+			    t->mt->tilebits != t->tile_bits ||
+			    t->mt->compressed != compressed) {
+				r300_miptree_destroy(t->mt);
+				t->mt = 0;
+			}
 		}
-	}
-#endif
-
-	curOffset = 0;

-	if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
-		ASSERT(log2Width == log2Height);
-		t->format |= R300_TX_FORMAT_CUBIC_MAP;
-
-		for(i = 0; i < numLevels; i++) {
-			GLuint face;
-			for(face = 0; face < 6; face++)
-				compute_tex_image_offset(tObj, face, i, &curOffset);
+		if (!t->mt) {
+			t->mt = r300_miptree_create(rmesa, t, tObj->Target,
+				firstLevel, lastLevel,
+				baseImage->Width, baseImage->Height, baseImage->Depth,
+				texelBytes, t->tile_bits, compressed);
+			memset(t->dirty_images, 0xff, sizeof(t->dirty_images));
 		}
-	} else {
-		for (i = 0; i < numLevels; i++)
-			compute_tex_image_offset(tObj, 0, i, &curOffset);
 	}

-	/* Align the total size of texture memory block.
-	 */
-	t->base.totalSize =
-	    (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
-
-	t->size =
-	    (((tObj->Image[0][t->base.firstLevel]->Width -
-	       1) << R300_TX_WIDTHMASK_SHIFT)
-	     | ((tObj->Image[0][t->base.firstLevel]->Height - 1) <<
-		R300_TX_HEIGHTMASK_SHIFT))
-	    | ((numLevels - 1) << R300_TX_MAX_MIP_LEVEL_SHIFT);
-
+	t->size = (((tObj->Image[0][firstLevel]->Width - 1) << R300_TX_WIDTHMASK_SHIFT)
+		| ((tObj->Image[0][firstLevel]->Height - 1) << R300_TX_HEIGHTMASK_SHIFT))
+		| ((lastLevel - firstLevel) << R300_TX_MAX_MIP_LEVEL_SHIFT);
 	t->pitch = 0;

-	/* Only need to round to nearest 32 for textures, but the blitter
-	 * requires 64-byte aligned pitches, and we may/may not need the
-	 * blitter.   NPOT only!
-	 */
 	if (baseImage->IsCompressed) {
-		t->pitch |=
-		    (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
+		t->pitch |= (tObj->Image[0][firstLevel]->Width + 63) & ~(63);
 	} else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
 		unsigned int align = (64 / texelBytes) - 1;
-		t->pitch |= ((tObj->Image[0][t->base.firstLevel]->Width *
+		t->pitch |= ((tObj->Image[0][firstLevel]->Width *
 			     texelBytes) + 63) & ~(63);
 		t->size |= R300_TX_SIZE_TXPITCH_EN;
 		if (!t->image_override)
-			t->pitch_reg =
-			    (((tObj->Image[0][t->base.firstLevel]->Width) +
-			      align) & ~align) - 1;
+			t->pitch_reg = (((tObj->Image[0][firstLevel]->Width) + align) & ~align) - 1;
 	} else {
-		t->pitch |=
-		    ((tObj->Image[0][t->base.firstLevel]->Width *
-		      texelBytes) + 63) & ~(63);
+		t->pitch |= ((tObj->Image[0][firstLevel]->Width * texelBytes) + 63) & ~(63);
 	}

 	if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
-	    if (tObj->Image[0][t->base.firstLevel]->Width > 2048)
+	    if (tObj->Image[0][firstLevel]->Width > 2048)
 		t->pitch_reg |= R500_TXWIDTH_BIT11;
-	    if (tObj->Image[0][t->base.firstLevel]->Height > 2048)
+	    if (tObj->Image[0][firstLevel]->Height > 2048)
 		t->pitch_reg |= R500_TXHEIGHT_BIT11;
 	}
 }
@@ -449,17 +342,15 @@ static GLboolean r300EnableTexture2D(GLcontext * ctx, int unit)
 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
 	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
 	struct gl_texture_object *tObj = texUnit->_Current;
-	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+	r300TexObjPtr t = r300_tex_obj(tObj);

 	ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D);

-	if (t->base.dirty_images[0]) {
+	if (!t->mt || t->dirty_images[0]) {
 		R300_FIREVERTICES(rmesa);

 		r300SetTexImages(rmesa, tObj);
-		r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
-		if (!t->base.memBlock && !t->image_override)
-			return GL_FALSE;
+		r300UploadTexImages(rmesa, t, 0);
 	}

 	return GL_TRUE;
@@ -470,7 +361,7 @@ static GLboolean r300EnableTexture3D(GLcontext * ctx, int unit)
 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
 	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
 	struct gl_texture_object *tObj = texUnit->_Current;
-	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+	r300TexObjPtr t = r300_tex_obj(tObj);

 	ASSERT(tObj->Target == GL_TEXTURE_3D);

@@ -479,12 +370,10 @@ static GLboolean r300EnableTexture3D(GLcontext * ctx, int unit)
 		return GL_FALSE;
 	}

-	if (t->base.dirty_images[0]) {
+	if (!t->mt || t->dirty_images[0]) {
 		R300_FIREVERTICES(rmesa);
 		r300SetTexImages(rmesa, tObj);
-		r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
-		if (!t->base.memBlock)
-			return GL_FALSE;
+		r300UploadTexImages(rmesa, t, 0);
 	}

 	return GL_TRUE;
@@ -495,14 +384,15 @@ static GLboolean r300EnableTextureCube(GLcontext * ctx, int unit)
 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
 	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
 	struct gl_texture_object *tObj = texUnit->_Current;
-	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+	r300TexObjPtr t = r300_tex_obj(tObj);
 	GLuint face;

 	ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP);

-	if (t->base.dirty_images[0] || t->base.dirty_images[1] ||
-	    t->base.dirty_images[2] || t->base.dirty_images[3] ||
-	    t->base.dirty_images[4] || t->base.dirty_images[5]) {
+	if (!t->mt ||
+	    t->dirty_images[0] || t->dirty_images[1] ||
+	    t->dirty_images[2] || t->dirty_images[3] ||
+	    t->dirty_images[4] || t->dirty_images[5]) {
 		/* flush */
 		R300_FIREVERTICES(rmesa);
 		/* layout memory space, once for all faces */
@@ -511,18 +401,11 @@ static GLboolean r300EnableTextureCube(GLcontext * ctx, int unit)

 	/* upload (per face) */
 	for (face = 0; face < 6; face++) {
-		if (t->base.dirty_images[face]) {
-			r300UploadTexImages(rmesa,
-					    (r300TexObjPtr) tObj->DriverData,
-					    face);
+		if (t->dirty_images[face]) {
+			r300UploadTexImages(rmesa, t, face);
 		}
 	}

-	if (!t->base.memBlock) {
-		/* texmem alloc failed, use s/w fallback */
-		return GL_FALSE;
-	}
-
 	return GL_TRUE;
 }

@@ -531,18 +414,15 @@ static GLboolean r300EnableTextureRect(GLcontext * ctx, int unit)
 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
 	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
 	struct gl_texture_object *tObj = texUnit->_Current;
-	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+	r300TexObjPtr t = r300_tex_obj(tObj);

 	ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV);

-	if (t->base.dirty_images[0]) {
+	if (!t->mt || t->dirty_images[0]) {
 		R300_FIREVERTICES(rmesa);

 		r300SetTexImages(rmesa, tObj);
-		r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
-		if (!t->base.memBlock && !t->image_override &&
-		    !rmesa->prefer_gart_client_texturing)
-			return GL_FALSE;
+		r300UploadTexImages(rmesa, t, 0);
 	}

 	return GL_TRUE;
@@ -550,34 +430,19 @@ static GLboolean r300EnableTextureRect(GLcontext * ctx, int unit)

 static GLboolean r300UpdateTexture(GLcontext * ctx, int unit)
 {
-	r300ContextPtr rmesa = R300_CONTEXT(ctx);
 	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
 	struct gl_texture_object *tObj = texUnit->_Current;
-	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+	r300TexObjPtr t = r300_tex_obj(tObj);

 	/* Fallback if there's a texture border */
 	if (tObj->Image[0][tObj->BaseLevel]->Border > 0)
 		return GL_FALSE;

-	/* Update state if this is a different texture object to last
-	 * time.
-	 */
-	if (rmesa->state.texture.unit[unit].texobj != t) {
-		if (rmesa->state.texture.unit[unit].texobj != NULL) {
-			/* The old texture is no longer bound to this texture unit.
-			 * Mark it as such.
-			 */
-
-			rmesa->state.texture.unit[unit].texobj->base.bound &=
-			    ~(1 << unit);
-		}
-
-		rmesa->state.texture.unit[unit].texobj = t;
-		t->base.bound |= (1 << unit);
-		driUpdateTextureLRU((driTextureObject *) t);	/* XXX: should be locked! */
-	}
+	/* Fallback if memory upload didn't work */
+	if (!t->mt)
+		return GL_FALSE;

-	return !t->border_fallback;
+	return GL_TRUE;
 }

 void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
@@ -586,20 +451,18 @@ void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
 	r300ContextPtr rmesa = pDRICtx->driverPrivate;
 	struct gl_texture_object *tObj =
 	    _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
-	r300TexObjPtr t;
+	r300TexObjPtr t = r300_tex_obj(tObj);
 	uint32_t pitch_val;

 	if (!tObj)
 		return;

-	t = (r300TexObjPtr) tObj->DriverData;
-
 	t->image_override = GL_TRUE;

 	if (!offset)
 		return;

-	t->offset = offset;
+	t->override_offset = offset;
 	t->pitch_reg &= (1 << 13) -1;
 	pitch_val = pitch;

diff --git a/src/mesa/drivers/dri/r300/radeon_context.c b/src/mesa/drivers/dri/r300/radeon_context.c
index 3fc724a..a84c8fc 100644
--- a/src/mesa/drivers/dri/r300/radeon_context.c
+++ b/src/mesa/drivers/dri/r300/radeon_context.c
@@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "state.h"
 #include "matrix.h"
 #include "framebuffer.h"
+#include "drirenderbuffer.h"

 #include "drivers/common/driverfuncs.h"
 #include "swrast/swrast.h"
@@ -258,6 +259,52 @@ void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
     }
 }

+static void
+radeon_make_renderbuffer_current(radeonContextPtr radeon,
+				 GLframebuffer *draw)
+{
+	int size = radeon->radeonScreen->driScreen->fbSize;
+	void *map = 0;
+	/* if radeon->fake */
+	struct radeon_renderbuffer *rb;
+	uint32_t offset;
+	if (!radeon->bufmgr)
+		return;
+
+	if ((rb = (void *)draw->Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) {
+
+		offset = radeon->radeonScreen->kernel_mm ? radeon->radeonScreen->front.offset : radeon->radeonScreen->frontOffset;
+		if (!rb->bo)
+			rb->bo = dri_bo_alloc_static(&radeon->bufmgr->base, "front buffer",
+						     radeon->radeonScreen->frontOffset, size, map,
+						     DRM_BO_FLAG_MEM_VRAM);
+		fprintf(stderr,"front is %p\n", rb->bo);
+		rb->cpp = radeon->radeonScreen->cpp;
+		rb->pitch = radeon->radeonScreen->frontPitch;
+	}
+	if ((rb = (void *)draw->Attachment[BUFFER_BACK_LEFT].Renderbuffer)) {
+		offset = radeon->radeonScreen->kernel_mm ? radeon->radeonScreen->back.offset : radeon->radeonScreen->backOffset;
+		if (!rb->bo)
+			rb->bo = dri_bo_alloc_static(&radeon->bufmgr->base, "back buffer",
+						     radeon->radeonScreen->backOffset, size, map,
+						     DRM_BO_FLAG_MEM_VRAM);
+		fprintf(stderr,"back is %p\n", rb->bo);
+		rb->cpp = radeon->radeonScreen->cpp;
+		rb->pitch = radeon->radeonScreen->backPitch;
+	}
+	if ((rb = (void *)draw->Attachment[BUFFER_DEPTH].Renderbuffer)) {
+		offset = radeon->radeonScreen->kernel_mm ? radeon->radeonScreen->depth.offset : radeon->radeonScreen->depthOffset;
+		if (!rb->bo)
+			rb->bo = dri_bo_alloc_static(&radeon->bufmgr->base, "depth buffer",
+						     radeon->radeonScreen->depthOffset, size, map,
+						     DRM_BO_FLAG_MEM_VRAM);
+		fprintf(stderr,"depth is %p\n", rb->bo);
+		rb->cpp = radeon->radeonScreen->cpp;
+		rb->pitch = radeon->radeonScreen->depthPitch;
+	}
+}
+
+
 /* Force the context `c' to be the current context and associate with it
  * buffer `b'.
  */
@@ -265,51 +312,57 @@ GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
 			    __DRIdrawablePrivate * driDrawPriv,
 			    __DRIdrawablePrivate * driReadPriv)
 {
-	if (driContextPriv) {
-		radeonContextPtr radeon =
-			(radeonContextPtr) driContextPriv->driverPrivate;
+	radeonContextPtr radeon;
+	GLframebuffer *dfb, *rfb;

+	if (!driContextPriv) {
 		if (RADEON_DEBUG & DEBUG_DRI)
-			fprintf(stderr, "%s ctx %p\n", __FUNCTION__,
-				radeon->glCtx);
-
-		if (radeon->dri.drawable != driDrawPriv) {
-			if (driDrawPriv->swap_interval == (unsigned)-1) {
-				driDrawPriv->vblFlags =
-					(radeon->radeonScreen->irq != 0)
-					? driGetDefaultVBlankFlags(&radeon->
-								   optionCache)
-					: VBLANK_FLAG_NO_IRQ;
+			fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
+		_mesa_make_current(NULL, NULL, NULL);
+		return GL_TRUE;
+	}

-				driDrawableInitVBlank(driDrawPriv);
-			}
-		}
+	radeon = (radeonContextPtr) driContextPriv->driverPrivate;
+	dfb = driDrawPriv->driverPrivate;
+	rfb = driReadPriv->driverPrivate;

-		radeon->dri.readable = driReadPriv;
+	if (RADEON_DEBUG & DEBUG_DRI)
+		fprintf(stderr, "%s ctx %p\n", __FUNCTION__, radeon->glCtx);

-		if (radeon->dri.drawable != driDrawPriv ||
-		    radeon->lastStamp != driDrawPriv->lastStamp) {
-			radeon->dri.drawable = driDrawPriv;
+	driUpdateFramebufferSize(radeon->glCtx, driDrawPriv);
+	if (driReadPriv != driDrawPriv)
+		driUpdateFramebufferSize(radeon->glCtx, driReadPriv);

-			radeonSetCliprects(radeon);
-			r300UpdateViewportOffset(radeon->glCtx);
-		}
+	radeon_make_renderbuffer_current(radeon, dfb);

-		_mesa_make_current(radeon->glCtx,
-				    (GLframebuffer *) driDrawPriv->
-				    driverPrivate,
-				    (GLframebuffer *) driReadPriv->
-				    driverPrivate);
+	_mesa_make_current(radeon->glCtx, dfb, rfb);

-		_mesa_update_state(radeon->glCtx);
+	if (radeon->dri.drawable != driDrawPriv) {
+		if (driDrawPriv->swap_interval == (unsigned)-1) {
+			driDrawPriv->vblFlags =
+				(radeon->radeonScreen->irq != 0)
+				? driGetDefaultVBlankFlags(&radeon->
+							   optionCache)
+					: VBLANK_FLAG_NO_IRQ;
+
+			driDrawableInitVBlank(driDrawPriv);
+		}
+	}

-		radeonUpdatePageFlipping(radeon);
-	} else {
-		if (RADEON_DEBUG & DEBUG_DRI)
-			fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
-		_mesa_make_current(0, 0, 0);
+	radeon->dri.readable = driReadPriv;
+
+	if (radeon->dri.drawable != driDrawPriv ||
+	    radeon->lastStamp != driDrawPriv->lastStamp) {
+		radeon->dri.drawable = driDrawPriv;
+
+		radeonSetCliprects(radeon);
+		r300UpdateViewportOffset(radeon->glCtx);
 	}

+	_mesa_update_state(radeon->glCtx);
+
+	radeonUpdatePageFlipping(radeon);
+
 	if (RADEON_DEBUG & DEBUG_DRI)
 		fprintf(stderr, "End %s\n", __FUNCTION__);
 	return GL_TRUE;
diff --git a/src/mesa/drivers/dri/r300/radeon_context.h b/src/mesa/drivers/dri/r300/radeon_context.h
index 7458d63..828853b 100644
--- a/src/mesa/drivers/dri/r300/radeon_context.h
+++ b/src/mesa/drivers/dri/r300/radeon_context.h
@@ -48,6 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "drm.h"
 #include "dri_util.h"
 #include "colormac.h"
+#include "radeon_buffer.h"

 struct radeon_context;
 typedef struct radeon_context radeonContextRec;
@@ -132,12 +133,13 @@ struct radeon_scissor_state {

 struct radeon_colorbuffer_state {
 	GLuint clear;
-	GLint drawOffset, drawPitch;
+	struct radeon_renderbuffer *rrb;
 };

 struct radeon_state {
 	struct radeon_colorbuffer_state color;
 	struct radeon_scissor_state scissor;
+	struct radeon_renderbuffer *depth_buffer;
 };

 /**
@@ -185,6 +187,8 @@ struct radeon_context {
 	/* Configuration cache
 	 */
 	driOptionCache optionCache;
+
+	struct radeon_bufmgr *bufmgr;
 };

 #define RADEON_CONTEXT(glctx) ((radeonContextPtr)(ctx->DriverCtx))
diff --git a/src/mesa/drivers/dri/r300/radeon_ioctl.c b/src/mesa/drivers/dri/r300/radeon_ioctl.c
index 0c1a195..486ce8e 100644
--- a/src/mesa/drivers/dri/r300/radeon_ioctl.c
+++ b/src/mesa/drivers/dri/r300/radeon_ioctl.c
@@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "swrast/swrast.h"
 #include "r300_context.h"
 #include "radeon_ioctl.h"
+#include "radeon_buffer.h"
 #include "r300_ioctl.h"
 #include "r300_state.h"
 #include "radeon_reg.h"
@@ -171,7 +172,7 @@ void radeonCopyBuffer(__DRIdrawablePrivate * dPriv,
 	assert(dPriv->driContextPriv->driverPrivate);

 	radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
-
+
 	if (RADEON_DEBUG & DEBUG_IOCTL) {
 		fprintf(stderr, "\n%s( %p )\n\n", __FUNCTION__,
 			(void *)radeon->glCtx);
@@ -261,6 +262,8 @@ void radeonPageFlip(__DRIdrawablePrivate * dPriv)
 	GLint ret;
 	GLboolean missed_target;
 	__DRIscreenPrivate *psp = dPriv->driScreenPriv;
+	GLframebuffer *fb = dPriv->driverPrivate;
+	struct radeon_renderbuffer *rrb;

 	assert(dPriv);
 	assert(dPriv->driContextPriv);
@@ -268,6 +271,8 @@ void radeonPageFlip(__DRIdrawablePrivate * dPriv)

 	radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;

+	rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
+
 	if (RADEON_DEBUG & DEBUG_IOCTL) {
 		fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__,
 			radeon->sarea->pfCurrentPage);
@@ -315,32 +320,10 @@ void radeonPageFlip(__DRIdrawablePrivate * dPriv)
 	radeon->swap_count++;
 	(void)(*psp->systemTime->getUST) (&radeon->swap_ust);

-        driFlipRenderbuffers(radeon->glCtx->WinSysDrawBuffer,
+        driFlipRenderbuffers(radeon->glCtx->WinSysDrawBuffer,
                              radeon->sarea->pfCurrentPage);

-	if (radeon->sarea->pfCurrentPage == 1) {
-		radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset;
-		radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch;
-	} else {
-		radeon->state.color.drawOffset = radeon->radeonScreen->backOffset;
-		radeon->state.color.drawPitch = radeon->radeonScreen->backPitch;
-	}
-
-	if (IS_R300_CLASS(radeon->radeonScreen)) {
-		r300ContextPtr r300 = (r300ContextPtr)radeon;
-		R300_STATECHANGE(r300, cb);
-		r300->hw.cb.cmd[R300_CB_OFFSET] = r300->radeon.state.color.drawOffset +
-						r300->radeon.radeonScreen->fbLocation;
-		r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch;
-
-		if (r300->radeon.radeonScreen->cpp == 4)
-			r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
-		else
-			r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
-
-		if (r300->radeon.sarea->tiling_enabled)
-			r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
-	}
+	radeon->state.color.rrb = rrb;
 }

 void radeonWaitForIdleLocked(radeonContextPtr radeon)
diff --git a/src/mesa/drivers/dri/r300/radeon_lock.c b/src/mesa/drivers/dri/r300/radeon_lock.c
index d54a821..3529555 100644
--- a/src/mesa/drivers/dri/r300/radeon_lock.c
+++ b/src/mesa/drivers/dri/r300/radeon_lock.c
@@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "radeon_state.h"
 #include "r300_context.h"
 #include "r300_state.h"
+#include "r300_mem.h"

 #include "framebuffer.h"

@@ -59,6 +60,8 @@ int prevLockLine = 0;
 void radeonUpdatePageFlipping(radeonContextPtr rmesa)
 {
 	int use_back;
+	__DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
+	GLframebuffer *fb = drawable->driverPrivate;

 	rmesa->doPageFlip = rmesa->sarea->pfState;
 	if (rmesa->glCtx->WinSysDrawBuffer) {
@@ -72,16 +75,12 @@ void radeonUpdatePageFlipping(radeonContextPtr rmesa)
 	     BUFFER_BACK_LEFT) : 1;
 	use_back ^= (rmesa->sarea->pfCurrentPage == 1);

-	if (use_back) {
-		rmesa->state.color.drawOffset =
-		    rmesa->radeonScreen->backOffset;
-		rmesa->state.color.drawPitch = rmesa->radeonScreen->backPitch;
-	} else {
-		rmesa->state.color.drawOffset =
-		    rmesa->radeonScreen->frontOffset;
-		rmesa->state.color.drawPitch =
-		    rmesa->radeonScreen->frontPitch;
-	}
+	if (use_back)
+		rmesa->state.color.rrb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
+	else
+		rmesa->state.color.rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
+
+	rmesa->state.depth_buffer = (void *)fb->Attachment[BUFFER_DEPTH].Renderbuffer;
 }

 /* Update the hardware state.  This is called if another context has
@@ -125,12 +124,8 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags)
 	}

 	if (sarea->ctx_owner != rmesa->dri.hwContext) {
-		int i;
-
 		sarea->ctx_owner = rmesa->dri.hwContext;
-		for (i = 0; i < r300->nr_heaps; i++) {
-			DRI_AGE_TEXTURES(r300->texture_heaps[i]);
-		}
+		radeonBufmgrContendedLockTake(r300->radeon.bufmgr);
 	}

 	rmesa->lost_context = GL_TRUE;
diff --git a/src/mesa/drivers/dri/r300/radeon_span.c b/src/mesa/drivers/dri/r300/radeon_span.c
index f1bc56e..7ea0842 100644
--- a/src/mesa/drivers/dri/r300/radeon_span.c
+++ b/src/mesa/drivers/dri/r300/radeon_span.c
@@ -48,7 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "r300_ioctl.h"
 #include "radeon_span.h"

-#include "drirenderbuffer.h"
+#include "radeon_buffer.h"

 #define DBG 0

@@ -58,21 +58,21 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  * information.
  */
 #define LOCAL_VARS						\
-   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
-   const __DRIdrawablePrivate *dPriv = drb->dPriv;		\
+   struct radeon_renderbuffer *rrb = (void *) rb;		\
+   const __DRIdrawablePrivate *dPriv = rrb->dPriv;		\
    const GLuint bottom = dPriv->h - 1;				\
-   GLubyte *buf = (GLubyte *) drb->flippedData			\
-      + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp;	\
+   GLubyte *buf = (GLubyte *) rrb->bo->virtual			\
+      + (dPriv->y * rrb->pitch + dPriv->x) * rrb->cpp;	\
    GLuint p;							\
    (void) p;

 #define LOCAL_DEPTH_VARS				\
-   driRenderbuffer *drb = (driRenderbuffer *) rb;	\
-   const __DRIdrawablePrivate *dPriv = drb->dPriv;	\
+   struct radeon_renderbuffer *rrb = (void *) rb;	\
+   const __DRIdrawablePrivate *dPriv = rrb->dPriv;	\
    const GLuint bottom = dPriv->h - 1;			\
    GLuint xo = dPriv->x;				\
    GLuint yo = dPriv->y;				\
-   GLubyte *buf = (GLubyte *) drb->Base.Data;
+   GLubyte *buf = (GLubyte *) rrb->base.Data;

 #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS

@@ -93,7 +93,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 #define TAG(x)    radeon##x##_RGB565
 #define TAG2(x,y) radeon##x##_RGB565##y
-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
+#define GET_PTR(X,Y) (buf + ((Y) * rrb->pitch + (X)) * 2)
 #include "spantmp2.h"

 /* 32 bit, ARGB8888 color spanline and pixel functions
@@ -103,7 +103,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 #define TAG(x)    radeon##x##_ARGB8888
 #define TAG2(x,y) radeon##x##_ARGB8888##y
-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
+#define GET_PTR(X,Y) (buf + ((Y) * rrb->pitch + (X)) * 4)
 #include "spantmp2.h"

 /* ================================================================
@@ -120,10 +120,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  * too...
  */

-static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y)
+static GLuint radeon_mba_z32(const struct radeon_renderbuffer * rrb,
+			     GLint x, GLint y)
 {
-	GLuint pitch = drb->pitch;
-	if (drb->depthHasSurface) {
+	GLuint pitch = rrb->pitch;
+	if (rrb->depthHasSurface) {
 		return 4 * (x + y * pitch);
 	} else {
 		GLuint ba, address = 0;	/* a[0..1] = 0           */
@@ -148,10 +149,10 @@ static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y)
 }

 static INLINE GLuint
-radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
+radeon_mba_z16(const struct radeon_renderbuffer *rrb, GLint x, GLint y)
 {
-	GLuint pitch = drb->pitch;
-	if (drb->depthHasSurface) {
+	GLuint pitch = rrb->pitch;
+	if (rrb->depthHasSurface) {
 		return 2 * (x + y * pitch);
 	} else {
 		GLuint ba, address = 0;	/* a[0]    = 0           */
@@ -173,10 +174,10 @@ radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
 /* 16-bit depth buffer functions
  */
 #define WRITE_DEPTH( _x, _y, d )					\
-   *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d;
+   *(GLushort *)(buf + radeon_mba_z16( rrb, _x + xo, _y + yo )) = d;

 #define READ_DEPTH( d, _x, _y )						\
-   d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo ));
+   d = *(GLushort *)(buf + radeon_mba_z16( rrb, _x + xo, _y + yo ));

 #define TAG(x) radeon##x##_z16
 #include "depthtmp.h"
@@ -189,7 +190,7 @@ radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
 #ifdef COMPILE_R300
 #define WRITE_DEPTH( _x, _y, d )					\
 do {									\
-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
+   GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo );		\
    GLuint tmp = *(GLuint *)(buf + offset);				\
    tmp &= 0x000000ff;							\
    tmp |= ((d << 8) & 0xffffff00);					\
@@ -198,7 +199,7 @@ do {									\
 #else
 #define WRITE_DEPTH( _x, _y, d )					\
 do {									\
-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
+   GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo );		\
    GLuint tmp = *(GLuint *)(buf + offset);				\
    tmp &= 0xff000000;							\
    tmp |= ((d) & 0x00ffffff);						\
@@ -209,12 +210,12 @@ do {									\
 #ifdef COMPILE_R300
 #define READ_DEPTH( d, _x, _y )						\
   do { \
-    d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,		\
+    d = (*(GLuint *)(buf + radeon_mba_z32( rrb, _x + xo,		\
 					 _y + yo )) & 0xffffff00) >> 8; \
   }while(0)
 #else
 #define READ_DEPTH( d, _x, _y )						\
-   d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,			\
+   d = *(GLuint *)(buf + radeon_mba_z32( rrb, _x + xo,			\
 					 _y + yo )) & 0x00ffffff;
 #endif

@@ -230,7 +231,7 @@ do {									\
 #ifdef COMPILE_R300
 #define WRITE_STENCIL( _x, _y, d )					\
 do {									\
-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
+   GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo );		\
    GLuint tmp = *(GLuint *)(buf + offset);				\
    tmp &= 0xffffff00;							\
    tmp |= (d) & 0xff;							\
@@ -239,7 +240,7 @@ do {									\
 #else
 #define WRITE_STENCIL( _x, _y, d )					\
 do {									\
-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
+   GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo );		\
    GLuint tmp = *(GLuint *)(buf + offset);				\
    tmp &= 0x00ffffff;							\
    tmp |= (((d) & 0xff) << 24);						\
@@ -250,14 +251,14 @@ do {									\
 #ifdef COMPILE_R300
 #define READ_STENCIL( d, _x, _y )					\
 do {									\
-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
+   GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo );		\
    GLuint tmp = *(GLuint *)(buf + offset);				\
    d = tmp & 0x000000ff;						\
 } while (0)
 #else
 #define READ_STENCIL( d, _x, _y )					\
 do {									\
-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
+   GLuint offset = radeon_mba_z32( rrb, _x + xo, _y + yo );		\
    GLuint tmp = *(GLuint *)(buf + offset);				\
    d = (tmp & 0xff000000) >> 24;					\
 } while (0)
@@ -300,10 +301,10 @@ static void radeonSpanRenderStart(GLcontext * ctx)
 	 */
 	{
 		int p;
-		driRenderbuffer *drb =
-			(driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0];
+		struct radeon_renderbuffer *rrb =
+			(void *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0];
 		volatile int *buf =
-			(volatile int *)(rmesa->dri.screen->pFB + drb->offset);
+			(volatile int *)(rmesa->dri.screen->pFB + rrb->bo->offset);
 		p = *buf;
 	}
 }
@@ -326,20 +327,17 @@ void radeonInitSpanFuncs(GLcontext * ctx)
 /**
  * Plug in the Get/Put routines for the given driRenderbuffer.
  */
-void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis)
+void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
 {
-	if (drb->Base.InternalFormat == GL_RGBA) {
-		if (vis->redBits == 5 && vis->greenBits == 6
-		    && vis->blueBits == 5) {
-			radeonInitPointers_RGB565(&drb->Base);
-		} else {
-			radeonInitPointers_ARGB8888(&drb->Base);
-		}
-	} else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
-		radeonInitDepthPointers_z16(&drb->Base);
-	} else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
-		radeonInitDepthPointers_z24_s8(&drb->Base);
-	} else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
-		radeonInitStencilPointers_z24_s8(&drb->Base);
-	}
+    if (rrb->base.InternalFormat == GL_RGB5) {
+	radeonInitPointers_RGB565(&rrb->base);
+    } else if (rrb->base.InternalFormat == GL_RGBA8) {
+	radeonInitPointers_ARGB8888(&rrb->base);
+    } else if (rrb->base.InternalFormat == GL_DEPTH_COMPONENT16) {
+	radeonInitDepthPointers_z16(&rrb->base);
+    } else if (rrb->base.InternalFormat == GL_DEPTH_COMPONENT24) {
+	radeonInitDepthPointers_z24_s8(&rrb->base);
+    } else if (rrb->base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
+	radeonInitStencilPointers_z24_s8(&rrb->base);
+    }
 }
diff --git a/src/mesa/drivers/dri/r300/radeon_state.c b/src/mesa/drivers/dri/r300/radeon_state.c
index d81318c..a7720da 100644
--- a/src/mesa/drivers/dri/r300/radeon_state.c
+++ b/src/mesa/drivers/dri/r300/radeon_state.c
@@ -222,14 +222,6 @@ void radeonEnable(GLcontext* ctx, GLenum cap, GLboolean state)
 void radeonInitState(radeonContextPtr radeon)
 {
 	radeon->Fallback = 0;
-
-	if (radeon->glCtx->Visual.doubleBufferMode && radeon->sarea->pfCurrentPage == 0) {
-		radeon->state.color.drawOffset = radeon->radeonScreen->backOffset;
-		radeon->state.color.drawPitch = radeon->radeonScreen->backPitch;
-	} else {
-		radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset;
-		radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch;
-	}
 }


diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer.h b/src/mesa/drivers/dri/radeon/radeon_buffer.h
new file mode 100644
index 0000000..730c40b
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_buffer.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2008 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software")
+ * to deal in the software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * them Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTIBILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT, OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *	Adam Jackson <ajax@redhat.com>
+ */
+
+#ifndef RADEON_BUFFER_H
+#define RADEON_BUFFER_H
+
+#include "dri_bufmgr.h"
+
+struct radeon_renderbuffer
+{
+    struct gl_renderbuffer base;
+    dri_bo *bo;
+    unsigned int cpp;
+    /* unsigned int offset; */
+    unsigned int pitch;
+    unsigned int height;
+
+    /* boo Xorg 6.8.2 compat */
+    int depthHasSurface;
+
+    __DRIdrawablePrivate *dPriv;
+};
+
+struct radeon_bufmgr {
+	dri_bufmgr base;
+};
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
index 84b5c46..10a49d2 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
@@ -46,6 +46,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "radeon_chipset.h"
 #include "radeon_macros.h"
 #include "radeon_screen.h"
+#include "radeon_buffer.h"
 #if !RADEON_COMMON
 #include "radeon_context.h"
 #include "radeon_span.h"
@@ -69,6 +70,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 #include "GL/internal/dri_interface.h"

+#include <errno.h>
+#include <sys/ioctl.h>
+
 /* Radeon configuration
  */
 #include "xmlpool.h"
@@ -350,6 +354,79 @@ static const __DRItexOffsetExtension r300texOffsetExtension = {
 };
 #endif

+
+static void
+radeon_gem_update_handle(radeonScreenPtr screen, __DRIscreenPrivate *sPriv,
+			 struct radeon_gem_object *gem_obj)
+{
+     struct drm_gem_close close_args;
+     struct drm_gem_open args;
+     struct drm_radeon_gem_mmap mmap_args;
+     struct drm_radeon_gem_pin pin_args;
+     int ret;
+
+     if (gem_obj->gem_handle) {
+	     close_args.handle = gem_obj->gem_handle;
+
+	     ioctl(sPriv->fd, DRM_IOCTL_GEM_CLOSE, &close_args);
+	     gem_obj->gem_handle = 0;
+     }
+
+     /* do open */
+     args.name = gem_obj->gem_name;
+     ret = ioctl(sPriv->fd, DRM_IOCTL_GEM_OPEN, &args);
+     if (ret)
+	     return;
+
+     gem_obj->gem_handle = args.handle;
+     gem_obj->size = args.size;
+
+     mmap_args.handle = gem_obj->gem_handle;
+     mmap_args.size = gem_obj->size;
+     mmap_args.offset = 0;
+
+     ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_GEM_MMAP, &mmap_args,
+			       sizeof(mmap_args));
+
+     if (ret)
+	     return;
+
+     gem_obj->map = (void *)(unsigned long)(mmap_args.addr_ptr);
+
+     pin_args.handle = gem_obj->gem_handle;
+     pin_args.alignment = 0;
+
+     ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_GEM_PIN, &pin_args,
+			       sizeof(pin_args));
+
+     if (ret)
+	     return;
+
+     gem_obj->offset = pin_args.offset;
+
+     fprintf(stderr,"handle %d, size %llx, ptr %p, offset %llx\n", gem_obj->gem_handle,
+	     gem_obj->size, gem_obj->map, gem_obj->offset);
+}
+
+static int
+radeon_init_mm_buffers(radeonScreenPtr screen, __DRIscreenPrivate *sPriv,
+		       RADEONDRIPtr dri_priv)
+{
+	/* STOP GAP HERE */
+
+	screen->front.gem_name = dri_priv->frontOffset;
+	radeon_gem_update_handle(screen, sPriv, &screen->front);
+	screen->back.gem_name = dri_priv->backOffset;
+	radeon_gem_update_handle(screen, sPriv, &screen->back);
+	screen->depth.gem_name = dri_priv->depthOffset;
+	radeon_gem_update_handle(screen, sPriv, &screen->depth);
+
+	screen->vram_texture.gem_name = dri_priv->textureOffset;
+	radeon_gem_update_handle(screen, sPriv, &screen->vram_texture);
+	screen->vram_texture.gem_name = dri_priv->gartTexHandle;
+	radeon_gem_update_handle(screen, sPriv, &screen->gart_texture);
+}
+
 /* Create the device specific screen private data struct.
  */
 static radeonScreenPtr
@@ -389,6 +466,21 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
    screen->card_type = (dri_priv->IsPCI ? RADEON_CARD_PCI : RADEON_CARD_AGP);
    {
       int ret;
+
+#ifdef RADEON_PARAM_KERNEL_MM
+     ret = radeonGetParam( sPriv->fd, RADEON_PARAM_KERNEL_MM,
+                            &screen->kernel_mm);
+
+      if (ret && ret != -EINVAL) {
+         FREE( screen );
+         fprintf(stderr, "drm_radeon_getparam_t (RADEON_OFFSET): %d\n", ret);
+         return NULL;
+      }
+
+      if (ret == -EINVAL)
+          screen->kernel_mm = 0;
+#endif
+
       ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BUFFER_OFFSET,
 			    &screen->gart_buffer_offset);

@@ -422,32 +514,34 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
       screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25);
    }

-   screen->mmio.handle = dri_priv->registerHandle;
-   screen->mmio.size   = dri_priv->registerSize;
-   if ( drmMap( sPriv->fd,
-		screen->mmio.handle,
-		screen->mmio.size,
-		&screen->mmio.map ) ) {
-      FREE( screen );
-      __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ );
-      return NULL;
-   }
+   if (!screen->kernel_mm) {
+      screen->mmio.handle = dri_priv->registerHandle;
+      screen->mmio.size   = dri_priv->registerSize;
+      if ( drmMap( sPriv->fd,
+		   screen->mmio.handle,
+		   screen->mmio.size,
+		   &screen->mmio.map ) ) {
+	 FREE( screen );
+	 __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ );
+	 return NULL;
+      }

-   RADEONMMIO = screen->mmio.map;
+      RADEONMMIO = screen->mmio.map;

-   screen->status.handle = dri_priv->statusHandle;
-   screen->status.size   = dri_priv->statusSize;
-   if ( drmMap( sPriv->fd,
-		screen->status.handle,
-		screen->status.size,
-		&screen->status.map ) ) {
-      drmUnmap( screen->mmio.map, screen->mmio.size );
-      FREE( screen );
-      __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ );
-      return NULL;
+      screen->status.handle = dri_priv->statusHandle;
+      screen->status.size   = dri_priv->statusSize;
+      if ( drmMap( sPriv->fd,
+		   screen->status.handle,
+		   screen->status.size,
+		   &screen->status.map ) ) {
+	 drmUnmap( screen->mmio.map, screen->mmio.size );
+	 FREE( screen );
+	 __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ );
+	 return NULL;
+      }
+      screen->scratch = (__volatile__ u_int32_t *)
+	 ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);
    }
-   screen->scratch = (__volatile__ u_int32_t *)
-      ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);

    screen->buffers = drmMapBufs( sPriv->fd );
    if ( !screen->buffers ) {
@@ -458,22 +552,24 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
       return NULL;
    }

-   if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) {
-      screen->gartTextures.handle = dri_priv->gartTexHandle;
-      screen->gartTextures.size   = dri_priv->gartTexMapSize;
-      if ( drmMap( sPriv->fd,
-		   screen->gartTextures.handle,
-		   screen->gartTextures.size,
-		   (drmAddressPtr)&screen->gartTextures.map ) ) {
-	 drmUnmapBufs( screen->buffers );
-	 drmUnmap( screen->status.map, screen->status.size );
-	 drmUnmap( screen->mmio.map, screen->mmio.size );
-	 FREE( screen );
-	 __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__);
-	 return NULL;
+   if (!screen->kernel_mm) {
+      if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) {
+	 screen->gartTextures.handle = dri_priv->gartTexHandle;
+	 screen->gartTextures.size   = dri_priv->gartTexMapSize;
+	 if ( drmMap( sPriv->fd,
+		      screen->gartTextures.handle,
+		      screen->gartTextures.size,
+		      (drmAddressPtr)&screen->gartTextures.map ) ) {
+	    drmUnmapBufs( screen->buffers );
+	    drmUnmap( screen->status.map, screen->status.size );
+	    drmUnmap( screen->mmio.map, screen->mmio.size );
+	    FREE( screen );
+	    __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__);
+	    return NULL;
+	 }
+
+	 screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base;
       }
-
-      screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base;
    }

    screen->chip_flags = 0;
@@ -840,7 +936,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
    ret = radeonGetParam( sPriv->fd, RADEON_PARAM_FB_LOCATION,
                          &temp);
    if (ret) {
-       if (screen->chip_family < CHIP_FAMILY_RS690)
+       if (screen->chip_family < CHIP_FAMILY_RS690 && !screen->kernel_mm)
 	   screen->fbLocation      = ( INREG( RADEON_MC_FB_LOCATION ) & 0xffff) << 16;
        else {
            FREE( screen );
@@ -881,55 +977,58 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
        }
    }

-   if ( sPriv->drm_version.minor >= 10 ) {
-      drm_radeon_setparam_t sp;
+   if (!screen->kernel_mm) {
+      if ( sPriv->drm_version.minor >= 10 ) {
+	 drm_radeon_setparam_t sp;

-      sp.param = RADEON_SETPARAM_FB_LOCATION;
-      sp.value = screen->fbLocation;
+	 sp.param = RADEON_SETPARAM_FB_LOCATION;
+	 sp.value = screen->fbLocation;

-      drmCommandWrite( sPriv->fd, DRM_RADEON_SETPARAM,
-		       &sp, sizeof( sp ) );
-   }
-
-   screen->frontOffset	= dri_priv->frontOffset;
-   screen->frontPitch	= dri_priv->frontPitch;
-   screen->backOffset	= dri_priv->backOffset;
-   screen->backPitch	= dri_priv->backPitch;
-   screen->depthOffset	= dri_priv->depthOffset;
-   screen->depthPitch	= dri_priv->depthPitch;
-
-   /* Check if ddx has set up a surface reg to cover depth buffer */
-   screen->depthHasSurface = (sPriv->ddx_version.major > 4) ||
-      /* these chips don't use tiled z without hyperz. So always pretend
-         we have set up a surface which will cause linear reads/writes */
-      ((screen->chip_family & RADEON_CLASS_R100) &&
-      !(screen->chip_flags & RADEON_CHIPSET_TCL));
-
-   if ( dri_priv->textureSize == 0 ) {
-      screen->texOffset[RADEON_LOCAL_TEX_HEAP] = screen->gart_texture_offset;
-      screen->texSize[RADEON_LOCAL_TEX_HEAP] = dri_priv->gartTexMapSize;
-      screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] =
-	 dri_priv->log2GARTTexGran;
-   } else {
-      screen->texOffset[RADEON_LOCAL_TEX_HEAP] = dri_priv->textureOffset
-				               + screen->fbLocation;
-      screen->texSize[RADEON_LOCAL_TEX_HEAP] = dri_priv->textureSize;
-      screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] =
-	 dri_priv->log2TexGran;
-   }
+	 drmCommandWrite( sPriv->fd, DRM_RADEON_SETPARAM,
+			  &sp, sizeof( sp ) );
+      }

-   if ( !screen->gartTextures.map || dri_priv->textureSize == 0
-	|| getenv( "RADEON_GARTTEXTURING_FORCE_DISABLE" ) ) {
-      screen->numTexHeaps = RADEON_NR_TEX_HEAPS - 1;
-      screen->texOffset[RADEON_GART_TEX_HEAP] = 0;
-      screen->texSize[RADEON_GART_TEX_HEAP] = 0;
-      screen->logTexGranularity[RADEON_GART_TEX_HEAP] = 0;
+      screen->frontOffset	= dri_priv->frontOffset;
+      screen->frontPitch	= dri_priv->frontPitch;
+      screen->backOffset	= dri_priv->backOffset;
+      screen->backPitch	= dri_priv->backPitch;
+      screen->depthOffset	= dri_priv->depthOffset;
+      screen->depthPitch	= dri_priv->depthPitch;
+
+      /* Check if ddx has set up a surface reg to cover depth buffer */
+      screen->depthHasSurface = (sPriv->ddx_version.major > 4) ||
+	 /* these chips don't use tiled z without hyperz. So always pretend
+	    we have set up a surface which will cause linear reads/writes */
+	 ((screen->chip_family & RADEON_CLASS_R100) &&
+	  !(screen->chip_flags & RADEON_CHIPSET_TCL));
+
+      if ( dri_priv->textureSize == 0 ) {
+	 screen->texOffset[RADEON_LOCAL_TEX_HEAP] = screen->gart_texture_offset;
+	 screen->texSize[RADEON_LOCAL_TEX_HEAP] = dri_priv->gartTexMapSize;
+	 screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] =
+	    dri_priv->log2GARTTexGran;
+      } else {
+	 screen->texOffset[RADEON_LOCAL_TEX_HEAP] = dri_priv->textureOffset
+	    + screen->fbLocation;
+	 screen->texSize[RADEON_LOCAL_TEX_HEAP] = dri_priv->textureSize;
+	 screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] =
+	    dri_priv->log2TexGran;
+      }
+
+      if ( !screen->gartTextures.map || dri_priv->textureSize == 0
+	   || getenv( "RADEON_GARTTEXTURING_FORCE_DISABLE" ) ) {
+	 screen->numTexHeaps = RADEON_NR_TEX_HEAPS - 1;
+	 screen->texOffset[RADEON_GART_TEX_HEAP] = 0;
+	 screen->texSize[RADEON_GART_TEX_HEAP] = 0;
+	 screen->logTexGranularity[RADEON_GART_TEX_HEAP] = 0;
+      } else {
+	 screen->numTexHeaps = RADEON_NR_TEX_HEAPS;
+	 screen->texOffset[RADEON_GART_TEX_HEAP] = screen->gart_texture_offset;
+	 screen->texSize[RADEON_GART_TEX_HEAP] = dri_priv->gartTexMapSize;
+	 screen->logTexGranularity[RADEON_GART_TEX_HEAP] = dri_priv->log2GARTTexGran;
+      }
    } else {
-      screen->numTexHeaps = RADEON_NR_TEX_HEAPS;
-      screen->texOffset[RADEON_GART_TEX_HEAP] = screen->gart_texture_offset;
-      screen->texSize[RADEON_GART_TEX_HEAP] = dri_priv->gartTexMapSize;
-      screen->logTexGranularity[RADEON_GART_TEX_HEAP] =
-	 dri_priv->log2GARTTexGran;
+      radeon_init_mm_buffers(screen, sPriv, dri_priv);
    }

    i = 0;
@@ -975,12 +1074,14 @@ radeonDestroyScreen( __DRIscreenPrivate *sPriv )
    if (!screen)
       return;

-   if ( screen->gartTextures.map ) {
-      drmUnmap( screen->gartTextures.map, screen->gartTextures.size );
-   }
    drmUnmapBufs( screen->buffers );
-   drmUnmap( screen->status.map, screen->status.size );
-   drmUnmap( screen->mmio.map, screen->mmio.size );
+   if (!screen->kernel_mm) {
+      if ( screen->gartTextures.map ) {
+	 drmUnmap( screen->gartTextures.map, screen->gartTextures.size );
+      }
+      drmUnmap( screen->status.map, screen->status.size );
+      drmUnmap( screen->mmio.map, screen->mmio.size );
+   }

    /* free all option information */
    driDestroyOptionInfo (&screen->optionCache);
@@ -1004,6 +1105,158 @@ radeonInitDriver( __DRIscreenPrivate *sPriv )
    return GL_TRUE;
 }

+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
+static GLboolean
+radeon_alloc_window_storage(GLcontext *ctx, struct gl_renderbuffer *rb,
+			    GLenum intFormat, GLuint w, GLuint h)
+{
+    rb->Width = w;
+    rb->Height = h;
+    rb->_ActualFormat = intFormat;
+
+    return GL_TRUE;
+}
+
+
+static struct radeon_renderbuffer *
+radeon_create_renderbuffer(GLenum format)
+{
+    struct radeon_renderbuffer *ret;
+
+    ret = CALLOC_STRUCT(radeon_renderbuffer);
+    if (!ret)
+	return NULL;
+
+    _mesa_init_renderbuffer(&ret->base, 0);
+
+    /* XXX format junk */
+    switch (format) {
+	case GL_RGB5:
+	    ret->base._ActualFormat = GL_RGB5;
+	    ret->base._BaseFormat = GL_RGBA;
+	    ret->base.RedBits = 5;
+	    ret->base.GreenBits = 6;
+	    ret->base.BlueBits = 5;
+	    ret->base.DataType = GL_UNSIGNED_BYTE;
+	    break;
+	case GL_RGBA8:
+	    ret->base._ActualFormat = GL_RGBA8;
+	    ret->base._BaseFormat = GL_RGBA;
+	    ret->base.RedBits = 8;
+	    ret->base.GreenBits = 8;
+	    ret->base.BlueBits = 8;
+	    ret->base.AlphaBits = 8;
+	    ret->base.DataType = GL_UNSIGNED_BYTE;
+	    break;
+	case GL_STENCIL_INDEX8_EXT:
+	    ret->base._ActualFormat = GL_STENCIL_INDEX8_EXT;
+	    ret->base._BaseFormat = GL_STENCIL_INDEX;
+	    ret->base.StencilBits = 8;
+	    ret->base.DataType = GL_UNSIGNED_BYTE;
+	    break;
+	case GL_DEPTH_COMPONENT16:
+	    ret->base._ActualFormat = GL_DEPTH_COMPONENT16;
+	    ret->base._BaseFormat = GL_DEPTH_COMPONENT;
+	    ret->base.DepthBits = 16;
+	    ret->base.DataType = GL_UNSIGNED_SHORT;
+	    break;
+	case GL_DEPTH_COMPONENT24:
+	    ret->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT;
+	    ret->base._BaseFormat = GL_DEPTH_COMPONENT;
+	    ret->base.DepthBits = 24;
+	    ret->base.DataType = GL_UNSIGNED_INT;
+	    break;
+	case GL_DEPTH24_STENCIL8_EXT:
+	    ret->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT;
+	    ret->base._BaseFormat = GL_DEPTH_STENCIL_EXT;
+	    ret->base.DepthBits = 24;
+	    ret->base.StencilBits = 8;
+	    ret->base.DataType = GL_UNSIGNED_INT_24_8_EXT;
+	    break;
+	default:
+	    /* whoops */
+	    break;
+    }
+
+    ret->base.InternalFormat = format;
+
+    ret->base.AllocStorage = radeon_alloc_window_storage;
+
+    radeonSetSpanFunctions(ret);
+
+    return ret;
+}
+
+/**
+ * Create the Mesa framebuffer and renderbuffers for a given window/drawable.
+ *
+ * \todo This function (and its interface) will need to be updated to support
+ * pbuffers.
+ */
+static GLboolean
+radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv,
+                    __DRIdrawablePrivate *driDrawPriv,
+                    const __GLcontextModes *mesaVis,
+                    GLboolean isPixmap )
+{
+   radeonScreenPtr screen = (radeonScreenPtr) driScrnPriv->private;
+
+    const GLboolean swDepth = GL_FALSE;
+    const GLboolean swAlpha = GL_FALSE;
+    const GLboolean swAccum = mesaVis->accumRedBits > 0;
+    const GLboolean swStencil = mesaVis->stencilBits > 0 &&
+	mesaVis->depthBits != 24;
+    GLenum rgbFormat = (mesaVis->redBits == 5 ? GL_RGB5 : GL_RGBA8);
+    GLenum depthFormat = GL_NONE;
+    struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis);
+
+    if (mesaVis->depthBits == 16)
+	depthFormat = GL_DEPTH_COMPONENT16;
+    else if (mesaVis->depthBits == 24)
+	depthFormat = GL_DEPTH_COMPONENT24;
+
+    /* front color renderbuffer */
+    {
+	struct radeon_renderbuffer *front =
+	    radeon_create_renderbuffer(rgbFormat);
+	_mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &front->base);
+    }
+
+    /* back color renderbuffer */
+    if (mesaVis->doubleBufferMode) {
+	struct radeon_renderbuffer *back =
+	    radeon_create_renderbuffer(GL_RGBA);
+	_mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &back->base);
+    }
+
+    /* depth renderbuffer */
+    if (depthFormat != GL_NONE) {
+	struct radeon_renderbuffer *depth =
+	    radeon_create_renderbuffer(depthFormat);
+	_mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depth->base);
+	depth->depthHasSurface = screen->depthHasSurface;
+    }
+
+    /* stencil renderbuffer */
+    if (mesaVis->stencilBits > 0 && !swStencil) {
+	struct radeon_renderbuffer *stencil =
+	    radeon_create_renderbuffer(GL_STENCIL_INDEX8_EXT);
+	_mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencil->base);
+	stencil->depthHasSurface = screen->depthHasSurface;
+    }
+
+    _mesa_add_soft_renderbuffers(fb,
+	    GL_FALSE, /* color */
+	    swDepth,
+	    swStencil,
+	    swAccum,
+	    swAlpha,
+	    GL_FALSE /* aux */);
+    driDrawPriv->driverPrivate = (void *) fb;
+
+    return (driDrawPriv->driverPrivate != NULL);
+}
+#else

 /**
  * Create the Mesa framebuffer and renderbuffers for a given window/drawable.
@@ -1105,6 +1358,11 @@ radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv,
 }


+
+
+#endif
+
+
 static void
 radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv)
 {
@@ -1199,11 +1457,11 @@ radeonInitScreen(__DRIscreenPrivate *psp)
    if (!radeonInitDriver(psp))
        return NULL;

+   /* for now fill in all modes */
    return radeonFillInModes( psp,
 			     dri_priv->bpp,
 			     (dri_priv->bpp == 16) ? 16 : 24,
-			     (dri_priv->bpp == 16) ? 0  : 8,
-			     (dri_priv->backOffset != dri_priv->depthOffset) );
+			     (dri_priv->bpp == 16) ? 0  : 8, 1);
 }


diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h
index ab859d5..82eb7d8 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.h
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.h
@@ -55,6 +55,14 @@ typedef struct {
    drmAddress map;			/* Mapping of the DRM region */
 } radeonRegionRec, *radeonRegionPtr;

+struct radeon_gem_object {
+   uint32_t gem_name;
+   uint32_t gem_handle;
+   uint64_t size;
+   void *map;
+   uint64_t offset;
+};
+
 typedef struct {
    int chip_family;
    int chip_flags;
@@ -107,6 +115,13 @@ typedef struct {
    const __DRIextension *extensions[8];

    int num_gb_pipes;
+
+   int kernel_mm;
+   struct radeon_gem_object front;
+   struct radeon_gem_object back;
+   struct radeon_gem_object depth;
+   struct radeon_gem_object vram_texture;
+   struct radeon_gem_object gart_texture;
 } radeonScreenRec, *radeonScreenPtr;

 #define IS_R100_CLASS(screen) \
diff --git a/src/mesa/drivers/dri/radeon/radeon_span.h b/src/mesa/drivers/dri/radeon/radeon_span.h
index 9abe086..1650a9b 100644
--- a/src/mesa/drivers/dri/radeon/radeon_span.h
+++ b/src/mesa/drivers/dri/radeon/radeon_span.h
@@ -44,7 +44,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 #include "drirenderbuffer.h"

+#include "radeon_buffer.h"
+
 extern void radeonInitSpanFuncs(GLcontext * ctx);
-extern void radeonSetSpanFunctions(driRenderbuffer * rb, const GLvisual * vis);

+#if COMPILE_R300
+extern void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
+#else
+extern void radeonSetSpanFunctions(driRenderbuffer * rb, const GLvisual * vis);
+#endif
 #endif