diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index c1d223c..4424896 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -41,6 +41,7 @@ DRIVER_SOURCES = \ r300_fragprog.c \ r300_shader.c \ r300_emit.c \ + r300_swtcl.c \ $(EGL_SOURCES) C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES) diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index d13649d..9eca41f 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -133,13 +133,15 @@ static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *stat int i; int dwords = (*state->check) (r300, state); - fprintf(stderr, " emit %s/%d/%d\n", state->name, dwords, + fprintf(stderr, " emit %s %d/%d\n", state->name, dwords, state->cmd_size); - if (RADEON_DEBUG & DEBUG_VERBOSE) - for (i = 0; i < dwords; i++) - fprintf(stderr, " %s[%d]: %08X\n", + if (RADEON_DEBUG & DEBUG_VERBOSE) { + for (i = 0; i < dwords; i++) { + fprintf(stderr, " %s[%d]: %08x\n", state->name, i, state->cmd[i]); + } + } } /** @@ -148,28 +150,14 @@ static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *stat * The caller must have ensured that there is enough space in the command * buffer. */ -static __inline__ void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty) +static inline void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty) { struct r300_state_atom *atom; uint32_t *dest; + int dwords; dest = r300->cmdbuf.cmd_buf + r300->cmdbuf.count_used; - if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) { - foreach(atom, &r300->hw.atomlist) { - if ((atom->dirty || r300->hw.all_dirty) == dirty) { - int dwords = (*atom->check) (r300, atom); - - if (dwords) - r300PrintStateAtom(r300, atom); - else - fprintf(stderr, - " skip state %s\n", - atom->name); - } - } - } - /* Emit WAIT */ *dest = cmdwait(R300_WAIT_3D | R300_WAIT_3D_CLEAN); dest++; @@ -193,13 +181,20 @@ static __inline__ void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty) foreach(atom, &r300->hw.atomlist) { if ((atom->dirty || r300->hw.all_dirty) == dirty) { - int dwords = (*atom->check) (r300, atom); - + dwords = (*atom->check) (r300, atom); if (dwords) { + if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) { + r300PrintStateAtom(r300, atom); + } memcpy(dest, atom->cmd, dwords * 4); dest += dwords; r300->cmdbuf.count_used += dwords; atom->dirty = GL_FALSE; + } else { + if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) { + fprintf(stderr, " skip state %s\n", + atom->name); + } } } } @@ -245,22 +240,28 @@ void r300EmitState(r300ContextPtr r300) r300->hw.all_dirty = GL_FALSE; } -#define CHECK( NM, COUNT ) \ -static int check_##NM( r300ContextPtr r300, \ - struct r300_state_atom* atom ) \ -{ \ - (void) atom; (void) r300; \ - return (COUNT); \ -} - #define packet0_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->packet0.count) #define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count) -CHECK(always, atom->cmd_size) - CHECK(variable, packet0_count(atom->cmd) ? (1 + packet0_count(atom->cmd)) : 0) - CHECK(vpu, vpu_count(atom->cmd) ? (1 + vpu_count(atom->cmd) * 4) : 0) -#undef packet0_count -#undef vpu_count +static int check_always(r300ContextPtr r300, struct r300_state_atom *atom) +{ + return atom->cmd_size; +} + +static int check_variable(r300ContextPtr r300, struct r300_state_atom *atom) +{ + int cnt; + cnt = packet0_count(atom->cmd); + return cnt ? cnt + 1 : 0; +} + +static int check_vpu(r300ContextPtr r300, struct r300_state_atom *atom) +{ + int cnt; + cnt = vpu_count(atom->cmd); + return cnt ? (cnt * 4) + 1 : 0; +} + #define ALLOC_STATE( ATOM, CHK, SZ, IDX ) \ do { \ r300->hw.ATOM.cmd_size = (SZ); \ @@ -318,10 +319,14 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.unk21DC.cmd[0] = cmdpacket0(0x21DC, 1); ALLOC_STATE(unk221C, always, 2, 0); r300->hw.unk221C.cmd[0] = cmdpacket0(R300_VAP_UNKNOWN_221C, 1); - ALLOC_STATE(unk2220, always, 5, 0); - r300->hw.unk2220.cmd[0] = cmdpacket0(0x2220, 4); - ALLOC_STATE(unk2288, always, 2, 0); - r300->hw.unk2288.cmd[0] = cmdpacket0(R300_VAP_UNKNOWN_2288, 1); + ALLOC_STATE(vap_clip, always, 5, 0); + r300->hw.vap_clip.cmd[0] = cmdpacket0(R300_VAP_CLIP_X_0, 4); + + if (has_tcl) { + ALLOC_STATE(unk2288, always, 2, 0); + r300->hw.unk2288.cmd[0] = cmdpacket0(R300_VAP_UNKNOWN_2288, 1); + } + ALLOC_STATE(vof, always, R300_VOF_CMDSIZE, 0); r300->hw.vof.cmd[R300_VOF_CMD_0] = cmdpacket0(R300_VAP_OUTPUT_VTX_FMT_0, 2); diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.h b/src/mesa/drivers/dri/r300/r300_cmdbuf.h index bfb2eda..acb6e38 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.h +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.h @@ -52,7 +52,7 @@ extern void r300DestroyCmdBuf(r300ContextPtr r300); * * \param dwords The number of dwords we need to be free on the command buffer */ -static __inline__ void r300EnsureCmdBufSpace(r300ContextPtr r300, +static inline void r300EnsureCmdBufSpace(r300ContextPtr r300, int dwords, const char *caller) { assert(dwords < r300->cmdbuf.size); @@ -68,7 +68,7 @@ static __inline__ void r300EnsureCmdBufSpace(r300ContextPtr r300, * causes state reemission after a flush. This is necessary to ensure * correct hardware state after an unlock. */ -static __inline__ uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300, +static inline uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300, int dwords, const char *caller) { uint32_t *ptr; @@ -80,7 +80,7 @@ static __inline__ uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300, return ptr; } -static __inline__ uint32_t *r300AllocCmdBuf(r300ContextPtr r300, +static inline uint32_t *r300AllocCmdBuf(r300ContextPtr r300, int dwords, const char *caller) { uint32_t *ptr; diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index 9ea14ab..14e0f05 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -63,6 +63,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_ioctl.h" #include "r300_tex.h" #include "r300_emit.h" +#include "r300_swtcl.h" #ifdef USER_BUFFERS #include "r300_mem.h" @@ -317,15 +318,17 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, _tnl_allow_vertex_fog(ctx, GL_TRUE); /* currently bogus data */ - ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; - ctx->Const.VertexProgram.MaxNativeInstructions = - VSF_MAX_FRAGMENT_LENGTH / 4; - ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */ - ctx->Const.VertexProgram.MaxTemps = 32; - ctx->Const.VertexProgram.MaxNativeTemps = - /*VSF_MAX_FRAGMENT_TEMPS */ 32; - ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */ - ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; + if (screen->chip_flags & RADEON_CHIPSET_TCL) { + ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; + ctx->Const.VertexProgram.MaxNativeInstructions = + VSF_MAX_FRAGMENT_LENGTH / 4; + ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */ + ctx->Const.VertexProgram.MaxTemps = 32; + ctx->Const.VertexProgram.MaxNativeTemps = + /*VSF_MAX_FRAGMENT_TEMPS */ 32; + ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */ + ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; + } ctx->Const.FragmentProgram.MaxNativeTemps = PFS_NUM_TEMP_REGS; ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */ @@ -363,6 +366,8 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, radeonInitSpanFuncs(ctx); r300InitCmdBuf(r300); r300InitState(r300); + if (!(screen->chip_flags & RADEON_CHIPSET_TCL)) + r300InitSwtcl(ctx); TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 4149867..be69097 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -49,8 +49,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define USER_BUFFERS -//#define OPTIMIZE_ELTS - struct r300_context; typedef struct r300_context r300ContextRec; typedef struct r300_context *r300ContextPtr; @@ -80,7 +78,7 @@ typedef struct r300_context *r300ContextPtr; /** * This function takes a float and packs it into a uint32_t */ -static __inline__ uint32_t r300PackFloat32(float fl) +static inline uint32_t r300PackFloat32(float fl) { union { float fl; @@ -97,7 +95,7 @@ static __inline__ uint32_t r300PackFloat32(float fl) * But it works for most things. I'll fix it later if someone * else with a better clue doesn't */ -static __inline__ uint32_t r300PackFloat24(float f) +static inline uint32_t r300PackFloat24(float f) { float mantissa; int exponent; @@ -149,7 +147,6 @@ struct r300_dma_region { int aos_offset; /* address in GART memory */ int aos_stride; /* distance between elements, in dwords */ int aos_size; /* number of components (1-4) */ - int aos_reg; /* VAP register assignment */ }; struct r300_dma { @@ -455,7 +452,7 @@ struct r300_hw_state { struct r300_state_atom vic; /* vap input control (2180) */ struct r300_state_atom unk21DC; /* (21DC) */ struct r300_state_atom unk221C; /* (221C) */ - struct r300_state_atom unk2220; /* (2220) */ + struct r300_state_atom vap_clip; struct r300_state_atom unk2288; /* (2288) */ struct r300_state_atom pvs; /* pvs_cntl (22D0) */ struct r300_state_atom gb_enable; /* (4008) */ @@ -571,38 +568,21 @@ struct r300_vertex_shader_fragment { union { GLuint d[VSF_MAX_FRAGMENT_LENGTH]; float f[VSF_MAX_FRAGMENT_LENGTH]; - VERTEX_SHADER_INSTRUCTION i[VSF_MAX_FRAGMENT_LENGTH / 4]; + GLuint i[VSF_MAX_FRAGMENT_LENGTH]; } body; }; -#define VSF_DEST_PROGRAM 0x0 -#define VSF_DEST_MATRIX0 0x200 -#define VSF_DEST_MATRIX1 0x204 -#define VSF_DEST_MATRIX2 0x208 -#define VSF_DEST_VECTOR0 0x20c -#define VSF_DEST_VECTOR1 0x20d -#define VSF_DEST_UNKNOWN1 0x400 -#define VSF_DEST_UNKNOWN2 0x406 - struct r300_vertex_shader_state { struct r300_vertex_shader_fragment program; - - struct r300_vertex_shader_fragment unknown1; - struct r300_vertex_shader_fragment unknown2; - - int program_start; - int unknown_ptr1; /* pointer within program space */ - int program_end; - - int param_offset; - int param_count; - - int unknown_ptr2; /* pointer within program space */ - int unknown_ptr3; /* pointer within program space */ }; extern int hw_tcl_on; +#define COLOR_IS_RGBA +#define TAG(x) r300##x +#include "tnl_dd/t_dd_vertex.h" +#undef TAG + //#define CURRENT_VERTEX_SHADER(ctx) (ctx->VertexProgram._Current) #define CURRENT_VERTEX_SHADER(ctx) (R300_CONTEXT(ctx)->selected_vp) @@ -783,37 +763,10 @@ struct r300_fragment_program { #define R300_MAX_AOS_ARRAYS 16 -#define AOS_FORMAT_USHORT 0 -#define AOS_FORMAT_FLOAT 1 -#define AOS_FORMAT_UBYTE 2 -#define AOS_FORMAT_FLOAT_COLOR 3 - #define REG_COORDS 0 #define REG_COLOR0 1 #define REG_TEX0 2 -struct dt { - GLint size; - GLenum type; - GLsizei stride; - void *data; -}; - -struct radeon_vertex_buffer { - int Count; - void *Elts; - int elt_size; - int elt_min, elt_max; /* debug */ - - struct dt AttribPtr[VERT_ATTRIB_MAX]; - - const struct _mesa_prim *Primitive; - GLuint PrimitiveCount; - GLint LockFirst; - GLsizei LockCount; - int lock_uptodate; -}; - struct r300_state { struct r300_depthbuffer_state depth; struct r300_texture_state texture; @@ -822,18 +775,14 @@ struct r300_state { struct r300_pfs_compile_state pfs_compile; struct r300_dma_region aos[R300_MAX_AOS_ARRAYS]; int aos_count; - struct radeon_vertex_buffer VB; GLuint *Elts; struct r300_dma_region elt_dma; - DECLARE_RENDERINPUTS(render_inputs_bitset); /* actual render inputs that R300 was configured for. + struct r300_dma_region swtcl_dma; + DECLARE_RENDERINPUTS(render_inputs_bitset); /* actual render inputs that R300 was configured for. They are the same as tnl->render_inputs for fixed pipeline */ - struct { - int transform_offset; /* Transform matrix offset, -1 if none */ - } vap_param; /* vertex processor parameter allocation - tells where to write parameters */ - struct r300_stencilbuffer_state stencil; }; @@ -842,6 +791,62 @@ struct r300_state { #define R300_FALLBACK_TCL 1 #define R300_FALLBACK_RAST 2 +/* r300_swtcl.c + */ +struct r300_swtcl_info { + GLuint RenderIndex; + + /** + * Size of a hardware vertex. This is calculated when \c ::vertex_attrs is + * installed in the Mesa state vector. + */ + GLuint vertex_size; + + /** + * Attributes instructing the Mesa TCL pipeline where / how to put vertex + * data in the hardware buffer. + */ + struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX]; + + /** + * Number of elements of \c ::vertex_attrs that are actually used. + */ + GLuint vertex_attr_count; + + /** + * Cached pointer to the buffer where Mesa will store vertex data. + */ + GLubyte *verts; + + /* Fallback rasterization functions + */ + // r200_point_func draw_point; + // r200_line_func draw_line; + // r200_tri_func draw_tri; + + GLuint hw_primitive; + GLenum render_primitive; + GLuint numverts; + + /** + * Offset of the 4UB color data within a hardware (swtcl) vertex. + */ + GLuint coloroffset; + + /** + * Offset of the 3UB specular color data within a hardware (swtcl) vertex. + */ + GLuint specoffset; + + /** + * Should Mesa project vertex data or will the hardware do it? + */ + GLboolean needproj; + + struct r300_dma_region indexed_verts; +}; + + /** * \brief R300 context structure. */ @@ -880,6 +885,9 @@ struct r300_context { GLvector4f *temp_attrib[_TNL_ATTRIB_MAX]; GLboolean disable_lowimpact_fallback; + + DECLARE_RENDERINPUTS(tnl_index_bitset); /* index of bits for last tnl_install_attrs */ + struct r300_swtcl_info swtcl; }; struct r300_buffer_object { diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c index 2c26069..424bf44 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.c +++ b/src/mesa/drivers/dri/r300/r300_emit.c @@ -86,16 +86,15 @@ do { \ } while (0) #endif -static void r300EmitVec4(GLcontext * ctx, - struct r300_dma_region *rvb, +static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb, GLvoid * data, int stride, int count) { int i; int *out = (int *)(rvb->address + rvb->start); if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d stride %d\n", - __FUNCTION__, count, stride); + fprintf(stderr, "%s count %d stride %d out %p data %p\n", + __FUNCTION__, count, stride, (void *)out, (void *)data); if (stride == 4) COPY_DWORDS(out, data, count); @@ -107,16 +106,15 @@ static void r300EmitVec4(GLcontext * ctx, } } -static void r300EmitVec8(GLcontext * ctx, - struct r300_dma_region *rvb, +static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb, GLvoid * data, int stride, int count) { int i; int *out = (int *)(rvb->address + rvb->start); if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d stride %d\n", - __FUNCTION__, count, stride); + fprintf(stderr, "%s count %d stride %d out %p data %p\n", + __FUNCTION__, count, stride, (void *)out, (void *)data); if (stride == 8) COPY_DWORDS(out, data, count * 2); @@ -129,8 +127,7 @@ static void r300EmitVec8(GLcontext * ctx, } } -static void r300EmitVec12(GLcontext * ctx, - struct r300_dma_region *rvb, +static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb, GLvoid * data, int stride, int count) { int i; @@ -152,16 +149,15 @@ static void r300EmitVec12(GLcontext * ctx, } } -static void r300EmitVec16(GLcontext * ctx, - struct r300_dma_region *rvb, +static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb, GLvoid * data, int stride, int count) { int i; int *out = (int *)(rvb->address + rvb->start); if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d stride %d\n", - __FUNCTION__, count, stride); + fprintf(stderr, "%s count %d stride %d out %p data %p\n", + __FUNCTION__, count, stride, (void *)out, (void *)data); if (stride == 16) COPY_DWORDS(out, data, count * 4); @@ -176,32 +172,22 @@ static void r300EmitVec16(GLcontext * ctx, } } -static void r300EmitVec(GLcontext * ctx, - struct r300_dma_region *rvb, +static void r300EmitVec(GLcontext * ctx, struct r300_dma_region *rvb, GLvoid * data, int size, int stride, int count) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d size %d stride %d\n", - __FUNCTION__, count, size, stride); - - /* Gets triggered when playing with future_hw_tcl_on ... */ - //assert(!rvb->buf); - if (stride == 0) { r300AllocDmaRegion(rmesa, rvb, size * 4, 4); count = 1; rvb->aos_offset = GET_START(rvb); rvb->aos_stride = 0; } else { - r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4); /* alignment? */ + r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4); rvb->aos_offset = GET_START(rvb); rvb->aos_stride = size; } - /* Emit the data - */ switch (size) { case 1: r300EmitVec4(ctx, rvb, data, stride, count); @@ -217,95 +203,35 @@ static void r300EmitVec(GLcontext * ctx, break; default: assert(0); - _mesa_exit(-1); - break; - } - -} - -static GLuint t_type(struct dt *dt) -{ - switch (dt->type) { - case GL_UNSIGNED_BYTE: - return AOS_FORMAT_UBYTE; - case GL_SHORT: - return AOS_FORMAT_USHORT; - case GL_FLOAT: - return AOS_FORMAT_FLOAT; - default: - assert(0); - break; - } - - return AOS_FORMAT_FLOAT; -} - -static GLuint t_vir0_size(struct dt *dt) -{ - switch (dt->type) { - case GL_UNSIGNED_BYTE: - return 4; - case GL_SHORT: - return 7; - case GL_FLOAT: - return dt->size - 1; - default: - assert(0); break; } - - return 0; } -static GLuint t_aos_size(struct dt *dt) -{ - switch (dt->type) { - case GL_UNSIGNED_BYTE: - return 1; - case GL_SHORT: - return 2; - case GL_FLOAT: - return dt->size; - default: - assert(0); - break; - } - - return 0; -} - -static GLuint t_vir0(uint32_t * dst, struct dt *dt, int *inputs, - GLint * tab, GLuint nr) +static GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, + int *inputs, GLint * tab, GLuint nr) { GLuint i, dw; + /* type, inputs, stop bit, size */ for (i = 0; i + 1 < nr; i += 2) { - dw = t_vir0_size(&dt[tab[i]]) | (inputs[tab[i]] << 8) | - (t_type(&dt[tab[i]]) << 14); - dw |= - (t_vir0_size(&dt[tab[i + 1]]) | - (inputs[tab[i + 1]] << 8) | (t_type(&dt[tab[i + 1]]) - << 14)) << 16; - + dw = R300_INPUT_ROUTE_FLOAT | (inputs[tab[i]] << 8) | (attribptr[tab[i]]->size - 1); + dw |= (R300_INPUT_ROUTE_FLOAT | (inputs[tab[i + 1]] << 8) | (attribptr[tab[i + 1]]->size - 1)) << 16; if (i + 2 == nr) { - dw |= (1 << (13 + 16)); + dw |= (R300_VAP_INPUT_ROUTE_END << 16); } dst[i >> 1] = dw; } if (nr & 1) { - dw = t_vir0_size(&dt[tab[nr - 1]]) | (inputs[tab[nr - 1]] - << 8) | - (t_type(&dt[tab[nr - 1]]) << 14); - dw |= 1 << 13; - + dw = R300_INPUT_ROUTE_FLOAT | (inputs[tab[nr - 1]] << 8) | (attribptr[tab[nr - 1]]->size - 1); + dw |= R300_VAP_INPUT_ROUTE_END; dst[nr >> 1] = dw; } return (nr + 1) >> 1; } -static GLuint t_swizzle(int swizzle[4]) +static GLuint r300VAPInputRoute1Swizzle(int swizzle[4]) { return (swizzle[0] << R300_INPUT_ROUTE_X_SHIFT) | (swizzle[1] << R300_INPUT_ROUTE_Y_SHIFT) | @@ -313,32 +239,32 @@ static GLuint t_swizzle(int swizzle[4]) (swizzle[3] << R300_INPUT_ROUTE_W_SHIFT); } -static GLuint t_vir1(uint32_t * dst, int swizzle[][4], GLuint nr) +GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr) { GLuint i; for (i = 0; i + 1 < nr; i += 2) { - dst[i >> 1] = t_swizzle(swizzle[i]) | R300_INPUT_ROUTE_ENABLE; - dst[i >> 1] |= - (t_swizzle(swizzle[i + 1]) | R300_INPUT_ROUTE_ENABLE) - << 16; + dst[i >> 1] = r300VAPInputRoute1Swizzle(swizzle[i]) | R300_INPUT_ROUTE_ENABLE; + dst[i >> 1] |= (r300VAPInputRoute1Swizzle(swizzle[i + 1]) | R300_INPUT_ROUTE_ENABLE) << 16; } - if (nr & 1) - dst[nr >> 1] = - t_swizzle(swizzle[nr - 1]) | R300_INPUT_ROUTE_ENABLE; + if (nr & 1) { + dst[nr >> 1] = r300VAPInputRoute1Swizzle(swizzle[nr - 1]) | R300_INPUT_ROUTE_ENABLE; + } return (nr + 1) >> 1; } -static GLuint t_emit_size(struct dt *dt) +GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead) { - return dt->size; + /* No idea what this value means. I have seen other values written to + * this register... */ + return 0x5555; } -static GLuint t_vic(GLcontext * ctx, GLuint InputsRead) +GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead) { - r300ContextPtr r300 = R300_CONTEXT(ctx); + r300ContextPtr rmesa = R300_CONTEXT(ctx); GLuint i, vic_1 = 0; if (InputsRead & (1 << VERT_ATTRIB_POS)) @@ -350,177 +276,188 @@ static GLuint t_vic(GLcontext * ctx, GLuint InputsRead) if (InputsRead & (1 << VERT_ATTRIB_COLOR0)) vic_1 |= R300_INPUT_CNTL_COLOR; - r300->state.texture.tc_count = 0; + rmesa->state.texture.tc_count = 0; for (i = 0; i < ctx->Const.MaxTextureUnits; i++) if (InputsRead & (1 << (VERT_ATTRIB_TEX0 + i))) { - r300->state.texture.tc_count++; + rmesa->state.texture.tc_count++; vic_1 |= R300_INPUT_CNTL_TC0 << i; } return vic_1; } +GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten) +{ + GLuint ret = 0; + + if (OutputsWritten & (1 << VERT_RESULT_HPOS)) + ret |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; + + if (OutputsWritten & (1 << VERT_RESULT_COL0)) + ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT; + + if (OutputsWritten & (1 << VERT_RESULT_COL1)) + ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT; + + if (OutputsWritten & (1 << VERT_RESULT_BFC0) + || OutputsWritten & (1 << VERT_RESULT_BFC1)) + ret |= + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT | + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT | + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT; + +#if 0 + if (OutputsWritten & (1 << VERT_RESULT_FOGC)) ; +#endif + + if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) + ret |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; + + return ret; +} + +GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten) +{ + GLuint i, ret = 0; + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (OutputsWritten & (1 << (VERT_RESULT_TEX0 + i))) { + ret |= (4 << (3 * i)); + } + } + + return ret; +} + /* Emit vertex data to GART memory * Route inputs to the vertex processor * This function should never return R300_FALLBACK_TCL when using software tcl. */ - int r300EmitArrays(GLcontext * ctx) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - r300ContextPtr r300 = rmesa; - struct radeon_vertex_buffer *VB = &rmesa->state.VB; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; GLuint nr; - GLuint count = VB->Count; + GLuint count = vb->Count; GLuint i; GLuint InputsRead = 0, OutputsWritten = 0; int *inputs = NULL; int vir_inputs[VERT_ATTRIB_MAX]; GLint tab[VERT_ATTRIB_MAX]; int swizzle[VERT_ATTRIB_MAX][4]; + struct r300_vertex_program *prog = + (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); if (hw_tcl_on) { - struct r300_vertex_program *prog = - (struct r300_vertex_program *) - CURRENT_VERTEX_SHADER(ctx); inputs = prog->inputs; - InputsRead = CURRENT_VERTEX_SHADER(ctx)->key.InputsRead; - OutputsWritten = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; + InputsRead = prog->key.InputsRead; + OutputsWritten = prog->key.OutputsWritten; } else { - DECLARE_RENDERINPUTS(inputs_bitset); - inputs = r300->state.sw_tcl_inputs; + inputs = rmesa->state.sw_tcl_inputs; + + DECLARE_RENDERINPUTS(render_inputs_bitset); + RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset); - RENDERINPUTS_COPY(inputs_bitset, - TNL_CONTEXT(ctx)->render_inputs_bitset); + vb->AttribPtr[VERT_ATTRIB_POS] = vb->ClipPtr; - assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_POS)); - InputsRead |= 1 << VERT_ATTRIB_POS; - OutputsWritten |= 1 << VERT_RESULT_HPOS; + assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)); + assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_NORMAL) == 0); + //assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0)); - assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_NORMAL) - == 0); + if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)) { + InputsRead |= 1 << VERT_ATTRIB_POS; + OutputsWritten |= 1 << VERT_RESULT_HPOS; + } - assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_COLOR0)); - InputsRead |= 1 << VERT_ATTRIB_COLOR0; - OutputsWritten |= 1 << VERT_RESULT_COL0; + if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0)) { + InputsRead |= 1 << VERT_ATTRIB_COLOR0; + OutputsWritten |= 1 << VERT_RESULT_COL0; + } - if (RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_COLOR1)) { + if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR1)) { InputsRead |= 1 << VERT_ATTRIB_COLOR1; OutputsWritten |= 1 << VERT_RESULT_COL1; } - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) - if (RENDERINPUTS_TEST - (inputs_bitset, _TNL_ATTRIB_TEX(i))) { + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_TEX(i))) { InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i); OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); } + } - for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) - if (InputsRead & (1 << i)) + for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { + if (InputsRead & (1 << i)) { inputs[i] = nr++; - else + } else { inputs[i] = -1; - - if (! - (r300->radeon.radeonScreen-> - chip_flags & RADEON_CHIPSET_TCL)) { - /* Fixed, apply to vir0 only */ - memcpy(vir_inputs, inputs, - VERT_ATTRIB_MAX * sizeof(int)); - inputs = vir_inputs; - - if (InputsRead & VERT_ATTRIB_POS) - inputs[VERT_ATTRIB_POS] = 0; - - if (InputsRead & (1 << VERT_ATTRIB_COLOR0)) - inputs[VERT_ATTRIB_COLOR0] = 2; - - if (InputsRead & (1 << VERT_ATTRIB_COLOR1)) - inputs[VERT_ATTRIB_COLOR1] = 3; - - for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) - if (InputsRead & (1 << i)) - inputs[i] = 6 + (i - VERT_ATTRIB_TEX0); + } } - RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, - inputs_bitset); + /* Fixed, apply to vir0 only */ + memcpy(vir_inputs, inputs, VERT_ATTRIB_MAX * sizeof(int)); + inputs = vir_inputs; + if (InputsRead & VERT_ATTRIB_POS) + inputs[VERT_ATTRIB_POS] = 0; + if (InputsRead & (1 << VERT_ATTRIB_COLOR0)) + inputs[VERT_ATTRIB_COLOR0] = 2; + if (InputsRead & (1 << VERT_ATTRIB_COLOR1)) + inputs[VERT_ATTRIB_COLOR1] = 3; + for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) + if (InputsRead & (1 << i)) + inputs[i] = 6 + (i - VERT_ATTRIB_TEX0); + + RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset); } + assert(InputsRead); assert(OutputsWritten); - for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) - if (InputsRead & (1 << i)) + for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { + if (InputsRead & (1 << i)) { tab[nr++] = i; + } + } - if (nr > R300_MAX_AOS_ARRAYS) + if (nr > R300_MAX_AOS_ARRAYS) { return R300_FALLBACK_TCL; + } for (i = 0; i < nr; i++) { - int ci; - int comp_size, fix, found = 0; + int ci, fix, found = 0; swizzle[i][0] = SWIZZLE_ZERO; swizzle[i][1] = SWIZZLE_ZERO; swizzle[i][2] = SWIZZLE_ZERO; swizzle[i][3] = SWIZZLE_ONE; - for (ci = 0; ci < VB->AttribPtr[tab[i]].size; ci++) + for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) { swizzle[i][ci] = ci; - -#if MESA_BIG_ENDIAN -#define SWAP_INT(a, b) do { \ - int __temp; \ - __temp = a;\ - a = b; \ - b = __temp; \ -} while (0) - - if (VB->AttribPtr[tab[i]].type == GL_UNSIGNED_BYTE) { - SWAP_INT(swizzle[i][0], swizzle[i][3]); - SWAP_INT(swizzle[i][1], swizzle[i][2]); } -#endif /* MESA_BIG_ENDIAN */ - if (r300IsGartMemory(rmesa, VB->AttribPtr[tab[i]].data, - /*(count-1)*stride */ 4)) { - if (VB->AttribPtr[tab[i]].stride % 4) + if (r300IsGartMemory(rmesa, vb->AttribPtr[tab[i]]->data, 4)) { + if (vb->AttribPtr[tab[i]]->stride % 4) { return R300_FALLBACK_TCL; - - rmesa->state.aos[i].address = - VB->AttribPtr[tab[i]].data; + } + rmesa->state.aos[i].address = (void *)(vb->AttribPtr[tab[i]]->data); rmesa->state.aos[i].start = 0; - rmesa->state.aos[i].aos_offset = - r300GartOffsetFromVirtual(rmesa, - VB-> - AttribPtr[tab[i]].data); - rmesa->state.aos[i].aos_stride = - VB->AttribPtr[tab[i]].stride / 4; - - rmesa->state.aos[i].aos_size = - t_emit_size(&VB->AttribPtr[tab[i]]); + rmesa->state.aos[i].aos_offset = r300GartOffsetFromVirtual(rmesa, vb->AttribPtr[tab[i]]->data); + rmesa->state.aos[i].aos_stride = vb->AttribPtr[tab[i]]->stride / 4; + rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size; } else { - /* TODO: r300EmitVec can only handle 4 byte vectors */ - if (VB->AttribPtr[tab[i]].type != GL_FLOAT) - return R300_FALLBACK_TCL; - r300EmitVec(ctx, &rmesa->state.aos[i], - VB->AttribPtr[tab[i]].data, - t_emit_size(&VB->AttribPtr[tab[i]]), - VB->AttribPtr[tab[i]].stride, count); + vb->AttribPtr[tab[i]]->data, + vb->AttribPtr[tab[i]]->size, + vb->AttribPtr[tab[i]]->stride, count); } - rmesa->state.aos[i].aos_size = - t_aos_size(&VB->AttribPtr[tab[i]]); - - comp_size = _mesa_sizeof_type(VB->AttribPtr[tab[i]].type); + rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size; - for (fix = 0; fix <= 4 - VB->AttribPtr[tab[i]].size; fix++) { - if ((rmesa->state.aos[i].aos_offset - - comp_size * fix) % 4) + for (fix = 0; fix <= 4 - vb->AttribPtr[tab[i]]->size; fix++) { + if ((rmesa->state.aos[i].aos_offset - _mesa_sizeof_type(GL_FLOAT) * fix) % 4) { continue; - + } found = 1; break; } @@ -529,70 +466,41 @@ int r300EmitArrays(GLcontext * ctx) if (fix > 0) { WARN_ONCE("Feeling lucky?\n"); } - - rmesa->state.aos[i].aos_offset -= comp_size * fix; - - for (ci = 0; ci < VB->AttribPtr[tab[i]].size; ci++) + rmesa->state.aos[i].aos_offset -= _mesa_sizeof_type(GL_FLOAT) * fix; + for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) { swizzle[i][ci] += fix; + } } else { WARN_ONCE ("Cannot handle offset %x with stride %d, comp %d\n", rmesa->state.aos[i].aos_offset, rmesa->state.aos[i].aos_stride, - VB->AttribPtr[tab[i]].size); + vb->AttribPtr[tab[i]]->size); return R300_FALLBACK_TCL; } } - /* setup INPUT_ROUTE */ - R300_STATECHANGE(r300, vir[0]); - ((drm_r300_cmd_header_t *) r300->hw.vir[0].cmd)->packet0.count = - t_vir0(&r300->hw.vir[0].cmd[R300_VIR_CNTL_0], VB->AttribPtr, - inputs, tab, nr); - - R300_STATECHANGE(r300, vir[1]); - ((drm_r300_cmd_header_t *) r300->hw.vir[1].cmd)->packet0.count = - t_vir1(&r300->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, nr); - - /* Set up input_cntl */ - /* I don't think this is needed for vertex buffers, but it doesn't hurt anything */ - R300_STATECHANGE(r300, vic); - r300->hw.vic.cmd[R300_VIC_CNTL_0] = 0x5555; /* Hard coded value, no idea what it means */ - r300->hw.vic.cmd[R300_VIC_CNTL_1] = t_vic(ctx, InputsRead); - - /* Stage 3: VAP output */ - - R300_STATECHANGE(r300, vof); - - r300->hw.vof.cmd[R300_VOF_CNTL_0] = 0; - r300->hw.vof.cmd[R300_VOF_CNTL_1] = 0; - - if (OutputsWritten & (1 << VERT_RESULT_HPOS)) - r300->hw.vof.cmd[R300_VOF_CNTL_0] |= - R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; - - if (OutputsWritten & (1 << VERT_RESULT_COL0)) - r300->hw.vof.cmd[R300_VOF_CNTL_0] |= - R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT; - - if (OutputsWritten & (1 << VERT_RESULT_COL1)) - r300->hw.vof.cmd[R300_VOF_CNTL_0] |= - R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT; - - /*if(OutputsWritten & (1 << VERT_RESULT_BFC0)) - r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT; - - if(OutputsWritten & (1 << VERT_RESULT_BFC1)) - r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT; */ - //if(OutputsWritten & (1 << VERT_RESULT_FOGC)) - - if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) - r300->hw.vof.cmd[R300_VOF_CNTL_0] |= - R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; - - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) - if (OutputsWritten & (1 << (VERT_RESULT_TEX0 + i))) - r300->hw.vof.cmd[R300_VOF_CNTL_1] |= (4 << (3 * i)); + /* Setup INPUT_ROUTE. */ + R300_STATECHANGE(rmesa, vir[0]); + ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = + r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0], + vb->AttribPtr, inputs, tab, nr); + R300_STATECHANGE(rmesa, vir[1]); + ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = + r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, + nr); + + /* Setup INPUT_CNTL. */ + R300_STATECHANGE(rmesa, vic); + rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); + rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead); + + /* Setup OUTPUT_VTX_FMT. */ + R300_STATECHANGE(rmesa, vof); + rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = + r300VAPOutputCntl0(ctx, OutputsWritten); + rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = + r300VAPOutputCntl1(ctx, OutputsWritten); rmesa->state.aos_count = nr; @@ -625,3 +533,19 @@ void r300ReleaseArrays(GLcontext * ctx) r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__); } } + +void r300EmitCacheFlush(r300ContextPtr rmesa) +{ + int cmd_reserved = 0; + int cmd_written = 0; + + drm_radeon_cmd_header_t *cmd = NULL; + + reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0); + e32(R300_RB3D_DSTCACHE_UNKNOWN_0A); + + reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0); + e32(R300_RB3D_ZCACHE_UNKNOWN_03); + + +} diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h index 7be098f..a6d69ec 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.h +++ b/src/mesa/drivers/dri/r300/r300_emit.h @@ -44,22 +44,13 @@ #include "r300_cmdbuf.h" #include "radeon_reg.h" -/* - * CP type-3 packets - */ -#define RADEON_CP_PACKET3_UNK1B 0xC0001B00 -#define RADEON_CP_PACKET3_INDX_BUFFER 0xC0003300 -#define RADEON_CP_PACKET3_3D_DRAW_VBUF_2 0xC0003400 -#define RADEON_CP_PACKET3_3D_DRAW_IMMD_2 0xC0003500 -#define RADEON_CP_PACKET3_3D_DRAW_INDX_2 0xC0003600 -#define RADEON_CP_PACKET3_3D_LOAD_VBPNTR 0xC0002F00 -#define RADEON_CP_PACKET3_3D_CLEAR_ZMASK 0xC0003202 -#define RADEON_CP_PACKET3_3D_CLEAR_CMASK 0xC0003802 -#define RADEON_CP_PACKET3_3D_CLEAR_HIZ 0xC0003702 - +/* TODO: move these defines (and the ones from DRM) into r300_reg.h and sync up + * with DRM */ #define CP_PACKET0(reg, n) (RADEON_CP_PACKET0 | ((n)<<16) | ((reg)>>2)) +#define CP_PACKET3( pkt, n ) \ + (RADEON_CP_PACKET3 | (pkt) | ((n) << 16)) -static __inline__ uint32_t cmdpacket0(int reg, int count) +static inline uint32_t cmdpacket0(int reg, int count) { drm_r300_cmd_header_t cmd; @@ -71,7 +62,7 @@ static __inline__ uint32_t cmdpacket0(int reg, int count) return cmd.u; } -static __inline__ uint32_t cmdvpu(int addr, int count) +static inline uint32_t cmdvpu(int addr, int count) { drm_r300_cmd_header_t cmd; @@ -83,7 +74,7 @@ static __inline__ uint32_t cmdvpu(int addr, int count) return cmd.u; } -static __inline__ uint32_t cmdpacket3(int packet) +static inline uint32_t cmdpacket3(int packet) { drm_r300_cmd_header_t cmd; @@ -93,7 +84,7 @@ static __inline__ uint32_t cmdpacket3(int packet) return cmd.u; } -static __inline__ uint32_t cmdcpdelay(unsigned short count) +static inline uint32_t cmdcpdelay(unsigned short count) { drm_r300_cmd_header_t cmd; @@ -103,7 +94,7 @@ static __inline__ uint32_t cmdcpdelay(unsigned short count) return cmd.u; } -static __inline__ uint32_t cmdwait(unsigned char flags) +static inline uint32_t cmdwait(unsigned char flags) { drm_r300_cmd_header_t cmd; @@ -113,7 +104,7 @@ static __inline__ uint32_t cmdwait(unsigned char flags) return cmd.u; } -static __inline__ uint32_t cmdpacify(void) +static inline uint32_t cmdpacify(void) { drm_r300_cmd_header_t cmd; @@ -234,5 +225,15 @@ void r300UseArrays(GLcontext * ctx); #endif extern void r300ReleaseArrays(GLcontext * ctx); +extern int r300PrimitiveType(r300ContextPtr rmesa, int prim); +extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim); + +extern void r300EmitCacheFlush(r300ContextPtr rmesa); + +extern GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr); +extern GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead); +extern GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead); +extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten); +extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten); #endif diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 416ea7f..90f5027 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -172,11 +172,7 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) cmd2[7].u = r300PackFloat32(ctx->Color.ClearColor[2]); cmd2[8].u = r300PackFloat32(ctx->Color.ClearColor[3]); - reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0); - e32(R300_RB3D_DSTCACHE_UNKNOWN_0A); - - reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0); - e32(R300_RB3D_ZCACHE_UNKNOWN_03); + r300EmitCacheFlush(rmesa); cp_wait(rmesa, R300_WAIT_3D | R300_WAIT_3D_CLEAN); } @@ -224,25 +220,23 @@ static void r300EmitClearState(GLcontext * ctx) e32(R300_INPUT_CNTL_0_COLOR); e32(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0); - if (!has_tcl) { - R300_STATECHANGE(r300, vte); - /* comes from fglrx startup of clear */ - reg_start(R300_SE_VTE_CNTL, 1); - e32(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA | - R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA | - R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA | - R300_VPORT_Z_OFFSET_ENA); - e32(0x8); - - reg_start(0x21dc, 0); - e32(0xaaaaaaaa); - } + R300_STATECHANGE(r300, vte); + /* comes from fglrx startup of clear */ + reg_start(R300_SE_VTE_CNTL, 1); + e32(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA | + R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA | + R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA | + R300_VPORT_Z_OFFSET_ENA); + e32(0x8); + + reg_start(0x21dc, 0); + e32(0xaaaaaaaa); R300_STATECHANGE(r300, vof); reg_start(R300_VAP_OUTPUT_VTX_FMT_0, 1); e32(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT | R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT); - e32(0x0); /* no textures */ + e32(0x0); /* no textures */ R300_STATECHANGE(r300, txe); reg_start(R300_TX_ENABLE, 0); @@ -414,19 +408,22 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask) void r300Flush(GLcontext * ctx) { - r300ContextPtr r300 = R300_CONTEXT(ctx); + r300ContextPtr rmesa = R300_CONTEXT(ctx); if (RADEON_DEBUG & DEBUG_IOCTL) fprintf(stderr, "%s\n", __FUNCTION__); - if (r300->cmdbuf.count_used > r300->cmdbuf.count_reemit) - r300FlushCmdBuf(r300, __FUNCTION__); + if (rmesa->dma.flush) + rmesa->dma.flush( rmesa ); + + if (rmesa->cmdbuf.count_used > rmesa->cmdbuf.count_reemit) + r300FlushCmdBuf(rmesa, __FUNCTION__); } #ifdef USER_BUFFERS #include "r300_mem.h" -static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size) +void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size) { struct r300_dma_buffer *dmabuf; size = MAX2(size, RADEON_BUFFER_SIZE * 16); @@ -438,9 +435,12 @@ static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size) rmesa->dma.flush(rmesa); } - if (rmesa->dma.current.buf) + if (rmesa->dma.current.buf) { +#ifdef USER_BUFFERS + r300_mem_use(rmesa, rmesa->dma.current.buf->id); +#endif r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__); - + } if (rmesa->dma.nr_released_bufs > 4) r300FlushCmdBuf(rmesa, __FUNCTION__); diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.h b/src/mesa/drivers/dri/r300/r300_ioctl.h index 7a19a2c..e1143fb 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.h +++ b/src/mesa/drivers/dri/r300/r300_ioctl.h @@ -56,4 +56,5 @@ extern void r300AllocDmaRegion(r300ContextPtr rmesa, extern void r300InitIoctlFuncs(struct dd_function_table *functions); +extern void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size); #endif /* __R300_IOCTL_H__ */ diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 0a31f0b..1baa74c 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -116,6 +116,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT (1<<16) /* GUESS */ #define R300_VAP_OUTPUT_VTX_FMT_1 0x2094 + /* each of the following is 3 bits wide, specifies number + of components */ # define R300_VAP_OUTPUT_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0 # define R300_VAP_OUTPUT_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3 # define R300_VAP_OUTPUT_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6 @@ -280,9 +282,32 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. */ #define R300_VAP_PVS_UPLOAD_ADDRESS 0x2200 # define R300_PVS_UPLOAD_PROGRAM 0x00000000 +/* gap */ # define R300_PVS_UPLOAD_PARAMETERS 0x00000200 +/* gap */ +# define R300_PVS_UPLOAD_CLIP_PLANE0 0x00000400 +# define R300_PVS_UPLOAD_CLIP_PLANE1 0x00000401 +# define R300_PVS_UPLOAD_CLIP_PLANE2 0x00000402 +# define R300_PVS_UPLOAD_CLIP_PLANE3 0x00000403 +# define R300_PVS_UPLOAD_CLIP_PLANE4 0x00000404 +# define R300_PVS_UPLOAD_CLIP_PLANE5 0x00000405 # define R300_PVS_UPLOAD_POINTSIZE 0x00000406 +/* + * These are obsolete defines form r300_context.h, but they might give some + * clues when investigating the addresses further... + */ +#if 0 +#define VSF_DEST_PROGRAM 0x0 +#define VSF_DEST_MATRIX0 0x200 +#define VSF_DEST_MATRIX1 0x204 +#define VSF_DEST_MATRIX2 0x208 +#define VSF_DEST_VECTOR0 0x20c +#define VSF_DEST_VECTOR1 0x20d +#define VSF_DEST_UNKNOWN1 0x400 +#define VSF_DEST_UNKNOWN2 0x406 +#endif + /* gap */ #define R300_VAP_PVS_UPLOAD_DATA 0x2208 @@ -299,6 +324,18 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_221C_NORMAL 0x00000000 # define R300_221C_CLEAR 0x0001C000 +/* These seem to be per-pixel and per-vertex X and Y clipping planes. The first + * plane is per-pixel and the second plane is per-vertex. + * + * This was determined by experimentation alone but I believe it is correct. + * + * These registers are called X_QUAD0_1_FL to X_QUAD0_4_FL by glxtest. + */ +#define R300_VAP_CLIP_X_0 0x2220 +#define R300_VAP_CLIP_X_1 0x2224 +#define R300_VAP_CLIP_Y_0 0x2228 +#define R300_VAP_CLIP_Y_1 0x2230 + /* gap */ /* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between @@ -322,13 +359,15 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * The meaning of the two UNKNOWN fields is obviously not known. However, * experiments so far have shown that both *must* point to an instruction * inside the vertex program, otherwise the GPU locks up. + * * fglrx usually sets CNTL_3_UNKNOWN to the end of the program and - * CNTL_1_UNKNOWN points to instruction where last write to position takes - * place. + * R300_PVS_CNTL_1_POS_END_SHIFT points to instruction where last write to + * position takes place. + * * Most likely this is used to ignore rest of the program in cases * where group of verts arent visible. For some reason this "section" * is sometimes accepted other instruction that have no relationship with - *position calculations. + * position calculations. */ #define R300_VAP_PVS_CNTL_1 0x22D0 # define R300_PVS_CNTL_1_PROGRAM_START_SHIFT 0 @@ -967,7 +1006,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * first node is stored in NODE_2, the second node is stored in NODE_3. * * Offsets are relative to the master offset from PFS_CNTL_2. - * LAST_NODE is set for the last node, and only for the last node. */ #define R300_PFS_NODE_0 0x4610 #define R300_PFS_NODE_1 0x4614 @@ -981,7 +1019,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_PFS_NODE_TEX_OFFSET_MASK (31 << 12) # define R300_PFS_NODE_TEX_END_SHIFT 17 # define R300_PFS_NODE_TEX_END_MASK (31 << 17) -/*# define R300_PFS_NODE_LAST_NODE (1 << 22) */ # define R300_PFS_NODE_OUTPUT_COLOR (1 << 22) # define R300_PFS_NODE_OUTPUT_DEPTH (1 << 23) @@ -1591,6 +1628,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_EB_UNK1_SHIFT 24 # define R300_EB_UNK1 (0x80<<24) # define R300_EB_UNK2 0x0810 +#define R300_PACKET3_3D_DRAW_VBUF_2 0x00003400 #define R300_PACKET3_3D_DRAW_INDX_2 0x00003600 /* END: Packet 3 commands */ diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index cc13e9a..eee1e80 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -45,6 +45,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * obviously this does work... Further investigation is needed. * * \author Nicolai Haehnle + * + * \todo Add immediate implementation back? Perhaps this is useful if there are + * no bugs... */ #include "glheader.h" @@ -76,7 +79,7 @@ extern int future_hw_tcl_on; /** * \brief Convert a OpenGL primitive type into a R300 primitive type. */ -static int r300PrimitiveType(r300ContextPtr rmesa, GLcontext * ctx, int prim) +int r300PrimitiveType(r300ContextPtr rmesa, int prim) { switch (prim & PRIM_MODE_MASK) { case GL_POINTS: @@ -116,7 +119,7 @@ static int r300PrimitiveType(r300ContextPtr rmesa, GLcontext * ctx, int prim) } } -static int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) +int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) { int verts_off = 0; @@ -168,16 +171,13 @@ static int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) return num_verts - verts_off; } -static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts, - int elt_size) +static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts) { r300ContextPtr rmesa = R300_CONTEXT(ctx); struct r300_dma_region *rvb = &rmesa->state.elt_dma; void *out; - assert(elt_size == 2 || elt_size == 4); - - if (r300IsGartMemory(rmesa, elts, n_elts * elt_size)) { + if (r300IsGartMemory(rmesa, elts, n_elts * 4)) { rvb->address = rmesa->radeon.radeonScreen->gartTextures.map; rvb->start = ((char *)elts) - rvb->address; rvb->aos_offset = @@ -189,66 +189,27 @@ static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts, _mesa_exit(-1); } - r300AllocDmaRegion(rmesa, rvb, n_elts * elt_size, elt_size); + r300AllocDmaRegion(rmesa, rvb, n_elts * 4, 4); rvb->aos_offset = GET_START(rvb); out = rvb->address + rvb->start; - memcpy(out, elts, n_elts * elt_size); + memcpy(out, elts, n_elts * 4); } static void r300FireEB(r300ContextPtr rmesa, unsigned long addr, - int vertex_count, int type, int elt_size) + int vertex_count, int type) { int cmd_reserved = 0; int cmd_written = 0; drm_radeon_cmd_header_t *cmd = NULL; - unsigned long t_addr; - unsigned long magic_1, magic_2; - - assert(elt_size == 2 || elt_size == 4); - - if (addr & (elt_size - 1)) { - WARN_ONCE("Badly aligned buffer\n"); - return; - } - magic_1 = (addr % 32) / 4; - t_addr = addr & ~0x1d; - magic_2 = (vertex_count + 1 + (t_addr & 0x2)) / 2 + magic_1; + start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0), 0); + e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count << 16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); - start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, 0); - if (elt_size == 4) { - e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | - (vertex_count << 16) | type | - R300_VAP_VF_CNTL__INDEX_SIZE_32bit); - } else { - e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | - (vertex_count << 16) | type); - } - - start_packet3(RADEON_CP_PACKET3_INDX_BUFFER, 2); -#ifdef OPTIMIZE_ELTS - if (elt_size == 4) { - e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); - e32(addr); - } else { - e32(R300_EB_UNK1 | (magic_1 << 16) | R300_EB_UNK2); - e32(t_addr); - } -#else + start_packet3(CP_PACKET3(R300_PACKET3_INDX_BUFFER, 2), 2); e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); e32(addr); -#endif - - if (elt_size == 4) { - e32(vertex_count); - } else { -#ifdef OPTIMIZE_ELTS - e32(magic_2); -#else - e32((vertex_count + 1) / 2); -#endif - } + e32(vertex_count); } static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) @@ -263,26 +224,23 @@ static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr, offset); - start_packet3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, sz - 1); + start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1), sz - 1); e32(nr); + for (i = 0; i + 1 < nr; i += 2) { - e32((rmesa->state.aos[i].aos_size << 0) - | (rmesa->state.aos[i].aos_stride << 8) - | (rmesa->state.aos[i + 1].aos_size << 16) - | (rmesa->state.aos[i + 1].aos_stride << 24) - ); - e32(rmesa->state.aos[i].aos_offset + - offset * 4 * rmesa->state.aos[i].aos_stride); - e32(rmesa->state.aos[i + 1].aos_offset + - offset * 4 * rmesa->state.aos[i + 1].aos_stride); + e32((rmesa->state.aos[i].aos_size << 0) | + (rmesa->state.aos[i].aos_stride << 8) | + (rmesa->state.aos[i + 1].aos_size << 16) | + (rmesa->state.aos[i + 1].aos_stride << 24)); + + e32(rmesa->state.aos[i].aos_offset + offset * 4 * rmesa->state.aos[i].aos_stride); + e32(rmesa->state.aos[i + 1].aos_offset + offset * 4 * rmesa->state.aos[i + 1].aos_stride); } if (nr & 1) { - e32((rmesa->state.aos[nr - 1].aos_size << 0) - | (rmesa->state.aos[nr - 1].aos_stride << 8) - ); - e32(rmesa->state.aos[nr - 1].aos_offset + - offset * 4 * rmesa->state.aos[nr - 1].aos_stride); + e32((rmesa->state.aos[nr - 1].aos_size << 0) | + (rmesa->state.aos[nr - 1].aos_stride << 8)); + e32(rmesa->state.aos[nr - 1].aos_offset + offset * 4 * rmesa->state.aos[nr - 1].aos_stride); } } @@ -292,130 +250,67 @@ static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type) int cmd_written = 0; drm_radeon_cmd_header_t *cmd = NULL; - start_packet3(RADEON_CP_PACKET3_3D_DRAW_VBUF_2, 0); - e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) - | type); + start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0); + e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type); } static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, int start, int end, int prim) { int type, num_verts; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; - type = r300PrimitiveType(rmesa, ctx, prim); + type = r300PrimitiveType(rmesa, prim); num_verts = r300NumVerts(rmesa, end - start, prim); if (type < 0 || num_verts <= 0) return; - if (rmesa->state.VB.Elts) { + if (vb->Elts) { r300EmitAOS(rmesa, rmesa->state.aos_count, start); if (num_verts > 65535) { /* not implemented yet */ WARN_ONCE("Too many elts\n"); return; } - r300EmitElts(ctx, rmesa->state.VB.Elts, num_verts, - rmesa->state.VB.elt_size); - r300FireEB(rmesa, rmesa->state.elt_dma.aos_offset, - num_verts, type, rmesa->state.VB.elt_size); + r300EmitElts(ctx, vb->Elts, num_verts); + r300FireEB(rmesa, rmesa->state.elt_dma.aos_offset, num_verts, type); } else { r300EmitAOS(rmesa, rmesa->state.aos_count, start); r300FireAOS(rmesa, num_verts, type); } } -#define CONV_VB(a, b) rvb->AttribPtr[(a)].size = vb->b->size, \ - rvb->AttribPtr[(a)].type = GL_FLOAT, \ - rvb->AttribPtr[(a)].stride = vb->b->stride, \ - rvb->AttribPtr[(a)].data = vb->b->data - -static void radeon_vb_to_rvb(r300ContextPtr rmesa, - struct radeon_vertex_buffer *rvb, - struct vertex_buffer *vb) -{ - int i; - GLcontext *ctx; - ctx = rmesa->radeon.glCtx; - - memset(rvb, 0, sizeof(*rvb)); - - rvb->Elts = vb->Elts; - rvb->elt_size = 4; - rvb->elt_min = 0; - rvb->elt_max = vb->Count; - - rvb->Count = vb->Count; - - if (hw_tcl_on) { - CONV_VB(VERT_ATTRIB_POS, ObjPtr); - } else { - assert(vb->ClipPtr); - CONV_VB(VERT_ATTRIB_POS, ClipPtr); - } - - CONV_VB(VERT_ATTRIB_NORMAL, NormalPtr); - CONV_VB(VERT_ATTRIB_COLOR0, ColorPtr[0]); - CONV_VB(VERT_ATTRIB_COLOR1, SecondaryColorPtr[0]); - CONV_VB(VERT_ATTRIB_FOG, FogCoordPtr); - - for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) - CONV_VB(VERT_ATTRIB_TEX0 + i, TexCoordPtr[i]); - - for (i = 0; i < MAX_VERTEX_PROGRAM_ATTRIBS; i++) - CONV_VB(VERT_ATTRIB_GENERIC0 + i, - AttribPtr[VERT_ATTRIB_GENERIC0 + i]); - - rvb->Primitive = vb->Primitive; - rvb->PrimitiveCount = vb->PrimitiveCount; - rvb->LockFirst = rvb->LockCount = 0; - rvb->lock_uptodate = GL_FALSE; -} - static GLboolean r300RunRender(GLcontext * ctx, struct tnl_pipeline_stage *stage) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct radeon_vertex_buffer *VB = &rmesa->state.VB; int i; - int cmd_reserved = 0; - int cmd_written = 0; - drm_radeon_cmd_header_t *cmd = NULL; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + if (RADEON_DEBUG & DEBUG_PRIMS) fprintf(stderr, "%s\n", __FUNCTION__); - if (stage) { - TNLcontext *tnl = TNL_CONTEXT(ctx); - radeon_vb_to_rvb(rmesa, VB, &tnl->vb); - } - r300UpdateShaders(rmesa); if (r300EmitArrays(ctx)) return GL_TRUE; r300UpdateShaderStates(rmesa); - reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0); - e32(R300_RB3D_DSTCACHE_UNKNOWN_0A); - - reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0); - e32(R300_RB3D_ZCACHE_UNKNOWN_03); - + r300EmitCacheFlush(rmesa); r300EmitState(rmesa); - for (i = 0; i < VB->PrimitiveCount; i++) { - GLuint prim = _tnl_translate_prim(&VB->Primitive[i]); - GLuint start = VB->Primitive[i].start; - GLuint end = VB->Primitive[i].start + VB->Primitive[i].count; + for (i = 0; i < vb->PrimitiveCount; i++) { + GLuint prim = _tnl_translate_prim(&vb->Primitive[i]); + GLuint start = vb->Primitive[i].start; + GLuint end = vb->Primitive[i].start + vb->Primitive[i].count; r300RunRenderPrimitive(rmesa, ctx, start, end, prim); } - reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0); - e32(R300_RB3D_DSTCACHE_UNKNOWN_0A); - - reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0); - e32(R300_RB3D_ZCACHE_UNKNOWN_03); + r300EmitCacheFlush(rmesa); #ifdef USER_BUFFERS r300UseArrays(ctx); @@ -463,8 +358,6 @@ static int r300Fallback(GLcontext * ctx) FALLBACK_IF(ctx->Point.PointSprite); if (!r300->disable_lowimpact_fallback) { - FALLBACK_IF(ctx->Polygon.OffsetPoint); - FALLBACK_IF(ctx->Polygon.OffsetLine); FALLBACK_IF(ctx->Polygon.StippleFlag); FALLBACK_IF(ctx->Multisample.Enabled); FALLBACK_IF(ctx->Line.StippleFlag); @@ -478,12 +371,17 @@ static int r300Fallback(GLcontext * ctx) static GLboolean r300RunNonTCLRender(GLcontext * ctx, struct tnl_pipeline_stage *stage) { + r300ContextPtr rmesa = R300_CONTEXT(ctx); + if (RADEON_DEBUG & DEBUG_PRIMS) fprintf(stderr, "%s\n", __FUNCTION__); if (r300Fallback(ctx) >= R300_FALLBACK_RAST) return GL_TRUE; + if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + return GL_TRUE; + return r300RunRender(ctx, stage); } diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c index 59fe17b..5f5ac7c 100644 --- a/src/mesa/drivers/dri/r300/r300_shader.c +++ b/src/mesa/drivers/dri/r300/r300_shader.c @@ -54,6 +54,7 @@ r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) fp->translated = GL_FALSE; break; } + /* need this for tcl fallbacks */ _tnl_program_string(ctx, target, prog); } @@ -61,7 +62,7 @@ r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) static GLboolean r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog) { - return 1; + return GL_TRUE; } void r300InitShaderFuncs(struct dd_function_table *functions) diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index e8d67f9..088216c 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -65,6 +65,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "drirenderbuffer.h" +extern int future_hw_tcl_on; +extern void _tnl_UpdateFixedFunctionProgram(GLcontext * ctx); + static void r300BlendColor(GLcontext * ctx, const GLfloat cf[4]) { GLubyte color[4]; @@ -79,6 +82,8 @@ static void r300BlendColor(GLcontext * ctx, const GLfloat cf[4]) rmesa->hw.blend_color.cmd[1] = PACK_COLOR_8888(color[3], color[0], color[1], color[2]); + rmesa->hw.blend_color.cmd[2] = 0; + rmesa->hw.blend_color.cmd[3] = 0; } /** @@ -314,20 +319,34 @@ static void r300UpdateCulling(GLcontext * ctx) r300ContextPtr r300 = R300_CONTEXT(ctx); uint32_t val = 0; - R300_STATECHANGE(r300, cul); if (ctx->Polygon.CullFlag) { - if (ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) - val = R300_CULL_FRONT | R300_CULL_BACK; - else if (ctx->Polygon.CullFaceMode == GL_FRONT) + switch (ctx->Polygon.CullFaceMode) { + case GL_FRONT: val = R300_CULL_FRONT; - else + break; + case GL_BACK: val = R300_CULL_BACK; + break; + case GL_FRONT_AND_BACK: + val = R300_CULL_FRONT | R300_CULL_BACK; + break; + default: + break; + } + } - if (ctx->Polygon.FrontFace == GL_CW) - val |= R300_FRONT_FACE_CW; - else - val |= R300_FRONT_FACE_CCW; + switch (ctx->Polygon.FrontFace) { + case GL_CW: + val |= R300_FRONT_FACE_CW; + break; + case GL_CCW: + val |= R300_FRONT_FACE_CCW; + break; + default: + break; } + + R300_STATECHANGE(r300, cul); r300->hw.cul.cmd[R300_CUL_CULL] = val; } @@ -341,6 +360,20 @@ static void r300SetEarlyZState(GLcontext * ctx) r300ContextPtr r300 = R300_CONTEXT(ctx); R300_STATECHANGE(r300, zstencil_format); + switch (ctx->Visual.depthBits) { + case 16: + r300->hw.zstencil_format.cmd[1] = R300_DEPTH_FORMAT_16BIT_INT_Z; + break; + case 24: + r300->hw.zstencil_format.cmd[1] = R300_DEPTH_FORMAT_24BIT_INT_Z; + break; + default: + fprintf(stderr, "Error: Unsupported depth %d... exiting\n", ctx->Visual.depthBits); + _mesa_exit(-1); + } + + // r300->hw.zstencil_format.cmd[1] |= R300_DEPTH_FORMAT_UNK32; + if (ctx->Color.AlphaEnabled && ctx->Color.AlphaFunc != GL_ALWAYS) /* disable early Z */ r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_DISABLE; @@ -352,6 +385,9 @@ static void r300SetEarlyZState(GLcontext * ctx) /* disable early Z */ r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_DISABLE; } + + r300->hw.zstencil_format.cmd[3] = 0x00000003; + r300->hw.zstencil_format.cmd[4] = 0x00000000; } static void r300SetAlphaState(GLcontext * ctx) @@ -400,6 +436,7 @@ static void r300SetAlphaState(GLcontext * ctx) R300_STATECHANGE(r300, at); r300->hw.at.cmd[R300_AT_ALPHA_TEST] = pp_misc; + r300->hw.at.cmd[R300_AT_UNKNOWN] = 0; r300SetEarlyZState(ctx); } @@ -462,97 +499,6 @@ static void r300SetDepthState(GLcontext * ctx) r300SetEarlyZState(ctx); } -/** - * Handle glEnable()/glDisable(). - * - * \note Mesa already filters redundant calls to glEnable/glDisable. - */ -static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - - if (RADEON_DEBUG & DEBUG_STATE) - fprintf(stderr, "%s( %s = %s )\n", __FUNCTION__, - _mesa_lookup_enum_by_nr(cap), - state ? "GL_TRUE" : "GL_FALSE"); - - switch (cap) { - /* Fast track this one... - */ - case GL_TEXTURE_1D: - case GL_TEXTURE_2D: - case GL_TEXTURE_3D: - break; - - case GL_FOG: - R300_STATECHANGE(r300, fogs); - if (state) { - r300->hw.fogs.cmd[R300_FOGS_STATE] |= R300_FOG_ENABLE; - - ctx->Driver.Fogfv(ctx, GL_FOG_MODE, NULL); - ctx->Driver.Fogfv(ctx, GL_FOG_DENSITY, - &ctx->Fog.Density); - ctx->Driver.Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start); - ctx->Driver.Fogfv(ctx, GL_FOG_END, &ctx->Fog.End); - ctx->Driver.Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color); - } else { - r300->hw.fogs.cmd[R300_FOGS_STATE] &= ~R300_FOG_ENABLE; - } - - break; - - case GL_ALPHA_TEST: - r300SetAlphaState(ctx); - break; - - case GL_BLEND: - case GL_COLOR_LOGIC_OP: - r300SetBlendState(ctx); - break; - - case GL_DEPTH_TEST: - r300SetDepthState(ctx); - break; - - case GL_STENCIL_TEST: - if (r300->state.stencil.hw_stencil) { - R300_STATECHANGE(r300, zs); - if (state) { - r300->hw.zs.cmd[R300_ZS_CNTL_0] |= - R300_RB3D_STENCIL_ENABLE; - } else { - r300->hw.zs.cmd[R300_ZS_CNTL_0] &= - ~R300_RB3D_STENCIL_ENABLE; - } - } else { -#if R200_MERGED - FALLBACK(&r300->radeon, RADEON_FALLBACK_STENCIL, state); -#endif - } - break; - - case GL_CULL_FACE: - r300UpdateCulling(ctx); - break; - - case GL_POLYGON_OFFSET_POINT: - case GL_POLYGON_OFFSET_LINE: - break; - - case GL_POLYGON_OFFSET_FILL: - R300_STATECHANGE(r300, occlusion_cntl); - if (state) { - r300->hw.occlusion_cntl.cmd[1] |= (3 << 0); - } else { - r300->hw.occlusion_cntl.cmd[1] &= ~(3 << 0); - } - break; - default: - radeonEnable(ctx, cap, state); - return; - } -} - static void r300UpdatePolygonMode(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); @@ -601,6 +547,9 @@ static void r300UpdatePolygonMode(GLcontext * ctx) R300_STATECHANGE(r300, polygon_mode); r300->hw.polygon_mode.cmd[1] = hw_mode; } + + r300->hw.polygon_mode.cmd[2] = 0x00000001; + r300->hw.polygon_mode.cmd[3] = 0x00000000; } /** @@ -784,8 +733,8 @@ static void r300Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param) static void r300PointSize(GLcontext * ctx, GLfloat size) { r300ContextPtr r300 = R300_CONTEXT(ctx); - - size = ctx->Point._Size; + /* same size limits for AA, non-AA points */ + size = CLAMP(size, ctx->Const.MinPointSize, ctx->Const.MaxPointSize); R300_STATECHANGE(r300, ps); r300->hw.ps.cmd[R300_PS_POINTSIZE] = @@ -800,11 +749,12 @@ static void r300LineWidth(GLcontext * ctx, GLfloat widthf) { r300ContextPtr r300 = R300_CONTEXT(ctx); - widthf = ctx->Line._Width; - + widthf = CLAMP(widthf, + ctx->Const.MinPointSize, + ctx->Const.MaxPointSize); R300_STATECHANGE(r300, lcntl); - r300->hw.lcntl.cmd[1] = (int)(widthf * 6.0); - r300->hw.lcntl.cmd[1] |= R300_LINE_CNT_VE; + r300->hw.lcntl.cmd[1] = + R300_LINE_CNT_HO | R300_LINE_CNT_VE | (int)(widthf * 6.0); } static void r300PolygonMode(GLcontext * ctx, GLenum face, GLenum mode) @@ -850,6 +800,7 @@ static void r300ShadeModel(GLcontext * ctx, GLenum mode) r300ContextPtr rmesa = R300_CONTEXT(ctx); R300_STATECHANGE(rmesa, shade); + rmesa->hw.shade.cmd[1] = 0x00000002; switch (mode) { case GL_FLAT: rmesa->hw.shade.cmd[2] = R300_RE_SHADE_MODEL_FLAT; @@ -860,6 +811,8 @@ static void r300ShadeModel(GLcontext * ctx, GLenum mode) default: return; } + rmesa->hw.shade.cmd[3] = 0x00000000; + rmesa->hw.shade.cmd[4] = 0x00000000; } static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face, @@ -1460,11 +1413,9 @@ static void r300SetupRSUnit(GLcontext * ctx) int i; if (hw_tcl_on) - OutputsWritten.vp_outputs = - CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; + OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; else - RENDERINPUTS_COPY(OutputsWritten.index_bitset, - r300->state.render_inputs_bitset); + RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->state.render_inputs_bitset); if (ctx->FragmentProgram._Current) InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; @@ -1496,9 +1447,7 @@ static void r300SetupRSUnit(GLcontext * ctx) } for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0 - | R300_RS_INTERP_USED - | (in_texcoords << R300_RS_INTERP_SRC_SHIFT) + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0 | R300_RS_INTERP_USED | (in_texcoords << R300_RS_INTERP_SRC_SHIFT) | interp_magic[i]; r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] = 0; @@ -1508,65 +1457,45 @@ static void r300SetupRSUnit(GLcontext * ctx) | (fp_reg << R300_RS_ROUTE_DEST_SHIFT); high_rr = fp_reg; - if (!R300_OUTPUTS_WRITTEN_TEST - (OutputsWritten, VERT_RESULT_TEX0 + i, - _TNL_ATTRIB_TEX(i))) { - /* Passing invalid data here can lock the GPU. */ - WARN_ONCE - ("fragprog wants coords for tex%d, vp doesn't provide them!\n", - i); - //_mesa_print_program(&CURRENT_VERTEX_SHADER(ctx)->Base); - //_mesa_exit(-1); + /* Passing invalid data here can lock the GPU. */ + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) { + InputsRead &= ~(FRAG_BIT_TEX0 << i); + fp_reg++; + } else { + WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i); } - InputsRead &= ~(FRAG_BIT_TEX0 << i); - fp_reg++; } /* Need to count all coords enabled at vof */ - if (R300_OUTPUTS_WRITTEN_TEST - (OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) { in_texcoords++; + } } if (InputsRead & FRAG_BIT_COL0) { - if (!R300_OUTPUTS_WRITTEN_TEST - (OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { - WARN_ONCE - ("fragprog wants col0, vp doesn't provide it\n"); - goto out; /* FIXME */ - //_mesa_print_program(&CURRENT_VERTEX_SHADER(ctx)->Base); - //_mesa_exit(-1); + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { + r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R300_RS_ROUTE_0_COLOR | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT); + InputsRead &= ~FRAG_BIT_COL0; + col_interp_nr++; + } else { + WARN_ONCE("fragprog wants col0, vp doesn't provide it\n"); } - - r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 - | R300_RS_ROUTE_0_COLOR - | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT); - InputsRead &= ~FRAG_BIT_COL0; - col_interp_nr++; } - out: if (InputsRead & FRAG_BIT_COL1) { - if (!R300_OUTPUTS_WRITTEN_TEST - (OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { - WARN_ONCE - ("fragprog wants col1, vp doesn't provide it\n"); - //_mesa_exit(-1); + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { + r300->hw.rr.cmd[R300_RR_ROUTE_1] |= R300_RS_ROUTE_1_UNKNOWN11 | R300_RS_ROUTE_1_COLOR1 | (fp_reg++ << R300_RS_ROUTE_1_COLOR1_DEST_SHIFT); + InputsRead &= ~FRAG_BIT_COL1; + if (high_rr < 1) + high_rr = 1; + col_interp_nr++; + } else { + WARN_ONCE("fragprog wants col1, vp doesn't provide it\n"); } - - r300->hw.rr.cmd[R300_RR_ROUTE_1] |= - R300_RS_ROUTE_1_UNKNOWN11 | R300_RS_ROUTE_1_COLOR1 | - (fp_reg++ << R300_RS_ROUTE_1_COLOR1_DEST_SHIFT); - InputsRead &= ~FRAG_BIT_COL1; - if (high_rr < 1) - high_rr = 1; - col_interp_nr++; } /* Need at least one. This might still lock as the values are undefined... */ if (in_texcoords == 0 && col_interp_nr == 0) { - r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 - | R300_RS_ROUTE_0_COLOR - | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT); + r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R300_RS_ROUTE_0_COLOR | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT); col_interp_nr++; } @@ -1575,17 +1504,13 @@ static void r300SetupRSUnit(GLcontext * ctx) | R300_RS_CNTL_0_UNKNOWN_18; assert(high_rr >= 0); - r300->hw.rr.cmd[R300_RR_CMD_0] = - cmdpacket0(R300_RS_ROUTE_0, high_rr + 1); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_ROUTE_0, high_rr + 1); r300->hw.rc.cmd[2] = 0xC0 | high_rr; if (InputsRead) - WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", - InputsRead); + WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead); } -#define vpucount(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count) - #define bump_vpu_count(ptr, new_count) do{\ drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\ int _nc=(new_count)/4; \ @@ -1593,9 +1518,7 @@ static void r300SetupRSUnit(GLcontext * ctx) if(_nc>_p->vpu.count)_p->vpu.count=_nc;\ }while(0) -void static inline setup_vertex_shader_fragment(r300ContextPtr r300, int dest, struct - r300_vertex_shader_fragment - *vsf) +static inline void r300SetupVertexProgramFragment(r300ContextPtr r300, int dest, struct r300_vertex_shader_fragment *vsf) { int i; @@ -1603,8 +1526,7 @@ void static inline setup_vertex_shader_fragment(r300ContextPtr r300, int dest, s return; if (vsf->length & 0x3) { - fprintf(stderr, - "VERTEX_SHADER_FRAGMENT must have length divisible by 4\n"); + fprintf(stderr, "VERTEX_SHADER_FRAGMENT must have length divisible by 4\n"); _mesa_exit(-1); } @@ -1612,147 +1534,101 @@ void static inline setup_vertex_shader_fragment(r300ContextPtr r300, int dest, s case 0: R300_STATECHANGE(r300, vpi); for (i = 0; i < vsf->length; i++) - r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + - 4 * (dest & 0xff)] = (vsf->body.d[i]); - bump_vpu_count(r300->hw.vpi.cmd, - vsf->length + 4 * (dest & 0xff)); + r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (vsf->body.d[i]); + bump_vpu_count(r300->hw.vpi.cmd, vsf->length + 4 * (dest & 0xff)); break; case 2: R300_STATECHANGE(r300, vpp); for (i = 0; i < vsf->length; i++) - r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + - 4 * (dest & 0xff)] = (vsf->body.d[i]); - bump_vpu_count(r300->hw.vpp.cmd, - vsf->length + 4 * (dest & 0xff)); + r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (vsf->body.d[i]); + bump_vpu_count(r300->hw.vpp.cmd, vsf->length + 4 * (dest & 0xff)); break; case 4: R300_STATECHANGE(r300, vps); for (i = 0; i < vsf->length; i++) - r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = - (vsf->body.d[i]); - bump_vpu_count(r300->hw.vps.cmd, - vsf->length + 4 * (dest & 0xff)); + r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (vsf->body.d[i]); + bump_vpu_count(r300->hw.vps.cmd, vsf->length + 4 * (dest & 0xff)); break; default: - fprintf(stderr, - "%s:%s don't know how to handle dest %04x\n", - __FILE__, __FUNCTION__, dest); + fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest); _mesa_exit(-1); } } -/* just a skeleton for now.. */ - -/* Generate a vertex shader that simply transforms vertex and texture coordinates, - while leaving colors intact. Nothing fancy (like lights) - - If implementing lights make a copy first, so it is easy to switch between the two versions */ -static void r300GenerateSimpleVertexShader(r300ContextPtr r300) +static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa) { - int i; + struct r300_vertex_shader_state *prog = &(rmesa->state.vertex_shader); GLuint o_reg = 0; - - /* Allocate parameters */ - r300->state.vap_param.transform_offset = 0x0; /* transform matrix */ - r300->state.vertex_shader.param_offset = 0x0; - r300->state.vertex_shader.param_count = 0x4; /* 4 vector values - 4x4 matrix */ - - r300->state.vertex_shader.program_start = 0x0; - r300->state.vertex_shader.unknown_ptr1 = 0x4; /* magic value ? */ - r300->state.vertex_shader.program_end = 0x0; - - r300->state.vertex_shader.unknown_ptr2 = 0x0; /* magic value */ - r300->state.vertex_shader.unknown_ptr3 = 0x4; /* magic value */ - - r300->state.vertex_shader.unknown1.length = 0; - r300->state.vertex_shader.unknown2.length = 0; - -#define WRITE_OP(oper,source1,source2,source3) {\ - r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].op=(oper); \ - r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].src[0]=(source1); \ - r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].src[1]=(source2); \ - r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].src[2]=(source3); \ - r300->state.vertex_shader.program_end++; \ - } - - for (i = VERT_ATTRIB_POS; i < VERT_ATTRIB_MAX; i++) - if (r300->state.sw_tcl_inputs[i] != -1) { - WRITE_OP(EASY_VSF_OP(MUL, o_reg++, ALL, RESULT), - VSF_REG(r300->state.sw_tcl_inputs[i]), - VSF_ATTR_UNITY(r300->state. - sw_tcl_inputs[i]), - VSF_UNITY(r300->state.sw_tcl_inputs[i]) - ) - + int i; + int inst_count = 0; + int param_count = 0; + int program_end = 0; + + for (i = VERT_ATTRIB_POS; i < VERT_ATTRIB_MAX; i++) { + if (rmesa->state.sw_tcl_inputs[i] != -1) { + prog->program.body.i[program_end + 0] = EASY_VSF_OP(MUL, o_reg++, ALL, RESULT); + prog->program.body.i[program_end + 1] = VSF_REG(rmesa->state.sw_tcl_inputs[i]); + prog->program.body.i[program_end + 2] = VSF_ATTR_UNITY(rmesa->state.sw_tcl_inputs[i]); + prog->program.body.i[program_end + 3] = VSF_UNITY(rmesa->state.sw_tcl_inputs[i]); + program_end += 4; } + } - r300->state.vertex_shader.program_end--; /* r300 wants program length to be one more - no idea why */ - r300->state.vertex_shader.program.length = - (r300->state.vertex_shader.program_end + 1) * 4; + prog->program.length = program_end; - r300->state.vertex_shader.unknown_ptr1 = r300->state.vertex_shader.program_end; /* magic value ? */ - r300->state.vertex_shader.unknown_ptr2 = r300->state.vertex_shader.program_end; /* magic value ? */ - r300->state.vertex_shader.unknown_ptr3 = r300->state.vertex_shader.program_end; /* magic value ? */ + r300SetupVertexProgramFragment(rmesa, R300_PVS_UPLOAD_PROGRAM, + &(prog->program)); + inst_count = (prog->program.length / 4) - 1; + R300_STATECHANGE(rmesa, pvs); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = + (0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) | + (inst_count << R300_PVS_CNTL_1_POS_END_SHIFT) | + (inst_count << R300_PVS_CNTL_1_PROGRAM_END_SHIFT); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = + (0 << R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT) | + (param_count << R300_PVS_CNTL_2_PARAM_COUNT_SHIFT); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = + (inst_count << R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT) | + (inst_count << R300_PVS_CNTL_3_PROGRAM_UNKNOWN2_SHIFT); } -static void r300SetupVertexProgram(r300ContextPtr rmesa) +static void r300SetupRealVertexProgram(r300ContextPtr rmesa) { GLcontext *ctx = rmesa->radeon.glCtx; - int inst_count; - int param_count; - struct r300_vertex_program *prog = - (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); + struct r300_vertex_program *prog = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); + int inst_count = 0; + int param_count = 0; - ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0; + /* FIXME: r300SetupVertexProgramFragment */ R300_STATECHANGE(rmesa, vpp); param_count = - r300VertexProgUpdateParams(ctx, (struct r300_vertex_program_cont *) - ctx->VertexProgram._Current /*prog */ , + r300VertexProgUpdateParams(ctx, + (struct r300_vertex_program_cont *) + ctx->VertexProgram._Current, (float *)&rmesa->hw.vpp. cmd[R300_VPP_PARAM_0]); bump_vpu_count(rmesa->hw.vpp.cmd, param_count); param_count /= 4; - /* Reset state, in case we don't use something */ - ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0; - ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0; - - setup_vertex_shader_fragment(rmesa, VSF_DEST_PROGRAM, &(prog->program)); - -#if 0 - setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN1, - &(rmesa->state.vertex_shader.unknown1)); - setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN2, - &(rmesa->state.vertex_shader.unknown2)); -#endif - - inst_count = prog->program.length / 4 - 1; + r300SetupVertexProgramFragment(rmesa, R300_PVS_UPLOAD_PROGRAM, &(prog->program)); + inst_count = (prog->program.length / 4) - 1; R300_STATECHANGE(rmesa, pvs); rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = - (0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) - | (inst_count /*pos_end */ << R300_PVS_CNTL_1_POS_END_SHIFT) - | (inst_count << R300_PVS_CNTL_1_PROGRAM_END_SHIFT); + (0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) | + (inst_count << R300_PVS_CNTL_1_POS_END_SHIFT) | + (inst_count << R300_PVS_CNTL_1_PROGRAM_END_SHIFT); rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = - (0 << R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT) - | (param_count << R300_PVS_CNTL_2_PARAM_COUNT_SHIFT); + (0 << R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT) | + (param_count << R300_PVS_CNTL_2_PARAM_COUNT_SHIFT); rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = - (0 /*rmesa->state.vertex_shader.unknown_ptr2 */ << - R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT) - | (inst_count /*rmesa->state.vertex_shader.unknown_ptr3 */ << - 0); - - /* This is done for vertex shader fragments, but also needs to be done for vap_pvs, - so I leave it as a reminder */ -#if 0 - reg_start(R300_VAP_PVS_WAITIDLE, 0); - e32(0x00000000); -#endif + (inst_count << R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT) | + (inst_count << R300_PVS_CNTL_3_PROGRAM_UNKNOWN2_SHIFT); } -static void r300SetupVertexShader(r300ContextPtr rmesa) +static void r300SetupVertexProgram(r300ContextPtr rmesa) { GLcontext *ctx = rmesa->radeon.glCtx; @@ -1765,50 +1641,108 @@ static void r300SetupVertexShader(r300ContextPtr rmesa) 0x400 area might have something to do with pixel shaders as it appears right after pfs programming. 0x406 is set to { 0.0, 0.0, 1.0, 0.0 } most of the time but should change with smooth points and in other rare cases. */ //setup_vertex_shader_fragment(rmesa, 0x406, &unk4); - if (hw_tcl_on - && ((struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx))-> - translated) { - r300SetupVertexProgram(rmesa); - return; + if (hw_tcl_on && ((struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx))->translated) { + r300SetupRealVertexProgram(rmesa); + } else { + /* FIXME: This needs to be replaced by vertex shader generation code. */ + r300SetupDefaultVertexProgram(rmesa); } - /* This needs to be replaced by vertex shader generation code */ - r300GenerateSimpleVertexShader(rmesa); - - setup_vertex_shader_fragment(rmesa, VSF_DEST_PROGRAM, - &(rmesa->state.vertex_shader.program)); + /* FIXME: This is done for vertex shader fragments, but also needs to be + * done for vap_pvs, so I leave it as a reminder. */ #if 0 - setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN1, - &(rmesa->state.vertex_shader.unknown1)); - setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN2, - &(rmesa->state.vertex_shader.unknown2)); + reg_start(R300_VAP_PVS_WAITIDLE, 0); + e32(0x00000000); #endif +} - R300_STATECHANGE(rmesa, pvs); - rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = - (rmesa->state.vertex_shader. - program_start << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) - | (rmesa->state.vertex_shader. - unknown_ptr1 << R300_PVS_CNTL_1_POS_END_SHIFT) - | (rmesa->state.vertex_shader. - program_end << R300_PVS_CNTL_1_PROGRAM_END_SHIFT); - rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = - (rmesa->state.vertex_shader. - param_offset << R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT) - | (rmesa->state.vertex_shader. - param_count << R300_PVS_CNTL_2_PARAM_COUNT_SHIFT); - rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = - (rmesa->state.vertex_shader. - unknown_ptr2 << R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT) - | (rmesa->state.vertex_shader.unknown_ptr3 << 0); +/** + * Enable/Disable states. + * + * \note Mesa already filters redundant calls to this function. + */ +static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); - /* This is done for vertex shader fragments, but also needs to be done for vap_pvs, - so I leave it as a reminder */ -#if 0 - reg_start(R300_VAP_PVS_WAITIDLE, 0); - e32(0x00000000); + if (RADEON_DEBUG & DEBUG_STATE) + fprintf(stderr, "%s( %s = %s )\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(cap), + state ? "GL_TRUE" : "GL_FALSE"); + + switch (cap) { + /* Fast track this one... + */ + case GL_TEXTURE_1D: + case GL_TEXTURE_2D: + case GL_TEXTURE_3D: + break; + + case GL_FOG: + R300_STATECHANGE(r300, fogs); + if (state) { + r300->hw.fogs.cmd[R300_FOGS_STATE] |= R300_FOG_ENABLE; + + r300Fogfv(ctx, GL_FOG_MODE, NULL); + r300Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density); + r300Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start); + r300Fogfv(ctx, GL_FOG_END, &ctx->Fog.End); + r300Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color); + } else { + r300->hw.fogs.cmd[R300_FOGS_STATE] &= ~R300_FOG_ENABLE; + } + + break; + + case GL_ALPHA_TEST: + r300SetAlphaState(ctx); + break; + + case GL_BLEND: + case GL_COLOR_LOGIC_OP: + r300SetBlendState(ctx); + break; + + case GL_DEPTH_TEST: + r300SetDepthState(ctx); + break; + + case GL_STENCIL_TEST: + if (r300->state.stencil.hw_stencil) { + R300_STATECHANGE(r300, zs); + if (state) { + r300->hw.zs.cmd[R300_ZS_CNTL_0] |= + R300_RB3D_STENCIL_ENABLE; + } else { + r300->hw.zs.cmd[R300_ZS_CNTL_0] &= + ~R300_RB3D_STENCIL_ENABLE; + } + } else { +#if R200_MERGED + FALLBACK(&r300->radeon, RADEON_FALLBACK_STENCIL, state); #endif + } + break; + + case GL_CULL_FACE: + r300UpdateCulling(ctx); + break; + + case GL_POLYGON_OFFSET_POINT: + case GL_POLYGON_OFFSET_LINE: + case GL_POLYGON_OFFSET_FILL: + R300_STATECHANGE(r300, occlusion_cntl); + if (state) { + r300->hw.occlusion_cntl.cmd[1] |= (3 << 0); + } else { + r300->hw.occlusion_cntl.cmd[1] &= ~(3 << 0); + } + break; + default: + radeonEnable(ctx, cap, state); + return; + } } /** @@ -1825,12 +1759,6 @@ static void r300ResetHwState(r300ContextPtr r300) if (RADEON_DEBUG & DEBUG_STATE) fprintf(stderr, "%s\n", __FUNCTION__); - /* This is a place to initialize registers which - have bitfields accessed by different functions - and not all bits are used */ - - /* go and compute register values from GL state */ - r300UpdateWindow(ctx); r300ColorMask(ctx, @@ -1860,13 +1788,11 @@ static void r300ResetHwState(r300ContextPtr r300) r300AlphaFunc(ctx, ctx->Color.AlphaFunc, ctx->Color.AlphaRef); r300Enable(ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled); - /* Initialize magic registers - TODO : learn what they really do, or get rid of - those we don't have to touch */ if (!has_tcl) r300->hw.vap_cntl.cmd[1] = 0x0014045a; else r300->hw.vap_cntl.cmd[1] = 0x0030045A; //0x0030065a /* Dangerous */ + r300->hw.vte.cmd[1] = R300_VPORT_X_SCALE_ENA | R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA @@ -1877,10 +1803,12 @@ static void r300ResetHwState(r300ContextPtr r300) r300->hw.unk2134.cmd[1] = 0x00FFFFFF; r300->hw.unk2134.cmd[2] = 0x00000000; - if (_mesa_little_endian()) - r300->hw.vap_cntl_status.cmd[1] = R300_VC_NO_SWAP; - else - r300->hw.vap_cntl_status.cmd[1] = R300_VC_32BIT_SWAP; + +#ifdef MESA_LITTLE_ENDIAN + r300->hw.vap_cntl_status.cmd[1] = R300_VC_NO_SWAP; +#else + r300->hw.vap_cntl_status.cmd[1] = R300_VC_32BIT_SWAP; +#endif /* disable VAP/TCL on non-TCL capable chips */ if (!has_tcl) @@ -1890,16 +1818,22 @@ static void r300ResetHwState(r300ContextPtr r300) r300->hw.unk221C.cmd[1] = R300_221C_NORMAL; - r300->hw.unk2220.cmd[1] = r300PackFloat32(1.0); - r300->hw.unk2220.cmd[2] = r300PackFloat32(1.0); - r300->hw.unk2220.cmd[3] = r300PackFloat32(1.0); - r300->hw.unk2220.cmd[4] = r300PackFloat32(1.0); + r300->hw.vap_clip.cmd[1] = r300PackFloat32(1.0); /* X */ + r300->hw.vap_clip.cmd[2] = r300PackFloat32(1.0); /* X */ + r300->hw.vap_clip.cmd[3] = r300PackFloat32(1.0); /* Y */ + r300->hw.vap_clip.cmd[4] = r300PackFloat32(1.0); /* Y */ - /* what about other chips than r300 or rv350??? */ - if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R300) - r300->hw.unk2288.cmd[1] = R300_2288_R300; - else - r300->hw.unk2288.cmd[1] = R300_2288_RV350; + /* XXX: Other families? */ + if (has_tcl) { + switch (r300->radeon.radeonScreen->chip_family) { + case CHIP_FAMILY_R300: + r300->hw.unk2288.cmd[1] = R300_2288_R300; + break; + default: + r300->hw.unk2288.cmd[1] = R300_2288_RV350; + break; + } + } r300->hw.gb_enable.cmd[1] = R300_GB_POINT_STUFF_ENABLE | R300_GB_LINE_STUFF_ENABLE @@ -1907,26 +1841,35 @@ static void r300ResetHwState(r300ContextPtr r300) r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_0] = 0x66666666; r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_1] = 0x06666666; - if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R300) || - (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R350)) - r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] = - R300_GB_TILE_ENABLE | R300_GB_TILE_PIPE_COUNT_R300 | - R300_GB_TILE_SIZE_16; - else if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) - r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] = - R300_GB_TILE_ENABLE | R300_GB_TILE_PIPE_COUNT_RV410 | - R300_GB_TILE_SIZE_16; - else if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420) - r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] = - R300_GB_TILE_ENABLE | R300_GB_TILE_PIPE_COUNT_R420 | - R300_GB_TILE_SIZE_16; - else - r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] = - R300_GB_TILE_ENABLE | R300_GB_TILE_PIPE_COUNT_RV300 | - R300_GB_TILE_SIZE_16; - /* set to 0 when fog is disabled? */ + + /* XXX: Other families? */ + r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] = + R300_GB_TILE_ENABLE | R300_GB_TILE_SIZE_16; + switch (r300->radeon.radeonScreen->chip_family) { + case CHIP_FAMILY_R300: + case CHIP_FAMILY_R350: + r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= + R300_GB_TILE_PIPE_COUNT_R300; + break; + case CHIP_FAMILY_RV410: + r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= + R300_GB_TILE_PIPE_COUNT_RV410; + break; + case CHIP_FAMILY_R420: + r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= + R300_GB_TILE_PIPE_COUNT_R420; + break; + default: + r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= + R300_GB_TILE_PIPE_COUNT_RV300; + break; + } + + /* XXX: set to 0 when fog is disabled? */ r300->hw.gb_misc.cmd[R300_GB_MISC_SELECT] = R300_GB_FOG_SELECT_1_1_W; - r300->hw.gb_misc.cmd[R300_GB_MISC_AA_CONFIG] = R300_AA_DISABLE; /* No antialiasing */ + + /* XXX: Enable anti-aliasing? */ + r300->hw.gb_misc.cmd[R300_GB_MISC_AA_CONFIG] = R300_AA_DISABLE; r300->hw.unk4200.cmd[1] = r300PackFloat32(0.0); r300->hw.unk4200.cmd[2] = r300PackFloat32(0.0); @@ -1935,31 +1878,28 @@ static void r300ResetHwState(r300ContextPtr r300) r300->hw.unk4214.cmd[1] = 0x00050005; - r300PointSize(ctx, 0.0); + r300PointSize(ctx, 1.0); r300->hw.unk4230.cmd[1] = 0x18000006; r300->hw.unk4230.cmd[2] = 0x00020006; r300->hw.unk4230.cmd[3] = r300PackFloat32(1.0 / 192.0); - r300LineWidth(ctx, 0.0); + r300LineWidth(ctx, 1.0); r300->hw.unk4260.cmd[1] = 0; r300->hw.unk4260.cmd[2] = r300PackFloat32(0.0); r300->hw.unk4260.cmd[3] = r300PackFloat32(1.0); - r300->hw.shade.cmd[1] = 0x00000002; r300ShadeModel(ctx, ctx->Light.ShadeModel); - r300->hw.shade.cmd[3] = 0x00000000; - r300->hw.shade.cmd[4] = 0x00000000; r300PolygonMode(ctx, GL_FRONT, ctx->Polygon.FrontMode); r300PolygonMode(ctx, GL_BACK, ctx->Polygon.BackMode); - r300->hw.polygon_mode.cmd[2] = 0x00000001; - r300->hw.polygon_mode.cmd[3] = 0x00000000; r300->hw.zbias_cntl.cmd[1] = 0x00000000; r300PolygonOffset(ctx, ctx->Polygon.OffsetFactor, ctx->Polygon.OffsetUnits); + r300Enable(ctx, GL_POLYGON_OFFSET_POINT, ctx->Polygon.OffsetPoint); + r300Enable(ctx, GL_POLYGON_OFFSET_LINE, ctx->Polygon.OffsetLine); r300Enable(ctx, GL_POLYGON_OFFSET_FILL, ctx->Polygon.OffsetFill); r300->hw.unk42C0.cmd[1] = 0x4B7FFFFF; @@ -1977,21 +1917,18 @@ static void r300ResetHwState(r300ContextPtr r300) r300->hw.unk46A4.cmd[5] = 0x00000001; r300Enable(ctx, GL_FOG, ctx->Fog.Enabled); - ctx->Driver.Fogfv(ctx, GL_FOG_MODE, NULL); - ctx->Driver.Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density); - ctx->Driver.Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start); - ctx->Driver.Fogfv(ctx, GL_FOG_END, &ctx->Fog.End); - ctx->Driver.Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color); - ctx->Driver.Fogfv(ctx, GL_FOG_COORDINATE_SOURCE_EXT, NULL); + r300Fogfv(ctx, GL_FOG_MODE, NULL); + r300Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density); + r300Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start); + r300Fogfv(ctx, GL_FOG_END, &ctx->Fog.End); + r300Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color); + r300Fogfv(ctx, GL_FOG_COORDINATE_SOURCE_EXT, NULL); - r300->hw.at.cmd[R300_AT_UNKNOWN] = 0; r300->hw.unk4BD8.cmd[1] = 0; r300->hw.unk4E00.cmd[1] = 0; r300BlendColor(ctx, ctx->Color.BlendColor); - r300->hw.blend_color.cmd[2] = 0; - r300->hw.blend_color.cmd[3] = 0; /* Again, r300ClearBuffer uses this */ r300->hw.cb.cmd[R300_CB_OFFSET] = @@ -2022,32 +1959,13 @@ static void r300ResetHwState(r300ContextPtr r300) r300->hw.unk4EA0.cmd[1] = 0x00000000; r300->hw.unk4EA0.cmd[2] = 0xffffffff; - switch (ctx->Visual.depthBits) { - case 16: - r300->hw.zstencil_format.cmd[1] = R300_DEPTH_FORMAT_16BIT_INT_Z; - break; - case 24: - r300->hw.zstencil_format.cmd[1] = R300_DEPTH_FORMAT_24BIT_INT_Z; - break; - default: - fprintf(stderr, "Error: Unsupported depth %d... exiting\n", - ctx->Visual.depthBits); - _mesa_exit(-1); - - } - /* z compress? */ - //r300->hw.zstencil_format.cmd[1] |= R300_DEPTH_FORMAT_UNK32; - - r300->hw.zstencil_format.cmd[3] = 0x00000003; - r300->hw.zstencil_format.cmd[4] = 0x00000000; - r300->hw.zb.cmd[R300_ZB_OFFSET] = r300->radeon.radeonScreen->depthOffset + r300->radeon.radeonScreen->fbLocation; r300->hw.zb.cmd[R300_ZB_PITCH] = r300->radeon.radeonScreen->depthPitch; if (r300->radeon.sarea->tiling_enabled) { - /* Turn off when clearing buffers ? */ + /* XXX: Turn off when clearing buffers ? */ r300->hw.zb.cmd[R300_ZB_PITCH] |= R300_DEPTH_TILE_ENABLE; if (ctx->Visual.depthBits == 24) @@ -2070,14 +1988,10 @@ static void r300ResetHwState(r300ContextPtr r300) r300->hw.vps.cmd[R300_VPS_POINTSIZE] = r300PackFloat32(1.0); r300->hw.vps.cmd[R300_VPS_ZERO_3] = 0; } -//END: TODO + r300->hw.all_dirty = GL_TRUE; } - -extern void _tnl_UpdateFixedFunctionProgram(GLcontext * ctx); - -extern int future_hw_tcl_on; void r300UpdateShaders(r300ContextPtr rmesa) { GLcontext *ctx; @@ -2117,7 +2031,6 @@ void r300UpdateShaders(r300ContextPtr rmesa) } r300UpdateStateParameters(ctx, _NEW_PROGRAM); } - } static void r300SetupPixelShader(r300ContextPtr rmesa) @@ -2136,62 +2049,61 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) __FUNCTION__); return; } -#define OUTPUT_FIELD(st, reg, field) \ - R300_STATECHANGE(rmesa, st); \ - for(i=0;i<=fp->alu_end;i++) \ - rmesa->hw.st.cmd[R300_FPI_INSTR_0+i]=fp->alu.inst[i].field;\ - rmesa->hw.st.cmd[R300_FPI_CMD_0]=cmdpacket0(reg, fp->alu_end+1); - OUTPUT_FIELD(fpi[0], R300_PFS_INSTR0_0, inst0); - OUTPUT_FIELD(fpi[1], R300_PFS_INSTR1_0, inst1); - OUTPUT_FIELD(fpi[2], R300_PFS_INSTR2_0, inst2); - OUTPUT_FIELD(fpi[3], R300_PFS_INSTR3_0, inst3); -#undef OUTPUT_FIELD + R300_STATECHANGE(rmesa, fpi[0]); + rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR0_0, fp->alu_end + 1); + for (i = 0; i <= fp->alu_end; i++) { + rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = fp->alu.inst[i].inst0; + } + + R300_STATECHANGE(rmesa, fpi[1]); + rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR1_0, fp->alu_end + 1); + for (i = 0; i <= fp->alu_end; i++) { + rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = fp->alu.inst[i].inst1; + } + + R300_STATECHANGE(rmesa, fpi[2]); + rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR2_0, fp->alu_end + 1); + for (i = 0; i <= fp->alu_end; i++) { + rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = fp->alu.inst[i].inst2; + } + + R300_STATECHANGE(rmesa, fpi[3]); + rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR3_0, fp->alu_end + 1); + for (i = 0; i <= fp->alu_end; i++) { + rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = fp->alu.inst[i].inst3; + } R300_STATECHANGE(rmesa, fp); + rmesa->hw.fp.cmd[R300_FP_CNTL0] = fp->cur_node | (fp->first_node_has_tex << 3); + rmesa->hw.fp.cmd[R300_FP_CNTL1] = fp->max_temp_idx; + rmesa->hw.fp.cmd[R300_FP_CNTL2] = + (fp->alu_offset << R300_PFS_CNTL_ALU_OFFSET_SHIFT) | + (fp->alu_end << R300_PFS_CNTL_ALU_END_SHIFT) | + (fp->tex_offset << R300_PFS_CNTL_TEX_OFFSET_SHIFT) | + (fp->tex_end << R300_PFS_CNTL_TEX_END_SHIFT); /* I just want to say, the way these nodes are stored.. weird.. */ for (i = 0, k = (4 - (fp->cur_node + 1)); i < 4; i++, k++) { if (i < (fp->cur_node + 1)) { rmesa->hw.fp.cmd[R300_FP_NODE0 + k] = - (fp->node[i]. - alu_offset << R300_PFS_NODE_ALU_OFFSET_SHIFT) - | (fp->node[i]. - alu_end << R300_PFS_NODE_ALU_END_SHIFT) - | (fp->node[i]. - tex_offset << R300_PFS_NODE_TEX_OFFSET_SHIFT) - | (fp->node[i]. - tex_end << R300_PFS_NODE_TEX_END_SHIFT) - | fp->node[i].flags; /* ( (k==3) ? R300_PFS_NODE_LAST_NODE : 0); */ + (fp->node[i].alu_offset << R300_PFS_NODE_ALU_OFFSET_SHIFT) | + (fp->node[i].alu_end << R300_PFS_NODE_ALU_END_SHIFT) | + (fp->node[i].tex_offset << R300_PFS_NODE_TEX_OFFSET_SHIFT) | + (fp->node[i].tex_end << R300_PFS_NODE_TEX_END_SHIFT) | + fp->node[i].flags; } else { rmesa->hw.fp.cmd[R300_FP_NODE0 + (3 - i)] = 0; } } - /* PFS_CNTL_0 */ - rmesa->hw.fp.cmd[R300_FP_CNTL0] = - fp->cur_node | (fp->first_node_has_tex << 3); - /* PFS_CNTL_1 */ - rmesa->hw.fp.cmd[R300_FP_CNTL1] = fp->max_temp_idx; - /* PFS_CNTL_2 */ - rmesa->hw.fp.cmd[R300_FP_CNTL2] = - (fp->alu_offset << R300_PFS_CNTL_ALU_OFFSET_SHIFT) - | (fp->alu_end << R300_PFS_CNTL_ALU_END_SHIFT) - | (fp->tex_offset << R300_PFS_CNTL_TEX_OFFSET_SHIFT) - | (fp->tex_end << R300_PFS_CNTL_TEX_END_SHIFT); - R300_STATECHANGE(rmesa, fpp); + rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, fp->const_nr * 4); for (i = 0; i < fp->const_nr; i++) { - rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = - r300PackFloat24(fp->constant[i][0]); - rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = - r300PackFloat24(fp->constant[i][1]); - rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = - r300PackFloat24(fp->constant[i][2]); - rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = - r300PackFloat24(fp->constant[i][3]); - } - rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = - cmdpacket0(R300_PFS_PARAM_0_X, fp->const_nr * 4); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(fp->constant[i][0]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(fp->constant[i][1]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(fp->constant[i][2]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat24(fp->constant[i][3]); + } } void r300UpdateShaderStates(r300ContextPtr rmesa) @@ -2205,7 +2117,7 @@ void r300UpdateShaderStates(r300ContextPtr rmesa) r300SetupTextures(ctx); if ((rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) - r300SetupVertexShader(rmesa); + r300SetupVertexProgram(rmesa); r300SetupRSUnit(ctx); } diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h index 21a49b7..365f7ec 100644 --- a/src/mesa/drivers/dri/r300/r300_state.h +++ b/src/mesa/drivers/dri/r300/r300_state.h @@ -37,8 +37,15 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_context.h" +#define R300_NEWPRIM( rmesa ) \ + do { \ + if ( rmesa->dma.flush ) \ + rmesa->dma.flush( rmesa ); \ + } while (0) + #define R300_STATECHANGE(r300, atom) \ do { \ + R300_NEWPRIM(r300); \ r300->hw.atom.dirty = GL_TRUE; \ r300->hw.is_dirty = GL_TRUE; \ } while(0) diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c new file mode 100644 index 0000000..c949f33 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -0,0 +1,711 @@ +/************************************************************************** + +Copyright (C) 2007 Dave Airlie + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Dave Airlie + */ + +/* derived from r200 swtcl path */ + + + +#include "glheader.h" +#include "mtypes.h" +#include "colormac.h" +#include "enums.h" +#include "image.h" +#include "imports.h" +#include "macros.h" + +#include "swrast/s_context.h" +#include "swrast/s_fog.h" +#include "swrast_setup/swrast_setup.h" +#include "math/m_translate.h" +#include "tnl/tnl.h" +#include "tnl/t_context.h" +#include "tnl/t_pipeline.h" + +#include "r300_context.h" +#include "r300_swtcl.h" +#include "r300_state.h" +#include "r300_ioctl.h" +#include "r300_emit.h" +#include "r300_mem.h" + +static void flush_last_swtcl_prim( r300ContextPtr rmesa ); + + +void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset); +void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr); +#define EMIT_ATTR( ATTR, STYLE ) \ +do { \ + rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR); \ + rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE); \ + rmesa->swtcl.vertex_attr_count++; \ +} while (0) + +#define EMIT_PAD( N ) \ +do { \ + rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0; \ + rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD; \ + rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N); \ + rmesa->swtcl.vertex_attr_count++; \ +} while (0) + +/* this differs from the VIR0 in emit.c - TODO merge them using another option */ +static GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, + int *inputs, GLint * tab, GLuint nr) +{ + GLuint i, dw; + + /* type, inputs, stop bit, size */ + for (i = 0; i + 1 < nr; i += 2) { + dw = (inputs[tab[i]] << 8) | 0x3; + dw |= ((inputs[tab[i + 1]] << 8) | 0x3) << 16; + if (i + 2 == nr) { + dw |= (R300_VAP_INPUT_ROUTE_END << 16); + } + dst[i >> 1] = dw; + } + + if (nr & 1) { + dw = (inputs[tab[nr - 1]] << 8) | 0x3; + dw |= R300_VAP_INPUT_ROUTE_END; + dst[nr >> 1] = dw; + } + + return (nr + 1) >> 1; +} + +static void r300SetVertexFormat( GLcontext *ctx ) +{ + r300ContextPtr rmesa = R300_CONTEXT( ctx ); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + DECLARE_RENDERINPUTS(index_bitset); + GLuint InputsRead = 0, OutputsWritten = 0; + int vap_fmt_0 = 0; + int vap_vte_cntl = 0; + int offset = 0; + int vte = 0; + GLint inputs[VERT_ATTRIB_MAX]; + GLint tab[VERT_ATTRIB_MAX]; + int swizzle[VERT_ATTRIB_MAX][4]; + GLuint i, nr; + + DECLARE_RENDERINPUTS(render_inputs_bitset); + RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset); + RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset ); + RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset); + + /* Important: + */ + if ( VB->NdcPtr != NULL ) { + VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr; + } + else { + VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr; + } + + assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL ); + rmesa->swtcl.vertex_attr_count = 0; + + /* EMIT_ATTR's must be in order as they tell t_vertex.c how to + * build up a hardware vertex. + */ + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POS)) { + vap_vte_cntl |= R300_VTX_W0_FMT; + InputsRead |= 1 << VERT_ATTRIB_POS; + OutputsWritten |= 1 << VERT_RESULT_HPOS; + EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F ); + } else + EMIT_PAD(4 * sizeof(float)); + + offset = 4; + + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POINTSIZE )) { + EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F ); + vap_fmt_0 |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; + offset += 1; + } + + if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_COLOR0)) { + rmesa->swtcl.coloroffset = offset; + InputsRead |= 1 << VERT_ATTRIB_COLOR0; + OutputsWritten |= 1 << VERT_RESULT_COL0; + EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4F ); + } + + offset += 4; + + rmesa->swtcl.specoffset = 0; + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) { + rmesa->swtcl.specoffset = offset; + EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4F ); + InputsRead |= 1 << VERT_ATTRIB_COLOR1; + OutputsWritten |= 1 << VERT_RESULT_COL1; + } + + if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { + int i; + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) { + InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i); + OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); + EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_4F ); + } + } + } + + for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { + if (InputsRead & (1 << i)) { + inputs[i] = nr++; + } else { + inputs[i] = -1; + } + } + + /* Fixed, apply to vir0 only */ + if (InputsRead & (1 << VERT_ATTRIB_POS)) + inputs[VERT_ATTRIB_POS] = 0; + if (InputsRead & (1 << VERT_ATTRIB_COLOR0)) + inputs[VERT_ATTRIB_COLOR0] = 2; + if (InputsRead & (1 << VERT_ATTRIB_COLOR1)) + inputs[VERT_ATTRIB_COLOR1] = 3; + for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) + if (InputsRead & (1 << i)) + inputs[i] = 6 + (i - VERT_ATTRIB_TEX0); + + for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { + if (InputsRead & (1 << i)) { + tab[nr++] = i; + } + } + + for (i = 0; i < nr; i++) { + int ci; + + swizzle[i][0] = SWIZZLE_ZERO; + swizzle[i][1] = SWIZZLE_ZERO; + swizzle[i][2] = SWIZZLE_ZERO; + swizzle[i][3] = SWIZZLE_ONE; + + for (ci = 0; ci < VB->AttribPtr[tab[i]]->size; ci++) { + swizzle[i][ci] = ci; + } + } + + R300_NEWPRIM(rmesa); + R300_STATECHANGE(rmesa, vir[0]); + ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = + r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0], + VB->AttribPtr, inputs, tab, nr); + R300_STATECHANGE(rmesa, vir[1]); + ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = + r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, + nr); + + R300_STATECHANGE(rmesa, vic); + rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); + rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead); + + R300_STATECHANGE(rmesa, vof); + rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten); + rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = r300VAPOutputCntl1(ctx, OutputsWritten); + + rmesa->swtcl.vertex_size = + _tnl_install_attrs( ctx, + rmesa->swtcl.vertex_attrs, + rmesa->swtcl.vertex_attr_count, + NULL, 0 ); + + rmesa->swtcl.vertex_size /= 4; + + RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset ); + + vte = rmesa->hw.vte.cmd[1]; + R300_STATECHANGE(rmesa, vte); + rmesa->hw.vte.cmd[1] = vte; + rmesa->hw.vte.cmd[2] = rmesa->swtcl.vertex_size; +} + + +/* Flush vertices in the current dma region. + */ +static void flush_last_swtcl_prim( r300ContextPtr rmesa ) +{ + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s\n", __FUNCTION__); + + rmesa->dma.flush = NULL; + + if (rmesa->dma.current.buf) { + struct r300_dma_region *current = &rmesa->dma.current; + GLuint current_offset = GET_START(current); + + assert (current->start + + rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == + current->ptr); + + if (rmesa->dma.current.start != rmesa->dma.current.ptr) { + + r300EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size + (12*sizeof(int)), __FUNCTION__); + + r300EmitState(rmesa); + + r300EmitVertexAOS( rmesa, + rmesa->swtcl.vertex_size, + current_offset); + + r300EmitVbufPrim( rmesa, + rmesa->swtcl.hw_primitive, + rmesa->swtcl.numverts); + + r300EmitCacheFlush(rmesa); + } + + rmesa->swtcl.numverts = 0; + current->start = current->ptr; + } +} + +/* Alloc space in the current dma region. + */ +static void * +r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize ) +{ + GLuint bytes = vsize * nverts; + + if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) + r300RefillCurrentDmaRegion( rmesa, bytes); + + if (!rmesa->dma.flush) { + rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; + rmesa->dma.flush = flush_last_swtcl_prim; + } + + ASSERT( vsize == rmesa->swtcl.vertex_size * 4 ); + ASSERT( rmesa->dma.flush == flush_last_swtcl_prim ); + ASSERT( rmesa->dma.current.start + + rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == + rmesa->dma.current.ptr ); + + { + GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr); + rmesa->dma.current.ptr += bytes; + rmesa->swtcl.numverts += nverts; + return head; + } +} + +static GLuint reduced_prim[] = { + GL_POINTS, + GL_LINES, + GL_LINES, + GL_LINES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, +}; + +static void r300RasterPrimitive( GLcontext *ctx, GLuint prim ); +static void r300RenderPrimitive( GLcontext *ctx, GLenum prim ); +//static void r300ResetLineStipple( GLcontext *ctx ); + +/*********************************************************************** + * Emit primitives as inline vertices * + ***********************************************************************/ + + +#define HAVE_POINTS 1 +#define HAVE_LINES 1 +#define HAVE_LINE_STRIPS 1 +#define HAVE_TRIANGLES 1 +#define HAVE_TRI_STRIPS 1 +#define HAVE_TRI_STRIP_1 0 +#define HAVE_TRI_FANS 1 +#define HAVE_QUADS 0 +#define HAVE_QUAD_STRIPS 0 +#define HAVE_POLYGONS 1 +#define HAVE_ELTS 1 + +#undef LOCAL_VARS +#undef ALLOC_VERTS +#define CTX_ARG r300ContextPtr rmesa +#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size +#define ALLOC_VERTS( n, size ) r300AllocDmaLowVerts( rmesa, n, size * 4 ) +#define LOCAL_VARS \ + r300ContextPtr rmesa = R300_CONTEXT(ctx); \ + const char *r300verts = (char *)rmesa->swtcl.verts; +#define VERT(x) (r300Vertex *)(r300verts + ((x) * vertsize * sizeof(int))) +#define VERTEX r300Vertex +#define DO_DEBUG_VERTS (1 && (RADEON_DEBUG & DEBUG_VERTS)) +#define PRINT_VERTEX(x) +#undef TAG +#define TAG(x) r300_##x +#include "tnl_dd/t_dd_triemit.h" + + + +/*********************************************************************** + * Macros for t_dd_tritmp.h to draw basic primitives * + ***********************************************************************/ + +#define QUAD( a, b, c, d ) r300_quad( rmesa, a, b, c, d ) +#define TRI( a, b, c ) r300_triangle( rmesa, a, b, c ) +#define LINE( a, b ) r300_line( rmesa, a, b ) +#define POINT( a ) r300_point( rmesa, a ) + +/*********************************************************************** + * Build render functions from dd templates * + ***********************************************************************/ + +#define R300_TWOSIDE_BIT 0x01 +#define R300_UNFILLED_BIT 0x02 +#define R300_MAX_TRIFUNC 0x04 + +static struct { + tnl_points_func points; + tnl_line_func line; + tnl_triangle_func triangle; + tnl_quad_func quad; +} rast_tab[R300_MAX_TRIFUNC]; + +#define DO_FALLBACK 0 +#define DO_UNFILLED (IND & R300_UNFILLED_BIT) +#define DO_TWOSIDE (IND & R300_TWOSIDE_BIT) +#define DO_FLAT 0 +#define DO_OFFSET 0 +#define DO_TRI 1 +#define DO_QUAD 1 +#define DO_LINE 1 +#define DO_POINTS 1 +#define DO_FULL_QUAD 1 + +#define HAVE_RGBA 1 +#define HAVE_SPEC 1 +#define HAVE_BACK_COLORS 0 +#define HAVE_HW_FLATSHADE 1 +#define TAB rast_tab + +#define DEPTH_SCALE 1.0 +#define UNFILLED_TRI unfilled_tri +#define UNFILLED_QUAD unfilled_quad +#define VERT_X(_v) _v->v.x +#define VERT_Y(_v) _v->v.y +#define VERT_Z(_v) _v->v.z +#define AREA_IS_CCW( a ) (a < 0) +#define GET_VERTEX(e) (rmesa->swtcl.verts + (e*rmesa->swtcl.vertex_size*sizeof(int))) + +/* Only used to pull back colors into vertices (ie, we know color is + * floating point). + */ +#define R300_COLOR( dst, src ) \ +do { \ + UNCLAMPED_FLOAT_TO_UBYTE((dst)[0], (src)[2]); \ + UNCLAMPED_FLOAT_TO_UBYTE((dst)[1], (src)[1]); \ + UNCLAMPED_FLOAT_TO_UBYTE((dst)[2], (src)[0]); \ + UNCLAMPED_FLOAT_TO_UBYTE((dst)[3], (src)[3]); \ +} while (0) + +#define VERT_SET_RGBA( v, c ) if (coloroffset) R300_COLOR( v->ub4[coloroffset], c ) +#define VERT_COPY_RGBA( v0, v1 ) if (coloroffset) v0->ui[coloroffset] = v1->ui[coloroffset] +#define VERT_SAVE_RGBA( idx ) if (coloroffset) color[idx] = v[idx]->ui[coloroffset] +#define VERT_RESTORE_RGBA( idx ) if (coloroffset) v[idx]->ui[coloroffset] = color[idx] + +#define R300_SPEC( dst, src ) \ +do { \ + UNCLAMPED_FLOAT_TO_UBYTE((dst)[0], (src)[2]); \ + UNCLAMPED_FLOAT_TO_UBYTE((dst)[1], (src)[1]); \ + UNCLAMPED_FLOAT_TO_UBYTE((dst)[2], (src)[0]); \ +} while (0) + +#define VERT_SET_SPEC( v, c ) if (specoffset) R300_SPEC( v->ub4[specoffset], c ) +#define VERT_COPY_SPEC( v0, v1 ) if (specoffset) COPY_3V(v0->ub4[specoffset], v1->ub4[specoffset]) +#define VERT_SAVE_SPEC( idx ) if (specoffset) spec[idx] = v[idx]->ui[specoffset] +#define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx] + +#undef LOCAL_VARS +#undef TAG +#undef INIT + +#define LOCAL_VARS(n) \ + r300ContextPtr rmesa = R300_CONTEXT(ctx); \ + GLuint color[n], spec[n]; \ + GLuint coloroffset = rmesa->swtcl.coloroffset; \ + GLuint specoffset = rmesa->swtcl.specoffset; \ + (void) color; (void) spec; (void) coloroffset; (void) specoffset; + +/*********************************************************************** + * Helpers for rendering unfilled primitives * + ***********************************************************************/ + +#define RASTERIZE(x) r300RasterPrimitive( ctx, reduced_prim[x] ) +#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive +#undef TAG +#define TAG(x) x +#include "tnl_dd/t_dd_unfilled.h" +#undef IND + + +/*********************************************************************** + * Generate GL render functions * + ***********************************************************************/ + + +#define IND (0) +#define TAG(x) x +#include "tnl_dd/t_dd_tritmp.h" + +#define IND (R300_TWOSIDE_BIT) +#define TAG(x) x##_twoside +#include "tnl_dd/t_dd_tritmp.h" + +#define IND (R300_UNFILLED_BIT) +#define TAG(x) x##_unfilled +#include "tnl_dd/t_dd_tritmp.h" + +#define IND (R300_TWOSIDE_BIT|R300_UNFILLED_BIT) +#define TAG(x) x##_twoside_unfilled +#include "tnl_dd/t_dd_tritmp.h" + + + +static void init_rast_tab( void ) +{ + init(); + init_twoside(); + init_unfilled(); + init_twoside_unfilled(); +} + +/**********************************************************************/ +/* Render unclipped begin/end objects */ +/**********************************************************************/ + +#define RENDER_POINTS( start, count ) \ + for ( ; start < count ; start++) \ + r300_point( rmesa, VERT(start) ) +#define RENDER_LINE( v0, v1 ) \ + r300_line( rmesa, VERT(v0), VERT(v1) ) +#define RENDER_TRI( v0, v1, v2 ) \ + r300_triangle( rmesa, VERT(v0), VERT(v1), VERT(v2) ) +#define RENDER_QUAD( v0, v1, v2, v3 ) \ + r300_quad( rmesa, VERT(v0), VERT(v1), VERT(v2), VERT(v3) ) +#define INIT(x) do { \ + r300RenderPrimitive( ctx, x ); \ +} while (0) +#undef LOCAL_VARS +#define LOCAL_VARS \ + r300ContextPtr rmesa = R300_CONTEXT(ctx); \ + const GLuint vertsize = rmesa->swtcl.vertex_size; \ + const char *r300verts = (char *)rmesa->swtcl.verts; \ + const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \ + const GLboolean stipple = ctx->Line.StippleFlag; \ + (void) elt; (void) stipple; +#define RESET_STIPPLE //if ( stipple ) r200ResetLineStipple( ctx ); +#define RESET_OCCLUSION +#define PRESERVE_VB_DEFS +#define ELT(x) (x) +#define TAG(x) r300_##x##_verts +#include "tnl/t_vb_rendertmp.h" +#undef ELT +#undef TAG +#define TAG(x) r300_##x##_elts +#define ELT(x) elt[x] +#include "tnl/t_vb_rendertmp.h" + + + + +/**********************************************************************/ +/* Choose render functions */ +/**********************************************************************/ +static void r300ChooseRenderState( GLcontext *ctx ) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + r300ContextPtr rmesa = R300_CONTEXT(ctx); + GLuint index = 0; + GLuint flags = ctx->_TriangleCaps; + + if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R300_TWOSIDE_BIT; + if (flags & DD_TRI_UNFILLED) index |= R300_UNFILLED_BIT; + + if (index != rmesa->swtcl.RenderIndex) { + tnl->Driver.Render.Points = rast_tab[index].points; + tnl->Driver.Render.Line = rast_tab[index].line; + tnl->Driver.Render.ClippedLine = rast_tab[index].line; + tnl->Driver.Render.Triangle = rast_tab[index].triangle; + tnl->Driver.Render.Quad = rast_tab[index].quad; + + if (index == 0) { + tnl->Driver.Render.PrimTabVerts = r300_render_tab_verts; + tnl->Driver.Render.PrimTabElts = r300_render_tab_elts; + tnl->Driver.Render.ClippedPolygon = r300_fast_clipped_poly; + } else { + tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts; + tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts; + tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon; + } + + rmesa->swtcl.RenderIndex = index; + } +} + + +static void r300RenderStart(GLcontext *ctx) +{ + r300ContextPtr rmesa = R300_CONTEXT( ctx ); + // fprintf(stderr, "%s\n", __FUNCTION__); + + r300ChooseRenderState(ctx); + r300SetVertexFormat(ctx); + + r300UpdateShaderStates(rmesa); + + r300EmitCacheFlush(rmesa); + + if (rmesa->dma.flush != 0 && + rmesa->dma.flush != flush_last_swtcl_prim) + rmesa->dma.flush( rmesa ); + +} + +static void r300RenderFinish(GLcontext *ctx) +{ +} + +static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim ) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + + if (rmesa->swtcl.hw_primitive != hwprim) { + R300_NEWPRIM( rmesa ); + rmesa->swtcl.hw_primitive = hwprim; + } +} + +static void r300RenderPrimitive(GLcontext *ctx, GLenum prim) +{ + + r300ContextPtr rmesa = R300_CONTEXT(ctx); + rmesa->swtcl.render_primitive = prim; + + if ((prim == GL_TRIANGLES) && (ctx->_TriangleCaps & DD_TRI_UNFILLED)) + return; + + r300RasterPrimitive( ctx, reduced_prim[prim] ); + // fprintf(stderr, "%s\n", __FUNCTION__); + +} + +static void r300ResetLineStipple(GLcontext *ctx) +{ + + +} + +void r300InitSwtcl(GLcontext *ctx) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + r300ContextPtr rmesa = R300_CONTEXT(ctx); + static int firsttime = 1; + + if (firsttime) { + init_rast_tab(); + firsttime = 0; + } + + tnl->Driver.Render.Start = r300RenderStart; + tnl->Driver.Render.Finish = r300RenderFinish; + tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive; + tnl->Driver.Render.ResetLineStipple = r300ResetLineStipple; + tnl->Driver.Render.BuildVertices = _tnl_build_vertices; + tnl->Driver.Render.CopyPV = _tnl_copy_pv; + tnl->Driver.Render.Interp = _tnl_interp; + + /* FIXME: what are these numbers? */ + _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, + 48 * sizeof(GLfloat) ); + + rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf; + rmesa->swtcl.RenderIndex = ~0; + rmesa->swtcl.render_primitive = GL_TRIANGLES; + rmesa->swtcl.hw_primitive = 0; + + _tnl_invalidate_vertex_state( ctx, ~0 ); + _tnl_invalidate_vertices( ctx, ~0 ); + RENDERINPUTS_ZERO( rmesa->tnl_index_bitset ); + + _tnl_need_projected_coords( ctx, GL_FALSE ); + r300ChooseRenderState(ctx); + + _mesa_validate_all_lighting_tables( ctx ); + + tnl->Driver.NotifyMaterialChange = + _mesa_validate_all_lighting_tables; +} + +void r300DestroySwtcl(GLcontext *ctx) +{ +} + +void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset) +{ + int cmd_reserved = 0; + int cmd_written = 0; + + drm_radeon_cmd_header_t *cmd = NULL; + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s: vertex_size %d, offset 0x%x \n", + __FUNCTION__, vertex_size, offset); + + start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2), 2); + e32(1); + e32(vertex_size | (vertex_size << 8)); + e32(offset); +} + +void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr) +{ + + int cmd_reserved = 0; + int cmd_written = 0; + int type, num_verts; + drm_radeon_cmd_header_t *cmd = NULL; + + type = r300PrimitiveType(rmesa, primitive); + num_verts = r300NumVerts(rmesa, vertex_nr, primitive); + + start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0); + e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type); +} diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.h b/src/mesa/drivers/dri/r300/r300_swtcl.h new file mode 100644 index 0000000..2ea6ced --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_swtcl.h @@ -0,0 +1,45 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +/* + * Authors: + * Keith Whitwell - original r200 code + * Dave Airlie + */ + +#ifndef __R300_SWTCL_H__ +#define __R300_SWTCL_H__ + +#include "mtypes.h" +#include "swrast/swrast.h" +#include "r300_context.h" + +extern void r300InitSwtcl( GLcontext *ctx ); +extern void r300DestroySwtcl( GLcontext *ctx ); + +#endif diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index 2a21c61..1805cec 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -294,27 +294,20 @@ static const struct gl_texture_format *r300Choose8888TexFormat(GLenum srcFormat, const GLubyte littleEndian = *((const GLubyte *)&ui); if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) || - (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE - && !littleEndian) || (srcFormat == GL_ABGR_EXT - && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) - || (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE - && littleEndian)) { + (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) || + (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) || + (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && littleEndian)) { return &_mesa_texformat_rgba8888; - } else - if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) - || (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE - && littleEndian) || (srcFormat == GL_ABGR_EXT - && srcType == GL_UNSIGNED_INT_8_8_8_8) - || (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE - && !littleEndian)) { + } else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) || + (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) || + (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) || + (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian)) { return &_mesa_texformat_rgba8888_rev; - } else if (srcFormat == GL_BGRA && - ((srcType == GL_UNSIGNED_BYTE && !littleEndian) || - srcType == GL_UNSIGNED_INT_8_8_8_8)) { + } else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && !littleEndian) || + srcType == GL_UNSIGNED_INT_8_8_8_8)) { return &_mesa_texformat_argb8888_rev; - } else if (srcFormat == GL_BGRA && - ((srcType == GL_UNSIGNED_BYTE && littleEndian) || - srcType == GL_UNSIGNED_INT_8_8_8_8_REV)) { + } else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && littleEndian) || + srcType == GL_UNSIGNED_INT_8_8_8_8_REV)) { return &_mesa_texformat_argb8888; } else return _dri_texformat_argb8888; @@ -563,34 +556,31 @@ r300ValidateClientStorage(GLcontext * ctx, GLenum target, return 0; } - { - GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth, - format, type); + GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth, + format, type); - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: srcRowStride %d/%x\n", - __FUNCTION__, srcRowStride, srcRowStride); + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "%s: srcRowStride %d/%x\n", + __FUNCTION__, srcRowStride, srcRowStride); - /* Could check this later in upload, pitch restrictions could be - * relaxed, but would need to store the image pitch somewhere, - * as packing details might change before image is uploaded: - */ - if (!r300IsGartMemory(rmesa, pixels, srcHeight * srcRowStride) - || (srcRowStride & 63)) - return 0; + /* Could check this later in upload, pitch restrictions could be + * relaxed, but would need to store the image pitch somewhere, + * as packing details might change before image is uploaded: + */ + if (!r300IsGartMemory(rmesa, pixels, srcHeight * srcRowStride) + || (srcRowStride & 63)) + return 0; - /* Have validated that _mesa_transfer_teximage would be a straight - * memcpy at this point. NOTE: future calls to TexSubImage will - * overwrite the client data. This is explicitly mentioned in the - * extension spec. - */ - texImage->Data = (void *)pixels; - texImage->IsClientData = GL_TRUE; - texImage->RowStride = - srcRowStride / texImage->TexFormat->TexelBytes; + /* Have validated that _mesa_transfer_teximage would be a straight + * memcpy at this point. NOTE: future calls to TexSubImage will + * overwrite the client data. This is explicitly mentioned in the + * extension spec. + */ + texImage->Data = (void *)pixels; + texImage->IsClientData = GL_TRUE; + texImage->RowStride = srcRowStride / texImage->TexFormat->TexelBytes; - return 1; - } + return 1; } static void r300TexImage1D(GLcontext * ctx, GLenum target, GLint level, diff --git a/src/mesa/drivers/dri/r300/r300_texmem.c b/src/mesa/drivers/dri/r300/r300_texmem.c index e2e8355..723601a 100644 --- a/src/mesa/drivers/dri/r300/r300_texmem.c +++ b/src/mesa/drivers/dri/r300/r300_texmem.c @@ -63,29 +63,16 @@ SOFTWARE. */ void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t) { + int i; + if (RADEON_DEBUG & DEBUG_TEXTURE) { fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__, (void *)t, (void *)t->base.tObj); } - if (rmesa != NULL) { - unsigned i; - - for (i = 0; i < rmesa->radeon.glCtx->Const.MaxTextureUnits; i++) { - if (t == rmesa->state.texture.unit[i].texobj) { - rmesa->state.texture.unit[i].texobj = NULL; - /* This code below is meant to shorten state - pushed to the hardware by not programming - unneeded units. - - This does not appear to be worthwhile on R300 */ -#if 0 - remove_from_list(&rmesa->hw.tex[i]); - make_empty_list(&rmesa->hw.tex[i]); - remove_from_list(&rmesa->hw.cube[i]); - make_empty_list(&rmesa->hw.cube[i]); -#endif - } + for (i = 0; i < rmesa->radeon.glCtx->Const.MaxTextureUnits; i++) { + if (rmesa->state.texture.unit[i].texobj == t) { + rmesa->state.texture.unit[i].texobj = NULL; } } } @@ -518,7 +505,7 @@ int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t, GLuint face) t->base.lastLevel); } - if (!t || t->base.totalSize == 0) + if (t->base.totalSize == 0) return 0; if (RADEON_DEBUG & DEBUG_SYNC) { diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c index 8203189..1d2909f 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -54,7 +54,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5 \ || ((f) >= MESA_FORMAT_RGBA_FLOAT32 && \ (f) <= MESA_FORMAT_INTENSITY_FLOAT16)) \ - && tx_table_le[f].flag ) + && tx_table[f].flag ) #define _ASSIGN(entry, format) \ [ MESA_FORMAT_ ## entry ] = { format, 0, 1} @@ -70,53 +70,19 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. static const struct tx_table { GLuint format, filter, flag; -} tx_table_be[] = { - /* *INDENT-OFF* */ - _ASSIGN(RGBA8888, R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)), - _ASSIGN(RGBA8888_REV, R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8)), - _ASSIGN(ARGB8888, R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)), - _ASSIGN(ARGB8888_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)), - _ASSIGN(RGB888, 0xffffffff), - _ASSIGN(RGB565, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)), - _ASSIGN(RGB565_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)), - _ASSIGN(ARGB4444, R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4)), - _ASSIGN(ARGB4444_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4)), - _ASSIGN(ARGB1555, R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)), - _ASSIGN(ARGB1555_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)), - _ASSIGN(AL88, R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8)), - _ASSIGN(AL88_REV, R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8)), - _ASSIGN(RGB332, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z3Y3X2)), - _ASSIGN(A8, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8)), - _ASSIGN(L8, R300_EASY_TX_FORMAT(X, X, X, ONE, X8)), - _ASSIGN(I8, R300_EASY_TX_FORMAT(X, X, X, X, X8)), - _ASSIGN(CI8, R300_EASY_TX_FORMAT(X, X, X, X, X8)), - _ASSIGN(YCBCR, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8)|R300_TX_FORMAT_YUV_MODE ), - _ASSIGN(YCBCR_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8)|R300_TX_FORMAT_YUV_MODE), - _ASSIGN(RGB_DXT1, R300_EASY_TX_FORMAT(X, Y, Z, ONE, DXT1)), - _ASSIGN(RGBA_DXT1, R300_EASY_TX_FORMAT(X, Y, Z, W, DXT1)), - _ASSIGN(RGBA_DXT3, R300_EASY_TX_FORMAT(X, Y, Z, W, DXT3)), - _ASSIGN(RGBA_DXT5, R300_EASY_TX_FORMAT(Y, Z, W, X, DXT5)), - _ASSIGN(RGBA_FLOAT32, R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R32G32B32A32)), - _ASSIGN(RGBA_FLOAT16, R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16)), - _ASSIGN(RGB_FLOAT32, 0xffffffff), - _ASSIGN(RGB_FLOAT16, 0xffffffff), - _ASSIGN(ALPHA_FLOAT32, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I32)), - _ASSIGN(ALPHA_FLOAT16, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I16)), - _ASSIGN(LUMINANCE_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I32)), - _ASSIGN(LUMINANCE_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I16)), - _ASSIGN(LUMINANCE_ALPHA_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, Y, FL_I32A32)), - _ASSIGN(LUMINANCE_ALPHA_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, Y, FL_I16A16)), - _ASSIGN(INTENSITY_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, X, FL_I32)), - _ASSIGN(INTENSITY_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, X, FL_I16)), - /* *INDENT-ON* */ -}; - -static const struct tx_table tx_table_le[] = { +} tx_table[] = { /* *INDENT-OFF* */ +#ifdef MESA_LITTLE_ENDIAN _ASSIGN(RGBA8888, R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8)), _ASSIGN(RGBA8888_REV, R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)), _ASSIGN(ARGB8888, R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)), _ASSIGN(ARGB8888_REV, R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)), +#else + _ASSIGN(RGBA8888, R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)), + _ASSIGN(RGBA8888_REV, R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8)), + _ASSIGN(ARGB8888, R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)), + _ASSIGN(ARGB8888_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)), +#endif _ASSIGN(RGB888, R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8)), _ASSIGN(RGB565, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)), _ASSIGN(RGB565_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)), @@ -131,8 +97,8 @@ static const struct tx_table tx_table_le[] = { _ASSIGN(L8, R300_EASY_TX_FORMAT(X, X, X, ONE, X8)), _ASSIGN(I8, R300_EASY_TX_FORMAT(X, X, X, X, X8)), _ASSIGN(CI8, R300_EASY_TX_FORMAT(X, X, X, X, X8)), - _ASSIGN(YCBCR, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8)|R300_TX_FORMAT_YUV_MODE ), - _ASSIGN(YCBCR_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8)|R300_TX_FORMAT_YUV_MODE), + _ASSIGN(YCBCR, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8) | R300_TX_FORMAT_YUV_MODE), + _ASSIGN(YCBCR_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8) | R300_TX_FORMAT_YUV_MODE), _ASSIGN(RGB_DXT1, R300_EASY_TX_FORMAT(X, Y, Z, ONE, DXT1)), _ASSIGN(RGBA_DXT1, R300_EASY_TX_FORMAT(X, Y, Z, W, DXT1)), _ASSIGN(RGBA_DXT3, R300_EASY_TX_FORMAT(X, Y, Z, W, DXT3)), @@ -178,22 +144,10 @@ static void r300SetTexImages(r300ContextPtr rmesa, /* Set the hardware texture format */ - if (!t->image_override && VALID_FORMAT(baseImage->TexFormat->MesaFormat)) { - if (_mesa_little_endian()) { - t->format = - tx_table_le[baseImage->TexFormat->MesaFormat]. - format; - t->filter |= - tx_table_le[baseImage->TexFormat->MesaFormat]. - filter; - } else { - t->format = - tx_table_be[baseImage->TexFormat->MesaFormat]. - format; - t->filter |= - tx_table_be[baseImage->TexFormat->MesaFormat]. - filter; - } + if (!t->image_override + && VALID_FORMAT(baseImage->TexFormat->MesaFormat)) { + t->format = tx_table[baseImage->TexFormat->MesaFormat].format; + t->filter |= tx_table[baseImage->TexFormat->MesaFormat].filter; } else if (!t->image_override) { _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__); @@ -526,11 +480,11 @@ static GLboolean r300UpdateTexture(GLcontext * ctx, int unit) */ rmesa->state.texture.unit[unit].texobj->base.bound &= - ~(1UL << unit); + ~(1 << unit); } rmesa->state.texture.unit[unit].texobj = t; - t->base.bound |= (1UL << unit); + t->base.bound |= (1 << unit); t->dirty_state |= 1 << unit; driUpdateTextureLRU((driTextureObject *) t); /* XXX: should be locked! */ } @@ -538,15 +492,15 @@ static GLboolean r300UpdateTexture(GLcontext * ctx, int unit) return !t->border_fallback; } -void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname, +void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname, unsigned long long offset, GLint depth, GLuint pitch) { r300ContextPtr rmesa = - (r300ContextPtr)((__DRIcontextPrivate*)pDRICtx->private)->driverPrivate; + (r300ContextPtr) ((__DRIcontextPrivate *) pDRICtx->private)-> + driverPrivate; struct gl_texture_object *tObj = - _mesa_lookup_texture(rmesa->radeon.glCtx, texname); + _mesa_lookup_texture(rmesa->radeon.glCtx, texname); r300TexObjPtr t; - int idx; if (!tObj) return; @@ -563,24 +517,24 @@ void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname, switch (depth) { case 32: - idx = 2; + t->format = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); + t->filter |= tx_table[2].filter; t->pitch_reg /= 4; break; case 24: default: - idx = 4; + t->format = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); + t->filter |= tx_table[4].filter; t->pitch_reg /= 4; break; case 16: - idx = 5; + t->format = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5); + t->filter |= tx_table[5].filter; t->pitch_reg /= 2; break; } t->pitch_reg--; - - t->format = tx_table_le[idx].format; - t->filter |= tx_table_le[idx].filter; } static GLboolean r300UpdateTextureUnit(GLcontext * ctx, int unit) diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index 1d90ade..7d4e8c9 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -29,6 +29,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * \file * * \author Aapo Tahkola + * + * \author Oliver McFadden + * + * For a description of the vertex program instruction set see r300_reg.h. */ #include "glheader.h" @@ -55,54 +59,58 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #error Cannot change these! #endif -#define SCALAR_FLAG (1<<31) -#define FLAG_MASK (1<<31) -#define OP_MASK (0xf) /* we are unlikely to have more than 15 */ -#define OPN(operator, ip) {#operator, OPCODE_##operator, ip} - -static struct { - char *name; - int opcode; - unsigned long ip; /* number of input operands and flags */ -} op_names[] = { - /* *INDENT-OFF* */ - OPN(ABS, 1), - OPN(ADD, 2), - OPN(ARL, 1 | SCALAR_FLAG), - OPN(DP3, 2), - OPN(DP4, 2), - OPN(DPH, 2), - OPN(DST, 2), - OPN(EX2, 1 | SCALAR_FLAG), - OPN(EXP, 1 | SCALAR_FLAG), - OPN(FLR, 1), - OPN(FRC, 1), - OPN(LG2, 1 | SCALAR_FLAG), - OPN(LIT, 1), - OPN(LOG, 1 | SCALAR_FLAG), - OPN(MAD, 3), - OPN(MAX, 2), - OPN(MIN, 2), - OPN(MOV, 1), - OPN(MUL, 2), - OPN(POW, 2 | SCALAR_FLAG), - OPN(RCP, 1 | SCALAR_FLAG), - OPN(RSQ, 1 | SCALAR_FLAG), - OPN(SGE, 2), - OPN(SLT, 2), - OPN(SUB, 2), - OPN(SWZ, 1), - OPN(XPD, 2), - OPN(RCC, 0), //extra - OPN(PRINT, 0), - OPN(END, 0) - /* *INDENT-ON* */ -}; - -#undef OPN +/* TODO: Get rid of t_src_class call */ +#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \ + ((t_src_class(a.File) == VSF_IN_CLASS_PARAM && \ + t_src_class(b.File) == VSF_IN_CLASS_PARAM) || \ + (t_src_class(a.File) == VSF_IN_CLASS_ATTR && \ + t_src_class(b.File) == VSF_IN_CLASS_ATTR)))) \ + +#define ZERO_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4)) + +#define ZERO_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4)) + +#define ZERO_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4)) + +#define ONE_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4)) + +#define ONE_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4)) + +#define ONE_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4)) + +/* DP4 version seems to trigger some hw peculiarity */ +//#define PREFER_DP4 + +#define FREE_TEMPS() \ + do { \ + if(u_temp_i < vp->num_temporaries) { \ + WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_i); \ + vp->native = GL_FALSE; \ + } \ + u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \ + } while (0) int r300VertexProgUpdateParams(GLcontext * ctx, - struct r300_vertex_program_cont *vp, float *dst) + struct r300_vertex_program_cont *vp, + float *dst) { int pi; struct gl_vertex_program *mesa_vp = &vp->mesa_program; @@ -222,7 +230,7 @@ static unsigned long t_src_class(enum register_file file) } } -static __inline unsigned long t_swizzle(GLubyte swizzle) +static inline unsigned long t_swizzle(GLubyte swizzle) { /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ return swizzle; @@ -234,8 +242,8 @@ static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller) int i; if (vp == NULL) { - fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, - caller); + fprintf(stderr, "vp null in call to %s from %s\n", + __FUNCTION__, caller); return; } @@ -276,6 +284,8 @@ static unsigned long t_src_index(struct r300_vertex_program *vp, } } +/* these two functions should probably be merged... */ + static unsigned long t_src(struct r300_vertex_program *vp, struct prog_src_register *src) { @@ -294,7 +304,9 @@ static unsigned long t_src(struct r300_vertex_program *vp, static unsigned long t_src_scalar(struct r300_vertex_program *vp, struct prog_src_register *src) { - + /* src->NegateBase uses the NEGATE_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ return MAKE_VSF_SOURCE(t_src_index(vp, src), t_swizzle(GET_SWZ(src->Swizzle, 0)), t_swizzle(GET_SWZ(src->Swizzle, 0)), @@ -306,128 +318,741 @@ static unsigned long t_src_scalar(struct r300_vertex_program *vp, (src->RelAddr << 4); } -static unsigned long t_opcode(enum prog_opcode opcode) +static GLboolean valid_dst(struct r300_vertex_program *vp, + struct prog_dst_register *dst) { + if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) { + return GL_FALSE; + } else if (dst->File == PROGRAM_ADDRESS) { + assert(dst->Index == 0); + } - switch (opcode) { - /* *INDENT-OFF* */ - case OPCODE_ARL: return R300_VPI_OUT_OP_ARL; - case OPCODE_DST: return R300_VPI_OUT_OP_DST; - case OPCODE_EX2: return R300_VPI_OUT_OP_EX2; - case OPCODE_EXP: return R300_VPI_OUT_OP_EXP; - case OPCODE_FRC: return R300_VPI_OUT_OP_FRC; - case OPCODE_LG2: return R300_VPI_OUT_OP_LG2; - case OPCODE_LOG: return R300_VPI_OUT_OP_LOG; - case OPCODE_MAX: return R300_VPI_OUT_OP_MAX; - case OPCODE_MIN: return R300_VPI_OUT_OP_MIN; - case OPCODE_MUL: return R300_VPI_OUT_OP_MUL; - case OPCODE_RCP: return R300_VPI_OUT_OP_RCP; - case OPCODE_RSQ: return R300_VPI_OUT_OP_RSQ; - case OPCODE_SGE: return R300_VPI_OUT_OP_SGE; - case OPCODE_SLT: return R300_VPI_OUT_OP_SLT; - case OPCODE_DP4: return R300_VPI_OUT_OP_DOT; - /* *INDENT-ON* */ + return GL_TRUE; +} - default: - fprintf(stderr, "%s: Should not be called with opcode %d!", - __FUNCTION__, opcode); - } - _mesa_exit(-1); - return 0; +/* + * Instruction Inputs Output Description + * ----------- ------ ------ -------------------------------- + * ABS v v absolute value + * ADD v,v v add + * ARL s a address register load + * DP3 v,v ssss 3-component dot product + * DP4 v,v ssss 4-component dot product + * DPH v,v ssss homogeneous dot product + * DST v,v v distance vector + * EX2 s ssss exponential base 2 + * EXP s v exponential base 2 (approximate) + * FLR v v floor + * FRC v v fraction + * LG2 s ssss logarithm base 2 + * LIT v v compute light coefficients + * LOG s v logarithm base 2 (approximate) + * MAD v,v,v v multiply and add + * MAX v,v v maximum + * MIN v,v v minimum + * MOV v v move + * MUL v,v v multiply + * POW s,s ssss exponentiate + * RCP s ssss reciprocal + * RSQ s ssss reciprocal square root + * SGE v,v v set on greater than or equal + * SLT v,v v set on less than + * SUB v,v v subtract + * SWZ v v extended swizzle + * XPD v,v v cross product + * + * Table X.5: Summary of vertex program instructions. "v" indicates a + * floating-point vector input or output, "s" indicates a floating-point + * scalar input, "ssss" indicates a scalar output replicated across a + * 4-component result vector, and "a" indicates a single address register + * component. + */ + +static GLuint *t_opcode_abs(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W + + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_MAX, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = t_src(vp, &src[0]); + inst[2] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), + t_src_class(src[0].File), + (!src[0]. + NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + inst[3] = 0; + + return inst; } -static unsigned long op_operands(enum prog_opcode opcode) +static GLuint *t_opcode_add(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) { - int i; + unsigned long hw_op; - /* Can we trust mesas opcodes to be in order ? */ - for (i = 0; i < sizeof(op_names) / sizeof(*op_names); i++) - if (op_names[i].opcode == opcode) - return op_names[i].ip; +#if 1 + hw_op = (src[0].File == PROGRAM_TEMPORARY + && src[1].File == + PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : + R300_VPI_OUT_OP_MAD; + + inst[0] = + MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = ONE_SRC_0; + inst[2] = t_src(vp, &src[0]); + inst[3] = t_src(vp, &src[1]); +#else + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = ZERO_SRC_1; - fprintf(stderr, "op %d not found in op_names\n", opcode); - _mesa_exit(-1); - return 0; +#endif + + return inst; } -static GLboolean valid_dst(struct r300_vertex_program *vp, - struct prog_dst_register *dst) +static GLuint *t_opcode_arl(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) { - if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) { - return GL_FALSE; - } else if (dst->File == PROGRAM_ADDRESS) { - assert(dst->Index == 0); - } + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_ARL, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); - return GL_TRUE; + inst[1] = t_src(vp, &src[0]); + inst[2] = ZERO_SRC_0; + inst[3] = ZERO_SRC_0; + + return inst; } -/* TODO: Get rid of t_src_class call */ -#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \ - ((t_src_class(a.File) == VSF_IN_CLASS_PARAM && \ - t_src_class(b.File) == VSF_IN_CLASS_PARAM) || \ - (t_src_class(a.File) == VSF_IN_CLASS_ATTR && \ - t_src_class(b.File) == VSF_IN_CLASS_ATTR)))) \ +static GLuint *t_opcode_dp3(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} + + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + SWIZZLE_ZERO, t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + + inst[2] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), + SWIZZLE_ZERO, t_src_class(src[1].File), + src[1]. + NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | + (src[1].RelAddr << 4); + + inst[3] = ZERO_SRC_1; + + return inst; +} -#define ZERO_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4)) +static GLuint *t_opcode_dp4(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); -#define ZERO_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4)) + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = ZERO_SRC_1; -#define ZERO_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4)) + return inst; +} -#define ONE_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4)) +static GLuint *t_opcode_dph(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + VSF_IN_COMPONENT_ONE, t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + inst[2] = t_src(vp, &src[1]); + inst[3] = ZERO_SRC_1; + + return inst; +} -#define ONE_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4)) +static GLuint *t_opcode_dst(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_DST, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); -#define ONE_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4)) + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = ZERO_SRC_1; -/* DP4 version seems to trigger some hw peculiarity */ -//#define PREFER_DP4 + return inst; +} -#define FREE_TEMPS() \ - do { \ - if(u_temp_i < vp->num_temporaries) { \ - WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_i); \ - vp->native = GL_FALSE; \ - } \ - u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \ - } while (0) +static GLuint *t_opcode_ex2(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_EX2, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); -static void r300TranslateVertexShader(struct r300_vertex_program *vp, - struct prog_instruction *vpi) + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = ZERO_SRC_0; + inst[3] = ZERO_SRC_0; + + return inst; +} + +static GLuint *t_opcode_exp(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_EXP, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = ZERO_SRC_0; + inst[3] = ZERO_SRC_0; + + return inst; +} + +static GLuint *t_opcode_flr(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3], int *u_temp_i) +{ + /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} + ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */ + + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, *u_temp_i, + t_dst_mask(vpi->DstReg.WriteMask), + VSF_OUT_CLASS_TMP); + + inst[1] = t_src(vp, &src[0]); + inst[2] = ZERO_SRC_0; + inst[3] = ZERO_SRC_0; + inst += 4; + + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = t_src(vp, &src[0]); + inst[2] = + MAKE_VSF_SOURCE(*u_temp_i, VSF_IN_COMPONENT_X, + VSF_IN_COMPONENT_Y, VSF_IN_COMPONENT_Z, + VSF_IN_COMPONENT_W, VSF_IN_CLASS_TMP, + /* Not 100% sure about this */ + (!src[0]. + NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE + /*VSF_FLAG_ALL */ ); + + inst[3] = ZERO_SRC_0; + (*u_temp_i)--; + + return inst; +} + +static GLuint *t_opcode_frc(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = t_src(vp, &src[0]); + inst[2] = ZERO_SRC_0; + inst[3] = ZERO_SRC_0; + + return inst; +} + +static GLuint *t_opcode_lg2(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X} + + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_LG2, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + inst[2] = ZERO_SRC_0; + inst[3] = ZERO_SRC_0; + + return inst; +} + +static GLuint *t_opcode_lit(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} + + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_LIT, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + /* NOTE: Users swizzling might not work. */ + inst[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + VSF_IN_COMPONENT_ZERO, // z + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : + VSF_FLAG_NONE) | (src[0].RelAddr << 4); + inst[2] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + VSF_IN_COMPONENT_ZERO, // z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : + VSF_FLAG_NONE) | (src[0].RelAddr << 4); + inst[3] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + VSF_IN_COMPONENT_ZERO, // z + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : + VSF_FLAG_NONE) | (src[0].RelAddr << 4); + + return inst; +} + +static GLuint *t_opcode_log(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_LOG, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = ZERO_SRC_0; + inst[3] = ZERO_SRC_0; + + return inst; +} + +static GLuint *t_opcode_mad(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) { - int i, cur_reg = 0; - VERTEX_SHADER_INSTRUCTION *o_inst; - unsigned long operands; - int are_srcs_scalar; unsigned long hw_op; - /* Initial value should be last tmp reg that hw supports. - Strangely enough r300 doesnt mind even though these would be out of range. - Smart enough to realize that it doesnt need it? */ - int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; - struct prog_src_register src[3]; - vp->pos_end = 0; /* Not supported yet */ - vp->program.length = 0; - /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */ + hw_op = (src[0].File == PROGRAM_TEMPORARY + && src[1].File == PROGRAM_TEMPORARY + && src[2].File == + PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : + R300_VPI_OUT_OP_MAD; + + inst[0] = + MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = t_src(vp, &src[2]); + + return inst; +} + +static GLuint *t_opcode_max(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_MAX, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = ZERO_SRC_1; + + return inst; +} + +static GLuint *t_opcode_min(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_MIN, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = ZERO_SRC_1; + + return inst; +} + +static GLuint *t_opcode_mov(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} + +#if 1 + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = ZERO_SRC_0; + inst[3] = ZERO_SRC_0; +#else + hw_op = + (src[0].File == + PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : + R300_VPI_OUT_OP_MAD; + + inst[0] = + MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = ONE_SRC_0; + inst[3] = ZERO_SRC_0; +#endif + + return inst; +} + +static GLuint *t_opcode_mul(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + unsigned long hw_op; + + // HW mul can take third arg but appears to have some other limitations. + + hw_op = (src[0].File == PROGRAM_TEMPORARY + && src[1].File == + PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : + R300_VPI_OUT_OP_MAD; + + inst[0] = + MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + + inst[3] = ZERO_SRC_1; + + return inst; +} + +static GLuint *t_opcode_pow(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_POW, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = ZERO_SRC_0; + inst[3] = t_src_scalar(vp, &src[1]); + + return inst; +} + +static GLuint *t_opcode_rcp(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_RCP, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = ZERO_SRC_0; + inst[3] = ZERO_SRC_0; + + return inst; +} + +static GLuint *t_opcode_rsq(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_RSQ, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = ZERO_SRC_0; + inst[3] = ZERO_SRC_0; + + return inst; +} + +static GLuint *t_opcode_sge(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_SGE, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = ZERO_SRC_1; + + return inst; +} + +static GLuint *t_opcode_slt(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_SLT, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = ZERO_SRC_1; + + return inst; +} + +static GLuint *t_opcode_sub(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + unsigned long hw_op; + + //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W + +#if 1 + hw_op = (src[0].File == PROGRAM_TEMPORARY + && src[1].File == + PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : + R300_VPI_OUT_OP_MAD; + + inst[0] = + MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = ONE_SRC_0; + inst[3] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), + t_src_class(src[1].File), + (!src[1]. + NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[1].RelAddr << 4); +#else + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = t_src(vp, &src[0]); + inst[2] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), + t_src_class(src[1].File), + (!src[1]. + NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[1].RelAddr << 4); + inst[3] = 0; +#endif + + return inst; +} + +static GLuint *t_opcode_swz(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} + +#if 1 + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = ZERO_SRC_0; + inst[3] = ZERO_SRC_0; +#else + hw_op = + (src[0].File == + PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : + R300_VPI_OUT_OP_MAD; + + inst[0] = + MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = ONE_SRC_0; + inst[3] = ZERO_SRC_0; +#endif + + return inst; +} + +static GLuint *t_opcode_xpd(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3], int *u_temp_i) +{ + /* mul r0, r1.yzxw, r2.zxyw + mad r0, -r2.yzxw, r1.zxyw, r0 + NOTE: might need MAD_2 + */ + + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, *u_temp_i, + t_dst_mask(vpi->DstReg.WriteMask), + VSF_OUT_CLASS_TMP); + + inst[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : + VSF_FLAG_NONE) | (src[0].RelAddr << 4); + + inst[2] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w + t_src_class(src[1].File), + src[1]. + NegateBase ? VSF_FLAG_ALL : + VSF_FLAG_NONE) | (src[1].RelAddr << 4); + + inst[3] = ZERO_SRC_1; + inst += 4; + (*u_temp_i)--; + + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w + t_src_class(src[1].File), + (!src[1]. + NegateBase) ? VSF_FLAG_ALL : + VSF_FLAG_NONE) | (src[1].RelAddr << 4); + + inst[2] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : + VSF_FLAG_NONE) | (src[0].RelAddr << 4); + + inst[3] = + MAKE_VSF_SOURCE(*u_temp_i + 1, VSF_IN_COMPONENT_X, + VSF_IN_COMPONENT_Y, VSF_IN_COMPONENT_Z, + VSF_IN_COMPONENT_W, VSF_IN_CLASS_TMP, + VSF_FLAG_NONE); + + return inst; +} + +static void t_inputs_outputs(struct r300_vertex_program *vp) +{ + int i; + int cur_reg = 0; for (i = 0; i < VERT_ATTRIB_MAX; i++) vp->inputs[i] = -1; @@ -437,39 +1062,71 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp, assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)); - /* Assign outputs */ - if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) + if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) { vp->outputs[VERT_RESULT_HPOS] = cur_reg++; + } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) + if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) { vp->outputs[VERT_RESULT_PSIZ] = cur_reg++; + } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) + if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) { vp->outputs[VERT_RESULT_COL0] = cur_reg++; + } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) - vp->outputs[VERT_RESULT_COL1] = cur_reg++; - -#if 0 /* Not supported yet */ - if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) - vp->outputs[VERT_RESULT_BFC0] = cur_reg++; + if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) { + vp->outputs[VERT_RESULT_COL1] = + vp->outputs[VERT_RESULT_COL0] + 1; + cur_reg = vp->outputs[VERT_RESULT_COL1] + 1; + } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) - vp->outputs[VERT_RESULT_BFC1] = cur_reg++; + if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) { + vp->outputs[VERT_RESULT_BFC0] = + vp->outputs[VERT_RESULT_COL0] + 2; + cur_reg = vp->outputs[VERT_RESULT_BFC0] + 2; + } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) + if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) { + vp->outputs[VERT_RESULT_BFC1] = + vp->outputs[VERT_RESULT_COL0] + 3; + cur_reg = vp->outputs[VERT_RESULT_BFC1] + 1; + } +#if 0 + if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) { vp->outputs[VERT_RESULT_FOGC] = cur_reg++; + } #endif - for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) - if (vp->key.OutputsWritten & (1 << i)) + for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) { + if (vp->key.OutputsWritten & (1 << i)) { vp->outputs[i] = cur_reg++; + } + } +} + +static void r300TranslateVertexShader(struct r300_vertex_program *vp, + struct prog_instruction *vpi) +{ + int i; + GLuint *inst; + unsigned long num_operands; + /* Initial value should be last tmp reg that hw supports. + Strangely enough r300 doesnt mind even though these would be out of range. + Smart enough to realize that it doesnt need it? */ + int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; + struct prog_src_register src[3]; + vp->pos_end = 0; /* Not supported yet */ + vp->program.length = 0; + /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */ vp->translated = GL_TRUE; vp->native = GL_TRUE; - o_inst = vp->program.body.i; - for (; vpi->Opcode != OPCODE_END; vpi++, o_inst++) { + t_inputs_outputs(vp); + + for (inst = vp->program.body.i; vpi->Opcode != OPCODE_END; + vpi++, inst += 4) { + FREE_TEMPS(); if (!valid_dst(vp, &vpi->DstReg)) { @@ -478,61 +1135,62 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp, vpi->DstReg.Index = u_temp_i; } - operands = op_operands(vpi->Opcode); - are_srcs_scalar = operands & SCALAR_FLAG; - operands &= OP_MASK; + num_operands = _mesa_num_inst_src_regs(vpi->Opcode); - for (i = 0; i < operands; i++) + /* copy the sources (src) from mesa into a local variable... is this needed? */ + for (i = 0; i < num_operands; i++) { src[i] = vpi->SrcReg[i]; + } - if (operands == 3) { /* TODO: scalars */ + if (num_operands == 3) { /* TODO: scalars */ if (CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2])) { - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i, - VSF_FLAG_ALL, + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, + u_temp_i, VSF_FLAG_ALL, VSF_OUT_CLASS_TMP); - o_inst->src[0] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), + inst[1] = + MAKE_VSF_SOURCE(t_src_index + (vp, &src[2]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, - t_src_class(src[2].File), - VSF_FLAG_NONE) | (src[2]. - RelAddr << - 4); + t_src_class(src[2]. + File), + VSF_FLAG_NONE) | + (src[2].RelAddr << 4); - o_inst->src[1] = ZERO_SRC_2; - o_inst->src[2] = ZERO_SRC_2; - o_inst++; + inst[2] = ZERO_SRC_2; + inst[3] = ZERO_SRC_2; + inst += 4; src[2].File = PROGRAM_TEMPORARY; src[2].Index = u_temp_i; src[2].RelAddr = 0; u_temp_i--; } - } - if (operands >= 2) { + if (num_operands >= 2) { if (CMP_SRCS(src[1], src[0])) { - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i, - VSF_FLAG_ALL, + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, + u_temp_i, VSF_FLAG_ALL, VSF_OUT_CLASS_TMP); - o_inst->src[0] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + inst[1] = + MAKE_VSF_SOURCE(t_src_index + (vp, &src[0]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, - t_src_class(src[0].File), - VSF_FLAG_NONE) | (src[0]. - RelAddr << - 4); + t_src_class(src[0]. + File), + VSF_FLAG_NONE) | + (src[0].RelAddr << 4); - o_inst->src[1] = ZERO_SRC_0; - o_inst->src[2] = ZERO_SRC_0; - o_inst++; + inst[2] = ZERO_SRC_0; + inst[3] = ZERO_SRC_0; + inst += 4; src[0].File = PROGRAM_TEMPORARY; src[0].Index = u_temp_i; @@ -541,517 +1199,103 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp, } } - /* These ops need special handling. */ switch (vpi->Opcode) { - case OPCODE_POW: - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_POW, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = t_src_scalar(vp, &src[0]); - o_inst->src[1] = ZERO_SRC_0; - o_inst->src[2] = t_src_scalar(vp, &src[1]); - goto next; - - case OPCODE_MOV: //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} - case OPCODE_SWZ: -#if 1 - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = ZERO_SRC_0; - o_inst->src[2] = ZERO_SRC_0; -#else - hw_op = - (src[0].File == - PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : - R300_VPI_OUT_OP_MAD; - - o_inst->op = - MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = ONE_SRC_0; - o_inst->src[2] = ZERO_SRC_0; -#endif - - goto next; - + case OPCODE_ABS: + inst = t_opcode_abs(vp, vpi, inst, src); + break; case OPCODE_ADD: -#if 1 - hw_op = (src[0].File == PROGRAM_TEMPORARY && - src[1].File == - PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : - R300_VPI_OUT_OP_MAD; - - o_inst->op = - MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = ONE_SRC_0; - o_inst->src[1] = t_src(vp, &src[0]); - o_inst->src[2] = t_src(vp, &src[1]); -#else - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = t_src(vp, &src[1]); - o_inst->src[2] = ZERO_SRC_1; - -#endif - goto next; - - case OPCODE_MAD: - hw_op = (src[0].File == PROGRAM_TEMPORARY && - src[1].File == PROGRAM_TEMPORARY && - src[2].File == - PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : - R300_VPI_OUT_OP_MAD; - - o_inst->op = - MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = t_src(vp, &src[1]); - o_inst->src[2] = t_src(vp, &src[2]); - goto next; - - case OPCODE_MUL: /* HW mul can take third arg but appears to have some other limitations. */ - hw_op = (src[0].File == PROGRAM_TEMPORARY && - src[1].File == - PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : - R300_VPI_OUT_OP_MAD; - - o_inst->op = - MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = t_src(vp, &src[1]); - - o_inst->src[2] = ZERO_SRC_1; - goto next; - - case OPCODE_DP3: //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - o_inst->src[0] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ - (src[0].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 1)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 2)), - SWIZZLE_ZERO, - t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_XYZ : - VSF_FLAG_NONE) | (src[0]. - RelAddr << 4); - - o_inst->src[1] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ - (src[1].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 1)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 2)), - SWIZZLE_ZERO, - t_src_class(src[1].File), - src[1]. - NegateBase ? VSF_FLAG_XYZ : - VSF_FLAG_NONE) | (src[1]. - RelAddr << 4); - - o_inst->src[2] = ZERO_SRC_1; - goto next; - - case OPCODE_SUB: //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W -#if 1 - hw_op = (src[0].File == PROGRAM_TEMPORARY && - src[1].File == - PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : - R300_VPI_OUT_OP_MAD; - - o_inst->op = - MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = ONE_SRC_0; - o_inst->src[2] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ - (src[1].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 1)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 2)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 3)), - t_src_class(src[1].File), - (!src[1]. - NegateBase) ? VSF_FLAG_ALL : - VSF_FLAG_NONE) | (src[1]. - RelAddr << 4); -#else - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ - (src[1].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 1)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 2)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 3)), - t_src_class(src[1].File), - (!src[1]. - NegateBase) ? VSF_FLAG_ALL : - VSF_FLAG_NONE) | (src[1]. - RelAddr << 4); - o_inst->src[2] = 0; -#endif - goto next; - - case OPCODE_ABS: //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_MAX, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ - (src[0].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 1)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 2)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 3)), - t_src_class(src[0].File), - (!src[0]. - NegateBase) ? VSF_FLAG_ALL : - VSF_FLAG_NONE) | (src[0]. - RelAddr << 4); - o_inst->src[2] = 0; - goto next; - + inst = t_opcode_add(vp, vpi, inst, src); + break; + case OPCODE_ARL: + inst = t_opcode_arl(vp, vpi, inst, src); + break; + case OPCODE_DP3: + inst = t_opcode_dp3(vp, vpi, inst, src); + break; + case OPCODE_DP4: + inst = t_opcode_dp4(vp, vpi, inst, src); + break; + case OPCODE_DPH: + inst = t_opcode_dph(vp, vpi, inst, src); + break; + case OPCODE_DST: + inst = t_opcode_dst(vp, vpi, inst, src); + break; + case OPCODE_EX2: + inst = t_opcode_ex2(vp, vpi, inst, src); + break; + case OPCODE_EXP: + inst = t_opcode_exp(vp, vpi, inst, src); + break; case OPCODE_FLR: - /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} - ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */ - - o_inst->op = MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, u_temp_i, - t_dst_mask(vpi->DstReg. - WriteMask), - VSF_OUT_CLASS_TMP); - - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = ZERO_SRC_0; - o_inst->src[2] = ZERO_SRC_0; - o_inst++; - - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = MAKE_VSF_SOURCE(u_temp_i, - VSF_IN_COMPONENT_X, - VSF_IN_COMPONENT_Y, - VSF_IN_COMPONENT_Z, - VSF_IN_COMPONENT_W, - VSF_IN_CLASS_TMP, - /* Not 100% sure about this */ - (!src[0]. - NegateBase) ? - VSF_FLAG_ALL : - VSF_FLAG_NONE - /*VSF_FLAG_ALL */ ); - - o_inst->src[2] = ZERO_SRC_0; - u_temp_i--; - goto next; - - case OPCODE_LG2: // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X} - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_LG2, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - o_inst->src[0] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ - (src[0].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 0)), - t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : - VSF_FLAG_NONE) | (src[0]. - RelAddr << 4); - o_inst->src[1] = ZERO_SRC_0; - o_inst->src[2] = ZERO_SRC_0; - goto next; - - case OPCODE_LIT: //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_LIT, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - /* NOTE: Users swizzling might not work. */ - o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - VSF_IN_COMPONENT_ZERO, // z - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_src_class(src[0]. - File), - src[0]. - NegateBase ? - VSF_FLAG_ALL : - VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - VSF_IN_COMPONENT_ZERO, // z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - t_src_class(src[0]. - File), - src[0]. - NegateBase ? - VSF_FLAG_ALL : - VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - o_inst->src[2] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - VSF_IN_COMPONENT_ZERO, // z - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - t_src_class(src[0]. - File), - src[0]. - NegateBase ? - VSF_FLAG_ALL : - VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - goto next; - - case OPCODE_DPH: //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - o_inst->src[0] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ - (src[0].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 1)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 2)), - VSF_IN_COMPONENT_ONE, - t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_XYZ : - VSF_FLAG_NONE) | (src[0]. - RelAddr << 4); - o_inst->src[1] = t_src(vp, &src[1]); - o_inst->src[2] = ZERO_SRC_1; - goto next; - - case OPCODE_XPD: - /* mul r0, r1.yzxw, r2.zxyw - mad r0, -r2.yzxw, r1.zxyw, r0 - NOTE: might need MAD_2 - */ - - o_inst->op = MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, u_temp_i, - t_dst_mask(vpi->DstReg. - WriteMask), - VSF_OUT_CLASS_TMP); - - o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - t_src_class(src[0]. - File), - src[0]. - NegateBase ? - VSF_FLAG_ALL : - VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - - o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w - t_src_class(src[1]. - File), - src[1]. - NegateBase ? - VSF_FLAG_ALL : - VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - - o_inst->src[2] = ZERO_SRC_1; - o_inst++; - u_temp_i--; - - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w - t_src_class(src[1]. - File), - (!src[1]. - NegateBase) ? - VSF_FLAG_ALL : - VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - - o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - t_src_class(src[0]. - File), - src[0]. - NegateBase ? - VSF_FLAG_ALL : - VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - - o_inst->src[2] = MAKE_VSF_SOURCE(u_temp_i + 1, - VSF_IN_COMPONENT_X, - VSF_IN_COMPONENT_Y, - VSF_IN_COMPONENT_Z, - VSF_IN_COMPONENT_W, - VSF_IN_CLASS_TMP, - VSF_FLAG_NONE); - - goto next; - - case OPCODE_RCC: - fprintf(stderr, "Dont know how to handle op %d yet\n", - vpi->Opcode); - _mesa_exit(-1); + inst = + t_opcode_flr(vp, vpi, inst, src, /* FIXME */ + &u_temp_i); + break; + case OPCODE_FRC: + inst = t_opcode_frc(vp, vpi, inst, src); + break; + case OPCODE_LG2: + inst = t_opcode_lg2(vp, vpi, inst, src); + break; + case OPCODE_LIT: + inst = t_opcode_lit(vp, vpi, inst, src); + break; + case OPCODE_LOG: + inst = t_opcode_log(vp, vpi, inst, src); + break; + case OPCODE_MAD: + inst = t_opcode_mad(vp, vpi, inst, src); + break; + case OPCODE_MAX: + inst = t_opcode_max(vp, vpi, inst, src); + break; + case OPCODE_MIN: + inst = t_opcode_min(vp, vpi, inst, src); + break; + case OPCODE_MOV: + inst = t_opcode_mov(vp, vpi, inst, src); break; - case OPCODE_END: + case OPCODE_MUL: + inst = t_opcode_mul(vp, vpi, inst, src); + break; + case OPCODE_POW: + inst = t_opcode_pow(vp, vpi, inst, src); + break; + case OPCODE_RCP: + inst = t_opcode_rcp(vp, vpi, inst, src); + break; + case OPCODE_RSQ: + inst = t_opcode_rsq(vp, vpi, inst, src); + break; + case OPCODE_SGE: + inst = t_opcode_sge(vp, vpi, inst, src); + break; + case OPCODE_SLT: + inst = t_opcode_slt(vp, vpi, inst, src); + break; + case OPCODE_SUB: + inst = t_opcode_sub(vp, vpi, inst, src); + break; + case OPCODE_SWZ: + inst = t_opcode_swz(vp, vpi, inst, src); + break; + case OPCODE_XPD: + inst = + t_opcode_xpd(vp, vpi, inst, src, /* FIXME */ + &u_temp_i); break; default: + assert(0); break; } - - o_inst->op = - MAKE_VSF_OP(t_opcode(vpi->Opcode), - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - if (are_srcs_scalar) { - switch (operands) { - case 1: - o_inst->src[0] = t_src_scalar(vp, &src[0]); - o_inst->src[1] = ZERO_SRC_0; - o_inst->src[2] = ZERO_SRC_0; - break; - - case 2: - o_inst->src[0] = t_src_scalar(vp, &src[0]); - o_inst->src[1] = t_src_scalar(vp, &src[1]); - o_inst->src[2] = ZERO_SRC_1; - break; - - case 3: - o_inst->src[0] = t_src_scalar(vp, &src[0]); - o_inst->src[1] = t_src_scalar(vp, &src[1]); - o_inst->src[2] = t_src_scalar(vp, &src[2]); - break; - - default: - fprintf(stderr, - "scalars and op RCC not handled yet"); - _mesa_exit(-1); - break; - } - } else { - switch (operands) { - case 1: - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = ZERO_SRC_0; - o_inst->src[2] = ZERO_SRC_0; - break; - - case 2: - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = t_src(vp, &src[1]); - o_inst->src[2] = ZERO_SRC_1; - break; - - case 3: - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = t_src(vp, &src[1]); - o_inst->src[2] = t_src(vp, &src[2]); - break; - - default: - fprintf(stderr, - "scalars and op RCC not handled yet"); - _mesa_exit(-1); - break; - } - } - next:; } - /* Will most likely segfault before we get here... fix later. */ - if (o_inst - vp->program.body.i >= VSF_MAX_FRAGMENT_LENGTH / 4) { + vp->program.length = (inst - vp->program.body.i); + if (vp->program.length >= VSF_MAX_FRAGMENT_LENGTH) { vp->program.length = 0; vp->native = GL_FALSE; - return; } - vp->program.length = (o_inst - vp->program.body.i) * 4; #if 0 fprintf(stderr, "hw program:\n"); for (i = 0; i < vp->program.length; i++) @@ -1065,7 +1309,8 @@ static void position_invariant(struct gl_program *prog) struct gl_program_parameter_list *paramList; int i; - gl_state_index tokens[STATE_LENGTH] = { STATE_MVP_MATRIX, 0, 0, 0, 0 }; + gl_state_index tokens[STATE_LENGTH] = + { STATE_MVP_MATRIX, 0, 0, 0, 0 }; /* tokens[4] = matrix modifier */ #ifdef PREFER_DP4 @@ -1159,8 +1404,8 @@ static void insert_wpos(struct r300_vertex_program *vp, prog->NumInstructions - 1); /* END */ _mesa_copy_instructions(&vpi[prog->NumInstructions + 1], - &prog->Instructions[prog->NumInstructions - 1], - 1); + &prog->Instructions[prog->NumInstructions - + 1], 1); vpi_insert = &vpi[prog->NumInstructions - 1]; vpi_insert[i].Opcode = OPCODE_MOV; @@ -1206,8 +1451,8 @@ static void pos_as_texcoord(struct r300_vertex_program *vp, prog->NumTemporaries++; for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) { - if (vpi->DstReg.File == PROGRAM_OUTPUT && - vpi->DstReg.Index == VERT_RESULT_HPOS) { + if (vpi->DstReg.File == PROGRAM_OUTPUT + && vpi->DstReg.Index == VERT_RESULT_HPOS) { vpi->DstReg.File = PROGRAM_TEMPORARY; vpi->DstReg.Index = tempregi; } @@ -1223,20 +1468,18 @@ static struct r300_vertex_program *build_program(struct r300_vertex_program_key vp = _mesa_calloc(sizeof(*vp)); _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key)); - vp->wpos_idx = wpos_idx; if (mesa_vp->IsPositionInvariant) { position_invariant(&mesa_vp->Base); } - if (wpos_idx > -1) + if (wpos_idx > -1) { pos_as_texcoord(vp, &mesa_vp->Base); + } assert(mesa_vp->Base.NumInstructions); - vp->num_temporaries = mesa_vp->Base.NumTemporaries; - r300TranslateVertexShader(vp, mesa_vp->Base.Instructions); return vp; @@ -1252,11 +1495,10 @@ void r300SelectVertexShader(r300ContextPtr r300) struct r300_vertex_program *vp; GLint wpos_idx; - vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; + vpc = + (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; - wanted_key.OutputsWritten |= 1 << VERT_RESULT_HPOS; - wpos_idx = -1; if (InputsRead & FRAG_BIT_WPOS) { for (i = 0; i < ctx->Const.MaxTextureUnits; i++) @@ -1271,28 +1513,35 @@ void r300SelectVertexShader(r300ContextPtr r300) InputsRead |= (FRAG_BIT_TEX0 << i); wpos_idx = i; } + wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead; + wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten; + + wanted_key.OutputsWritten |= 1 << VERT_RESULT_HPOS; - if (InputsRead & FRAG_BIT_COL0) + if (InputsRead & FRAG_BIT_COL0) { wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL0; + } - if ((InputsRead & FRAG_BIT_COL1) /*|| - (InputsRead & FRAG_BIT_FOGC) */ ) + if ((InputsRead & FRAG_BIT_COL1)) { wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL1; + } - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) - if (InputsRead & (FRAG_BIT_TEX0 << i)) + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (InputsRead & (FRAG_BIT_TEX0 << i)) { wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); + } + } - wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead; if (vpc->mesa_program.IsPositionInvariant) { /* we wan't position don't we ? */ wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS); + wanted_key.OutputsWritten |= (1 << VERT_RESULT_HPOS); } for (vp = vpc->progs; vp; vp = vp->next) - if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) == - 0) { + if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) + == 0) { r300->selected_vp = vp; return; } diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.h b/src/mesa/drivers/dri/r300/r300_vertprog.h index 252d5a9..3df0eee 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.h +++ b/src/mesa/drivers/dri/r300/r300_vertprog.h @@ -3,11 +3,6 @@ #include "r300_reg.h" -typedef struct { - GLuint op; - GLuint src[3]; -} VERTEX_SHADER_INSTRUCTION; - #define VSF_FLAG_X 1 #define VSF_FLAG_Y 2 #define VSF_FLAG_Z 4 diff --git a/src/mesa/drivers/dri/r300/radeon_lock.h b/src/mesa/drivers/dri/r300/radeon_lock.h index c47adc9..a344837 100644 --- a/src/mesa/drivers/dri/r300/radeon_lock.h +++ b/src/mesa/drivers/dri/r300/radeon_lock.h @@ -42,9 +42,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #ifndef __RADEON_LOCK_H__ #define __RADEON_LOCK_H__ -#if 0 -#include "r200_ioctl.h" -#endif #include "radeon_context.h" extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags);